Skip to content

Commit

Permalink
Propogate a probability type.
Browse files Browse the repository at this point in the history
  • Loading branch information
rleonid committed Mar 2, 2016
1 parent e4d2ae1 commit eb6c73c
Show file tree
Hide file tree
Showing 10 changed files with 68 additions and 63 deletions.
3 changes: 2 additions & 1 deletion src/lib/rgr/eval_multivariate.ml
Original file line number Diff line number Diff line change
Expand Up @@ -23,6 +23,7 @@
open Util
open SolvedLPViaSvd
open Lacaml.D
module Pr = Probability
module P = Printf
module D = Statistics.Distributions
module Ht = Statistics.Hypothesis_test
Expand Down Expand Up @@ -101,7 +102,7 @@ let confidence_interval, prediction_interval =
in
let p' = if plus_one then 1. +. p else p in
let sc = sqrt (glm.inferred_var *. p') in
let t = D.student_quantile ~degrees_of_freedom (alpha /. 2.0) in
let t = D.student_quantile ~degrees_of_freedom (Pr.restrict (alpha /. 2.0)) in
let d = sc *. (abs_float t) in
let y = eval glm x in
(y -. d), (y +. d)
Expand Down
3 changes: 2 additions & 1 deletion src/lib/rgr/univariate.ml
Original file line number Diff line number Diff line change
Expand Up @@ -16,6 +16,7 @@
*)

open Util
module Pr = Probability
module P = Printf
module D = Statistics.Distributions
module Ht = Statistics.Hypothesis_test
Expand Down Expand Up @@ -114,7 +115,7 @@ let confidence_interval, prediction_interval =
let interval a lrm ~alpha x =
let dgf = lrm.size -. 2.0 in
let degrees_of_freedom = truncate dgf in
let t = D.student_quantile ~degrees_of_freedom (alpha /. 2.0) in
let t = D.student_quantile ~degrees_of_freedom (Pr.restrict (alpha /. 2.0)) in
let b = (x -. lrm.m_pred) ** 2.0 /. lrm.s_xx in
let c = lrm.sum_residuals /. dgf in
let se = sqrt ((a +. b) *. c) in
Expand Down
36 changes: 19 additions & 17 deletions src/lib/stats/distributions.ml
Original file line number Diff line number Diff line change
Expand Up @@ -18,22 +18,22 @@

open Util
module F = Uncategorized.Functions
module P = Probability
let invalid_arg ~f fmt = invalid_arg ~m:"Distributions" ~f fmt

let normal_cdf ?(mean=0.0) ?(std=1.0) x =
let z = ((x -. mean) /. std) in
(1.0 +. F.erf (z /. sqrt 2.0)) /. 2.0
P.restrict ((1.0 +. F.erf (z /. sqrt 2.0)) /. 2.0)

let normal_pdf ?(mean=0.0) ?(std=1.0) x =
let z = ((x -. mean) /. std) in
(exp ((-1.0 /. 2.0) *. (z ** 2.0))) /. (std *. (sqrt (2.0 *. pi)))

let normal_quantile ?(mean=0.0) ?(std=1.0) p =
if p < 0. || p > 1. then invalid_arg ~f:"normal_quantile" "p %f" p else
mean +. std *. F.normal_cdf_inv p
let normal_quantile ?(mean=0.0) ?(std=1.0) (p : P.t) =
mean +. std *. F.normal_cdf_inv (p :> float)

let poisson_cdf ~mean k =
F.regularized_upper_gamma ~a:(floor (k +. 1.0)) mean
P.restrict (F.regularized_upper_gamma ~a:(floor (k +. 1.0)) mean)

let ln_beta_pdf ~alpha ~beta =
if alpha <= 0.0 then invalid_arg ~f:"ln_beta_pdf" "alpha" else
Expand All @@ -57,10 +57,10 @@ let beta_cdf ~alpha ~beta =
if alpha <= 0.0 then invalid_arg ~f:"beta_cdf" "alpha" else
if beta <= 0.0 then invalid_arg ~f:"beta_cdf" "beta" else
let reg = F.regularized_beta ~alpha ~beta in
fun x -> if x <= 0.0 then 0.0 else if x >= 1.0 then 1.0 else reg x
fun x -> P.restrict (reg x)

let chi_square_cdf ~k x =
F.regularized_lower_gamma ~a:((float k) /. 2.0) (x /. 2.0)
P.restrict (F.regularized_lower_gamma ~a:((float k) /. 2.0) (x /. 2.0))

(* According to Wikipedia, haven't research a more efficient algorithm.
Implemented it here for completeness.
Expand All @@ -73,16 +73,18 @@ let student_pdf ~degrees_of_freedom t =
let student_cdf ~degrees_of_freedom t =
let v = float degrees_of_freedom in
let x = Float.(v / (t * t + v)) in
if t > 0.0 then
Float.(1.0 - 0.5 * (F.regularized_beta ~alpha:(v/2.) ~beta:0.5 x))
else if t < 0.0 then
Float.(0.5 * (F.regularized_beta ~alpha:(v/2.) ~beta:0.5 x))
else (* 0.0 *)
0.5

let student_quantile ~degrees_of_freedom p =
if p < 0. || p > 1. then invalid_arg ~f:"student_quantile" "p %f" p else
F.student_cdf_inv ~degrees_of_freedom p
let p =
if t > 0.0 then
Float.(1.0 - 0.5 * (F.regularized_beta ~alpha:(v/2.) ~beta:0.5 x))
else if t < 0.0 then
Float.(0.5 * (F.regularized_beta ~alpha:(v/2.) ~beta:0.5 x))
else (* 0.0 *)
0.5
in
P.restrict p

let student_quantile ~degrees_of_freedom (p : P.t) =
F.student_cdf_inv ~degrees_of_freedom (p :> float)

let ln_dirichlet_pdf ~alphas =
if alphas = [||] || Array.any (fun a -> a <= 0.0) alphas then
Expand Down
25 changes: 10 additions & 15 deletions src/lib/stats/distributions.mli
Original file line number Diff line number Diff line change
Expand Up @@ -23,9 +23,9 @@ open Util

(** [normal_cdf mean std x] is the probability that a normal random
variable with [mean] and standard deviation [std] takes a value less than
or equal to [x]. [mean] defaults to 0.0 and [std] to 1.0.
or equal to [x]. [mean] defaults to 0.0 and [std] to 1.0.
*)
val normal_cdf : ?mean:float -> ?std:float -> float -> float
val normal_cdf : ?mean:float -> ?std:float -> float -> Probability.t

(** [normal_pdf mean std x] is the value of the Normal distribution
function (with [mean] and standard deviation [std]) at [x].
Expand All @@ -34,17 +34,14 @@ val normal_cdf : ?mean:float -> ?std:float -> float -> float
val normal_pdf : ?mean:float -> ?std:float -> float -> float

(** [normal_quantile ?mean ?std p] computes [x] such that
[normal_cdf mean std x = p]. [mean] defaults to 0.0 and [std] to 1.0.
@raise Invalid_argument if [p] is outside [0,1].
*)
val normal_quantile : ?mean:float -> ?std:float -> float -> float
[normal_cdf mean std x = p]. [mean] defaults to 0.0 and [std] to 1.0. *)
val normal_quantile : ?mean:float -> ?std:float -> Probability.t -> float

(** {2 Poisson} *)

(** [poisson_cdf mean x] is the probability that a Poisson random variable with
[mean] will take a value less than or equal to [x]. *)
val poisson_cdf : mean:float -> float -> float
val poisson_cdf : mean:float -> float -> Probability.t

(** {2 Beta} *)
(** [beta_pdf ~alpha ~beta x] natural log value of the Beta distribution
Expand All @@ -58,13 +55,13 @@ val beta_pdf : alpha:float -> beta:float -> float -> float
(** [beta_cdf ~alpha ~beta x] probability that a beta random variable with
[alpha] and [beta] shape parameters takes a value less than or equal to
[x]. *)
val beta_cdf : alpha:float -> beta:float -> float -> float
val beta_cdf : alpha:float -> beta:float -> float -> Probability.t

(** {2 Chi} *)

(** [chi_square_cdf ~k x] computes the probability of seeing a value less than
[x] in a Chi-square distribution with [k] degrees of freedom.*)
val chi_square_cdf : k:int -> float -> float
val chi_square_cdf : k:int -> float -> Probability.t

(** {2 Student} *)

Expand All @@ -74,13 +71,11 @@ val student_pdf : degrees_of_freedom:int -> float -> float

(** [student_cdf degrees_of_freedom x] computest the probability that the Student T's
distrubtion with [degrees_of_freedom] takes a value less than [x].*)
val student_cdf : degrees_of_freedom:int -> float -> float
val student_cdf : degrees_of_freedom:int -> float -> Probability.t

(** [student_quantile degrees_of_freedom p] computes [x] such that
[student_cdf degrees_of_freedom x = p].
@raise Invalid_argument if [p] is outside [0,1]. *)
val student_quantile : degrees_of_freedom:int -> float -> float
[student_cdf degrees_of_freedom x = p]. *)
val student_quantile : degrees_of_freedom:int -> Probability.t -> float

(** {2 Dirichlet} *)

Expand Down
16 changes: 9 additions & 7 deletions src/lib/stats/hypothesis_test.ml
Original file line number Diff line number Diff line change
Expand Up @@ -16,12 +16,14 @@
*)

open Util
module P = Probability
open Descriptive
open Distributions
module F = Uncategorized.Functions

let prediction_interval_sub k std mean alpha =
let t_v = student_quantile ~degrees_of_freedom:(k - 1) (1.0 -. alpha /. 2.) in
let p = P.restrict (1.0 -. alpha /. 2.) in
let t_v = student_quantile ~degrees_of_freedom:(k - 1) p in
let fk = float k in
let d = Float.(t_v * std * sqrt (1. + (1. / fk))) in
(mean -. d, mean +. d)
Expand All @@ -33,7 +35,7 @@ type t =
{ degrees_of_freedom : float (* can be non-integer due to corrections. *)
; statistic : float
; standard_error : float
; prob_by_chance : float
; prob_by_chance : P.t
}

let test_to_string t =
Expand All @@ -44,7 +46,7 @@ let test_to_string t =
t.degrees_of_freedom
t.statistic
t.standard_error
t.prob_by_chance
(t.prob_by_chance :> float)

(* TODO: Refactor the Chi logic to use a cdf *)
let chi observed expected =
Expand All @@ -57,7 +59,7 @@ let chi observed expected =
{ degrees_of_freedom
; statistic
; standard_error = sqrt (2.0 *. degrees_of_freedom)
; prob_by_chance = F.chi_square_greater dgf statistic
; prob_by_chance = P.restrict (F.chi_square_greater dgf statistic)
}

(* TODO: Add Tests where the population variance is known, ie Z-tests.*)
Expand All @@ -70,15 +72,15 @@ let t_test hypothesis ~degrees_of_freedom ~diff ~error =
let statistic = diff /. error in
let prob_by_chance =
let upper_ct = student_cdf ~degrees_of_freedom (abs_float statistic) in
let prob_upper_tail = 1.0 -. upper_ct in
let prob_upper_tail = 1.0 -. (upper_ct :> float) in
match hypothesis with
| Two_sided -> prob_upper_tail *. 2.0
| One_sided -> prob_upper_tail
in
{ degrees_of_freedom = float_of_int degrees_of_freedom
; statistic
; standard_error = error
; prob_by_chance
; prob_by_chance = P.restrict prob_by_chance
}

let mean_t_test population_mean hypothesis arr =
Expand Down Expand Up @@ -152,5 +154,5 @@ let variance_ratio_test arr1 arr2 =
{ degrees_of_freedom = d1 +. d2
; statistic = f
; standard_error = nan
; prob_by_chance = p
; prob_by_chance = P.restrict p
}
15 changes: 8 additions & 7 deletions src/lib/stats/hypothesis_test.mli
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
(*
Copyright 2015:
Copyright 2015,2016:
Leonid Rozenberg <[email protected]>
Licensed under the Apache License, Version 2.0 (the "License");
Expand All @@ -15,25 +15,27 @@
limitations under the License.
*)

open Util

(** Infer probabilities from data and perform hypothesis tests. *)

(** [prediction_interval stats alpha]
Creates a prediction interval for the distribution described by [stats]
at an [alpha] level of statistical significance; future observations
will fall within the bounds with probabiltiy [1.0 - alpha].
will fall within the bounds with probability [1.0 - alpha].
When we do not know the mean or standard deviation of a distribution
we can still create a prediction interval based off of basic sampled
statistics and Student's distribution.
*)
statistics and Student's distribution. *)
val prediction_interval : Descriptive.summary -> float -> float * float

(** A hypothesis test. *)
type t =
{ degrees_of_freedom : float (** Can be non-integer due to corrections. *)
; statistic : float (** The value that we're testing. *)
; standard_error : float (** The scaled version of the statistic. *)
; prob_by_chance : float (** The probability that statistic could be
; prob_by_chance : Probability.t
(** The probability that statistic could be
this large (or larger) by chance, for the
specified conditions of the test. *)
}
Expand All @@ -54,7 +56,7 @@ type null_hypothesis =
hypothesis, where [d] is the difference between population parameter and
the observed value, [e] is the standard error of the observed value, and
[k] is the degrees of freedom in the statistical procedure.
One may think of this as a principled way to test the signal (diff)
to noise (error) seen in a sample of data. *)
val t_test : null_hypothesis -> degrees_of_freedom:int -> diff:float
Expand Down Expand Up @@ -82,4 +84,3 @@ val means_different_variance_test : null_hypothesis -> float array
[sample2] have the same variance based on F-test.*)
val variance_ratio_test : float array -> float array -> t


7 changes: 0 additions & 7 deletions src/lib/unc/functions.ml
Original file line number Diff line number Diff line change
Expand Up @@ -67,13 +67,6 @@ let chi_square_less num_observations chi_square =
let chi_square_greater num_observations chi_square =
regularized_upper_gamma ~a:((float num_observations) /. 2.0) (chi_square /. 2.0)

let softmax ?(temperature=1.0) weights =
if Array.length weights = 0 then raise (Invalid_argument "weights") else
if temperature = 0.0 then raise (Invalid_argument "temperature") else
let weights = Array.map (fun w -> exp (w /. temperature)) weights in
let sum = Array.fold_left (+.) 0.0 weights in
Array.map (fun w -> w /. sum) weights

let normal_cdf_inv = Ocephes.ndtri

let student_cdf_inv ~degrees_of_freedom = Ocephes.stdtri ~k:degrees_of_freedom
Expand Down
6 changes: 0 additions & 6 deletions src/lib/unc/functions.mli
Original file line number Diff line number Diff line change
Expand Up @@ -77,12 +77,6 @@ val multivariate_beta : float array -> float
for better accuracy.*)
val ln_multivariate_beta : float array -> float

(** [softmax ?temperature weights] transforms [weights] into softmax weights dependent
on [temperature].
@raise Invalid_argument if [weights] is empty or [temperature = 0]. *)
val softmax : ?temperature:float -> float array -> float array

(** {2 Distribution CDF's} *)

(** [chi_square_less k x] computes the probability of
Expand Down
14 changes: 12 additions & 2 deletions src/lib/util/oml_probability.ml
Original file line number Diff line number Diff line change
Expand Up @@ -2,16 +2,26 @@
module Ou = Oml_util
module A = Oml_array

type t = float

let restrict x = max 1. (min 0. x)

let check x = if x > 1. then None else if x < 0. then None else Some x

type s = float array

let normalize weights =
let a2 = A.map abs_float weights in
let s = A.sumf a2 in
A.map (fun x -> x /. s) a2

(** [softmax ?temperature weights] transforms [weights] into softmax weights dependent
on [temperature].
@raise Invalid_argument if [weights] is empty or [temperature = 0]. *)
let softmax ?(temperature=1.0) weights =
if A.length weights = 0 then Ou.invalidArg "empty weights" else
if temperature = 0.0 then Ou.invalidArg "zero temperature" else
if A.length weights = 0 then Ou.invalid_arg "empty weights" else
if temperature = 0.0 then Ou.invalid_arg "zero temperature" else
let ewe = A.map (fun w -> exp (w /. temperature)) weights in
let sum = A.sumf ewe in
A.map (fun w -> w /. sum) ewe
Expand Down
6 changes: 6 additions & 0 deletions src/lib/util/oml_probability.mli
Original file line number Diff line number Diff line change
@@ -1,4 +1,10 @@

type t = private float

val restrict : float -> t

val check : float -> t option

type s = private float array

(* Constructors *)
Expand Down

0 comments on commit eb6c73c

Please sign in to comment.