-
Notifications
You must be signed in to change notification settings - Fork 3
/
gaussian.py
81 lines (72 loc) · 3.24 KB
/
gaussian.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
# functions related to Guassian kernel functions or normal distributions
import config
from math import exp
from scipy.stats import multivariate_normal
def gaussian_log_prob(x, bandwidth):
""" Evaluate a Gaussian function at x for a distribution centered on zero
with a height of 1 given a bandwidth. Return the natural log of the
probability. Used as a distance decay function.
See: https://en.wikipedia.org/wiki/Gaussian_function
"""
return -(x**2 / (2 * bandwidth**2))
# unlogged: return exp(-(x**2 / (2 * bandwidth**2)))
def min_peak(GPS_error_sd, total_time):
"""
Estimate minimum peak height given time threshold and variance parameters.
We assume that the volume under the total KDE PDF ~= 1.
- total_time is the sum of the time weights used in the KDE.
- threshold_time is the minimum activity time.
Times are given in seconds
"""
total_variance = GPS_error_sd**2 + config.kernel_bandwidth**2
peak_height = multivariate_normal.pdf([0.5, 0.5], [0, 0],
[total_variance, total_variance])
# this is the peak height if we have no movement and
# total_time == threshold_time
assert total_time > config.minimum_activity_time
rv = peak_height * (float(config.minimum_activity_time) / total_time)
return rv
def kde(input_points,eval_points):
"""Do weighted 2d KDE in R KS package, returning python results.
Returns list of P estimates corresponding to estimate_points.
input_points should be a list of points with x,y,weight attributes
eval_points are points where the PDF will be estimated (not elsewhere).
Another possible way of doing KDE (in Python) is with
http://pysal.readthedocs.io/en/latest/users/tutorials/
smoothing.html#non-parametric-smoothing ?
or with http://scikit-learn.org/stable/modules/density.html ?
But I'm not convinced yet that it's worthwhile to explore these.
R is quite fast and works fine.
"""
# import R tools
from rpy2.robjects.packages import importr # for importing packages
ks = importr('ks') # load the ks package (making kde function available)
from rpy2.robjects import r, FloatVector # variable names
cbind, diag = r['cbind'], r['diag'] # get some basic R functions into Python
# create the inputs from input_points
x_in, y_in = [p.x for p in input_points], [p.y for p in input_points]
weights = [p.weight for p in input_points]
# standardize the weights to the sample size
if sum(weights) != len(weights):
adjust_factor = len(weights) / float(sum(weights))
weights = [w * adjust_factor for w in weights]
# get eval_points processed for R
x_ev, y_ev = [p.x for p in eval_points], [p.y for p in eval_points]
# do the KDE
if config.debug_output:
print('\tRunning KDE on', len(input_points), 'points')
bandwidth = config.kernel_bandwidth
surface = ks.kde(
x = cbind(FloatVector(x_in), FloatVector(y_in)),
eval_points = cbind(FloatVector(x_ev), FloatVector(y_ev)),
w = FloatVector(weights),
H = diag( FloatVector([bandwidth**2, bandwidth**2]) ),
binned = False
)
estimates = surface.rx2('estimate')
# assign PDF estimates to the eval_points themselves
# We don't want rpy2 syntax leaving this function
assert len(estimates) == len(eval_points)
for i in range(0, len(estimates)):
eval_points[i].kde_p = estimates.rx(i+1)[0]
# (the index difference is due to the different styles of R and Python)