-
Notifications
You must be signed in to change notification settings - Fork 3
/
my_mystery_module.py
122 lines (91 loc) · 2.78 KB
/
my_mystery_module.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
import numpy as np
from scipy import stats
def kmeans(X, n, n_iter=100, centroids=None):
    """
    Simple k-means implementation.

    Parameters:
    -----------
    X : array (n_samples, n_dim)
        the data
    n : int
        number of clusters
    n_iter : int
        number of iterations
    centroids : array (n, n_dim), optional
        initial centroids; random data-scaled seeds are drawn when omitted

    Returns:
    --------
    centroids : array (n, n_dim)
        final cluster centers
    labels : array (n_samples,)
        index of the closest centroid for each sample
    """
    def assign(centroids):
        # mean squared distance of every sample to every centroid;
        # argmin over the centroid axis gives the nearest cluster
        dist = np.average(np.square(X[:, :, np.newaxis] - centroids.T), axis=1)
        return np.argmin(dist, axis=1)
    def get_centroids(labels, old_centroids):
        # Recompute cluster means. Always emit exactly n centroids:
        # a cluster that lost all members keeps its previous centroid
        # (the old code shrank the array and averaged empty slices to NaN).
        centroids = []
        for i in range(n):
            members = X[labels == i]
            if len(members) > 0:
                centroids.append(np.average(members, axis=0))
            else:
                centroids.append(old_centroids[i])
        return np.stack(centroids)
    if centroids is None:
        # start with random seed
        centroids = np.random.rand(n, X.shape[1])
        # scale to roughly match the data
        centroids *= np.std(X, axis=0)
        centroids += np.mean(X, axis=0)
    labels = assign(centroids)
    # iterate: alternate centroid update and reassignment
    for i in range(n_iter):
        centroids = get_centroids(labels, centroids)
        labels = assign(centroids)
    return centroids, labels
def gmm(X, n, n_iter=100, mus=None, covs=None):
    """
    Simple Gaussian-mixture-model fit via EM (works for any dimensionality).

    Parameters:
    -----------
    X : array (n_samples, n_dim)
        the data
    n : int
        number of gaussians
    n_iter : int
        number of EM iterations
    mus : array (n, n_dim), optional
        initial means; random data-scaled seeds are drawn when omitted
    covs : array (n, n_dim, n_dim), optional
        initial covariances; random PSD matrices are drawn when omitted

    Returns:
    --------
    mus : list of arrays
        mean values
    covs : list of arrays
        covariances
    exps : array (n_samples, n)
        per-sample component responsibilities (probabilities)
    """
    def E(mus, covs):
        # E-step: responsibility of each component for each sample
        probs = []
        exps = []
        for i in range(n):
            probs.append(stats.multivariate_normal(mus[i], covs[i]).pdf(X))
        probs = np.array(probs)
        tot_prob = np.sum(probs, axis=0)
        for i in range(n):
            exps.append(probs[i] / tot_prob)
        # samples with zero density under every component divide 0/0;
        # nan_to_num maps those NaNs to 0
        return np.nan_to_num(exps)
    def M(exps):
        # M-step: responsibility-weighted means and covariances
        mus = []
        covs = []
        for i in range(n):
            mus.append(np.average(X, weights=exps[i], axis=0))
            covs.append(np.cov(X.T, aweights=exps[i]))
        return mus, covs
    if mus is None:
        std = np.std(X, axis=0)
        mus = np.random.rand(n, X.shape[1])
        # scale to roughly match the data
        mus *= std
        mus += np.mean(X, axis=0)
    if covs is None:
        # Bug fix: previously covs were only initialised when mus was None,
        # so gmm(X, n, mus=...) crashed inside the E-step with covs=None.
        # A @ A.T makes each random matrix symmetric positive semi-definite.
        covs = np.random.rand(n, X.shape[1], X.shape[1])
        covs = covs @ np.swapaxes(covs, 1, 2)
    for i in range(n_iter):
        exps = E(mus, covs)
        mus, covs = M(exps)
    return mus, covs, np.array(E(mus, covs)).T