-
Notifications
You must be signed in to change notification settings - Fork 0
Expand file tree
/
Copy pathmain.py
More file actions
94 lines (83 loc) · 2.45 KB
/
main.py
File metadata and controls
94 lines (83 loc) · 2.45 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
"""
李偉的第一個gmm模型用來做聚類
"""
# Echo the module docstring as a start-up banner. The docstring is Chinese for
# "Li Wei's first GMM model, used for clustering"; it is left untranslated
# because its text is emitted at runtime by this call.
print(__doc__)
from scipy.stats import multivariate_normal as MN
import numpy as np
from sklearn import preprocessing
from sklearn.datasets.samples_generator import make_blobs
from sklearn import metrics
from sklearn import cluster, datasets, mixture
from pprint import pprint
## INIT PROCESS
# Hyper-parameters of the hand-written EM fit.
ITER = 50          # number of EM iterations to run
CLUSTER_NUM = 2    # number of Gaussian components in the mixture
# Blob centres, only referenced by the disabled make_blobs alternative below.
centers = [[2, 2], [-2, -2], [2, -2], [-2, 2]]

# Alternative data set (disabled):
# DATA, LABEL = make_blobs(n_samples=1000, centers=centers, cluster_std=0.9)
DATA, LABEL = datasets.make_circles(
    n_samples=1000,
    factor=.5,
    noise=.05
)

# Basic shape information and per-feature statistics of the samples.
FEAT_NUM = DATA.shape[1]   # number of features (columns)
INS_NUM = DATA.shape[0]    # number of instances (rows)
MEAN = DATA.mean(axis=0)
MIN, MAX = DATA.min(axis=0), DATA.max(axis=0)

# Random initial mixing weights, shape (1, CLUSTER_NUM). They are normalised so
# that they form a proper probability vector (sum to 1) from the very first
# E-step instead of being arbitrary values in [0, 1).
WEIGHT = np.random.random_sample((1, CLUSTER_NUM))
WEIGHT = WEIGHT / WEIGHT.sum()
# Fixed-weight alternative (disabled):
# WEIGHT = [0.3, 0.7]

##DISTRI
# One frozen multivariate normal per component. Each mean is drawn uniformly
# inside the bounding box of the data; the covariance is left at the library
# default.
DISTRI = [
    MN(mean=np.random.uniform(MIN, MAX))
    for _ in range(CLUSTER_NUM)
]
# EM iterations for the Gaussian mixture.
# Reads:  DATA, LABEL, INS_NUM, CLUSTER_NUM, ITER, WEIGHT, DISTRI.
# Writes: dis_all (responsibilities, one row per sample and one column per
#         component), WEIGHT (mixing weights, shape (1, CLUSTER_NUM)),
#         mu_mat (component means stored as columns), cov_mat (list of
#         per-component covariance matrices) and DISTRI (refreshed frozen
#         distributions).
for i in range(ITER):
    # ---- E-step --------------------------------------------------------
    # Responsibility of component c for sample n is proportional to
    # WEIGHT[c] * pdf_c(x_n). The computation is done in the log domain and
    # the row maximum is subtracted before exponentiating, so a sample that
    # is far from every component can no longer underflow to an all-zero row
    # (which would make the normalisation below evaluate 0/0 = NaN).
    with np.errstate(divide="ignore"):  # log(0) -> -inf is acceptable here
        log_weight = np.log(WEIGHT)
    log_joint = np.column_stack(
        [dist.logpdf(DATA) for dist in DISTRI]
    ) + log_weight
    log_joint -= log_joint.max(axis=1, keepdims=True)
    dis_all = np.exp(log_joint)
    dis_all /= dis_all.sum(axis=1, keepdims=True)

    # ---- M-step: mixing weights ----------------------------------------
    # Column sums of the responsibilities (effective sample count per
    # component); kept 2-D so that it broadcasts against (n, k) and (f, k).
    mu_no = dis_all.sum(axis=0, keepdims=True)
    WEIGHT = mu_no / INS_NUM
    print(WEIGHT)

    # Progress report: hard-assign every sample to its most responsible
    # component and compare with the generator labels. The printed value is
    # the adjusted Rand index (higher is better) even though the message
    # calls it "loss".
    label = np.argmax(dis_all, axis=1)
    loss = metrics.adjusted_rand_score(LABEL, label)
    print("iter %s loss %.3f" % (i, loss))

    # ---- M-step: means -------------------------------------------------
    # Responsibility-weighted mean of the samples; column c is the mean of
    # component c.
    mu_mat = np.matmul(DATA.transpose(), dis_all) / mu_no

    # ---- M-step: covariances -------------------------------------------
    # Responsibility-weighted scatter matrix of each component, computed with
    # one matrix product per component instead of a Python loop over samples.
    cov_mat = []
    for c in range(CLUSTER_NUM):
        centered = DATA - mu_mat[:, c]
        weighted = centered * dis_all[:, c, np.newaxis]
        cov_mat.append(
            np.matmul(weighted.transpose(), centered) / mu_no[0, c]
        )

    # Rebuild the frozen distributions from the updated parameters.
    DISTRI = [
        MN(mean=mu_mat[:, k], cov=cov_mat[k])
        for k in range(CLUSTER_NUM)
    ]
# Dump the final responsibilities and the per-component covariance matrices.
# NOTE(review): assumed to run once after the EM loop has finished - confirm
# against the original file's indentation.
pprint(dis_all)
pprint(cov_mat)

# Hard cluster assignment: index of the largest responsibility in each row.
label = np.argmax(dis_all, axis=1)

import matplotlib.pyplot as plt
from itertools import cycle

# Scatter-plot every cluster in its own colour, taking single-letter colour
# codes from an endlessly repeating palette.
palette = cycle('bgrcmykbgrcmykbgrcmykbgrcmyk')
for cluster_id in range(CLUSTER_NUM):
    point_style = next(palette) + '.'
    in_cluster = label == cluster_id
    xs = DATA[in_cluster, 0]
    ys = DATA[in_cluster, 1]
    plt.plot(xs, ys, point_style)

plt.title('Estimated number of clusters: %d' % CLUSTER_NUM)
plt.show()