forked from probml/pmtk3
-
Notifications
You must be signed in to change notification settings - Fork 0
Expand file tree
/
Copy pathutils.py
More file actions
167 lines (138 loc) · 4.75 KB
/
utils.py
File metadata and controls
167 lines (138 loc) · 4.75 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
#!/usr/bin/env python3
# Miscellaneous utility functions
import os
import scipy.io as sio
import numpy as np
import glob
def nsubplots(n):
    """Return the subplot grid ``(ynum, xnum)`` needed to show ``n`` plots.

    The grid is kept roughly square: ``xnum`` is ``ceil(sqrt(n))`` and
    ``ynum`` is the number of rows needed to fit ``n`` plots in ``xnum``
    columns.

    Parameters
    ----------
    n : int
        Total number of plots to lay out.

    Returns
    -------
    (ynum, xnum) : tuple of int
        Number of plots in the y (rows) and x (columns) directions.
        ``(0, 0)`` is returned when ``n`` is not positive.
    """
    if n <= 0:
        # Nothing to lay out; avoids sqrt/0-division producing NaN.
        return 0, 0
    if n == 2:
        # Special case kept from the original implementation: two plots
        # are laid out on a 2x2 grid.
        return 2, 2
    # np.float was removed from NumPy (1.24); the builtin float is the
    # drop-in replacement.  Counts are converted to int because they are
    # grid dimensions, not measurements.
    xnum = int(np.ceil(np.sqrt(n)))
    ynum = int(np.ceil(n / float(xnum)))
    return ynum, xnum
def add_ones(X):
    """Return X with a column of ones prepended as the first column.

    The number of ones equals ``len(X)``, i.e. one per row of X.
    """
    ones_col = np.ones(len(X))
    return np.column_stack((ones_col, X))
def degexpand(X, deg, add_ones=False):
    """Expand input vectors to contain powers of the input features.

    Parameters
    ----------
    X : array-like
        Input features; ``len(X)`` is taken as the number of samples.
    deg : int
        Highest power to include.  Powers 2..deg of X are appended as
        extra columns after X itself.  For ``deg <= 1`` X is returned
        unchanged (unless ``add_ones`` is set).
    add_ones : bool, optional
        When true, a column of ones is prepended to the result.

    Returns
    -------
    The column-stacked expansion ``[1?, X, X**2, ..., X**deg]``.
    """
    n = len(X)
    expanded = X
    # ``xrange`` does not exist on Python 3; ``range`` works on both.
    for power in range(2, deg + 1):
        expanded = np.column_stack((expanded, np.power(X, power)))
    if add_ones:
        expanded = np.column_stack((np.ones(n), expanded))
    return expanded
def rescale_data(X, min_val=-1, max_val=1, minx=None, rangex=None):
    """Rescale columns of X to lie in the range [min_val, max_val].

    Parameters
    ----------
    X : numpy.ndarray
        Data to rescale; column statistics are taken along axis 0.
    min_val, max_val : number, optional
        Target range (defaults to [-1, 1]).
    minx : array-like or None, optional
        Per-column minimum to subtract.  Computed from X when None.
    rangex : array-like or None, optional
        Per-column range (max - min) to divide by.  Computed from X when
        None.

    Returns
    -------
    numpy.ndarray
        The rescaled data.  Columns whose range is exactly zero (constant
        columns) are mapped to ``min_val`` instead of producing NaN/inf.
    """
    if minx is None:
        minx = X.min(axis=0)
    if rangex is None:
        rangex = X.max(axis=0) - X.min(axis=0)
    # A constant column has zero range; divide by 1 there so the result is
    # finite (the numerator is already zero for data computed from X).
    rangex = np.asarray(rangex)
    safe_range = np.where(rangex == 0, 1, rangex)
    return (max_val - min_val) * (X - minx) / safe_range + min_val
def center_cols(X, mu=None):
    """Subtract a per-column offset from X so each column has zero mean.

    When ``mu`` is None the column means of X (axis 0) are used; otherwise
    the supplied ``mu`` is subtracted as-is.

    Returns the pair ``(X - mu, mu)``.
    """
    offset = X.mean(axis=0) if mu is None else mu
    return X - offset, offset
def mk_unit_variance(X, s=None):
    """Scale each column of X to unit variance.

    Parameters
    ----------
    X : numpy.ndarray
        Data to scale; the standard deviation is taken along axis 0.
    s : scalar, array-like or None, optional
        Per-column scale to divide by.  Computed as ``X.std(axis=0)`` when
        None.

    Returns
    -------
    (X / s, s)
        The scaled data and the scale actually used.  Any scale entry
        smaller than ``np.spacing(1)`` is replaced by 1 so that (near-)
        constant columns are left unscaled instead of dividing by zero.

    Notes
    -----
    A caller-supplied ``s`` is never modified; the returned ``s`` is a copy
    when it is array-like.
    """
    if s is None:
        s = X.std(axis=0)
    eps = np.spacing(1)
    if np.ndim(s) == 0:
        # Scalar scale (e.g. 1-D X): plain comparison, no indexing.
        s = s if s > eps else 1
    else:
        # Copy first so the caller's array is not clobbered by the
        # in-place replacement below; this also accepts plain lists.
        s = np.array(s)
        s[s < eps] = 1
    return X / s, s
class preprocessor_create:
    """Plain container for preprocessing options.

    The constructor only stores its arguments as attributes of the same
    name; nothing is validated or applied here.
    """

    def __init__(self, standardize_X=False, rescale_X=False, kernel_fn=None,
                 poly=None, add_ones=False):
        # Feature-expansion related options.
        self.kernel_fn = kernel_fn
        self.poly = poly
        self.add_ones = add_ones
        # Scaling related options.
        self.standardize_X = standardize_X
        self.rescale_X = rescale_X
def poly_data_make(sampling="sparse", deg=3, n=21):
    """Create an artificial 1-D polynomial regression dataset.

    The global NumPy random generator is re-seeded with 0 on every call, so
    the returned noise is reproducible (and the global random state is
    reset as a side effect).

    Parameters
    ----------
    sampling : {"sparse", "irregular", "dense", "thibaux"}, optional
        How the training inputs are laid out.
    deg : int, optional
        Degree of the noise-free function for every sampling except
        "thibaux": 2 gives ``10 + x + x**2``, any other value below 4 gives
        ``10 + x + x**3``.  Ignored for "thibaux".
    n : int, optional
        Number of training points; only used for "thibaux".

    Returns
    -------
    xtrain, ytrain, xtest, ytestNoisefree, ytestNoisy, sigma2
        Training inputs/targets, test inputs, noise-free and noisy test
        targets, and the noise variance used.

    Raises
    ------
    ValueError
        If ``sampling`` is not one of the supported names.
    AssertionError
        If ``deg >= 4`` for a non-"thibaux" sampling.
    """
    if sampling not in ("irregular", "sparse", "dense", "thibaux"):
        # Previously this fell through and failed later with an
        # UnboundLocalError on ``xtrain``.
        raise ValueError("unknown sampling scheme %r" % (sampling,))

    np.random.seed(0)

    if sampling == "thibaux":
        xtrain = np.linspace(0, 20, n)
        xtest = np.arange(0, 20, 0.1)
        sigma2 = 4
        w = np.array([-1.5, 1/9.])

        def fun(x):
            return w[0]*x + w[1]*np.square(x)
    else:
        if sampling == "irregular":
            xtrain = np.concatenate(
                (np.arange(-1, -0.5, 0.1), np.arange(3, 3.5, 0.1)))
        elif sampling == "sparse":
            xtrain = np.array([-3, -2, 0, 2, 3])
        else:  # "dense"
            xtrain = np.arange(-5, 5, 0.6)

        # Kept as an assert so the exception type seen by callers is
        # unchanged.
        assert deg < 4, "bad degree, dude %d" % deg
        xtest = np.arange(-7, 7, 0.1)
        if deg == 2:
            def fun(x):
                return 10 + x + np.square(x)
        else:
            def fun(x):
                return 10 + x + np.power(x, 3)
        sigma2 = np.square(5)

    # The two draws must stay in this order (train first, then test) so the
    # generated dataset is identical to earlier versions.
    noise_std = np.sqrt(sigma2)
    ytrain = fun(xtrain) + np.random.normal(0, 1, xtrain.shape) * noise_std
    ytestNoisefree = fun(xtest)
    ytestNoisy = ytestNoisefree + \
        np.random.normal(0, 1, xtest.shape) * noise_std

    return xtrain, ytrain, xtest, ytestNoisefree, ytestNoisy, sigma2
def load_mat(matName):
    """Load a .mat file from the data directory ``DATA_DIR``.

    Per the original note, only .mat files created by Matlab 5, 6 and
    7~7.2 are currently supported.

    Parameters
    ----------
    matName : str
        File name, joined onto ``DATA_DIR``.

    Returns
    -------
    The value returned by ``scipy.io.loadmat`` for that file.

    Raises
    ------
    Any exception raised by ``scipy.io.loadmat`` propagates unchanged; the
    original code explicitly re-raised ``NotImplementedError`` and
    ``FileNotFoundError``.

    NOTE(review): ``DATA_DIR`` is read as a module-level name but is not
    defined in the visible part of this file -- confirm where it is set.
    """
    # print() call form: the Python 2 print statement is a SyntaxError on
    # Python 3.  The printed text is unchanged.
    print('looking for ', matName, ' in ', DATA_DIR)
    # The former try/except only re-raised, so it is dropped; exceptions
    # propagate exactly as before.
    return sio.loadmat(os.path.join(DATA_DIR, matName))
def generate_rst():
    """Generate one ``<chapter>.rst`` file per chapter directory.

    Looks in ``<current working directory>/demos``.  For every
    sub-directory ``<chapter>`` found there, writes
    ``demos/<chapter>/<chapter>.rst`` containing:

    * a title line with the chapter name,
    * for every ``*.py`` script in that directory: a section named after
      the script (without ``.py``), an ``image`` directive for each
      ``<script>*.png`` in the same directory, and a ``literalinclude``
      directive for the script itself.

    Chapters, scripts and images are processed in sorted order so the
    generated files are identical from run to run (``os.listdir`` and
    ``glob.glob`` return entries in arbitrary order).

    Non-directory entries of ``demos`` are skipped.  Existing ``.rst``
    files are overwritten.
    """
    demo_dir = os.path.join(os.getcwd(), 'demos')
    for chapter in sorted(os.listdir(demo_dir)):
        chapter_dir = os.path.join(demo_dir, chapter)
        if not os.path.isdir(chapter_dir):
            continue

        # Collect the whole document first, then write it in one go.
        parts = [chapter, '\n========================================\n']
        for script in sorted(glob.glob(os.path.join(chapter_dir, '*.py'))):
            script_name = os.path.basename(script)
            stem = script_name[:-3]  # strip the '.py' suffix
            parts.append('\n' + stem)
            parts.append('\n----------------------------------------\n')
            png_pattern = os.path.join(chapter_dir, stem + '*.png')
            for img in sorted(glob.glob(png_pattern)):
                parts.append(".. image:: " + os.path.basename(img) + "\n")
            parts.append(".. literalinclude:: " + script_name + "\n")

        rst_file = os.path.join(chapter_dir, chapter + '.rst')
        with open(rst_file, 'w') as f:
            f.write(''.join(parts))
if __name__ == "__main__":
    # Script entry point: regenerate the per-chapter .rst files, then
    # report completion on stdout.
    generate_rst()
    print("Finished generate chX.rst!")