-
Notifications
You must be signed in to change notification settings - Fork 0
Expand file tree
/
Copy pathmain.py
More file actions
165 lines (123 loc) · 5.14 KB
/
main.py
File metadata and controls
165 lines (123 loc) · 5.14 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
"""
This module defines the training of the model based on the various hyperparameters input by the user.
"""
import os
import csv
import argparse
from keras import callbacks
from keras.utils import plot_model
from keras.utils import to_categorical
from sklearn.preprocessing import normalize
from data import *
from Convnet import *
# setting the hyper parameters and other configurations
def _parse_mix_flag(value):
    """Interpret the text supplied to ``--mix`` as a boolean.

    argparse hands option values over as strings, and every non-empty string
    is truthy, so without this converter ``--mix False`` or ``--mix 0`` would
    silently switch mixed training ON.

    Args:
        value: Raw command-line text following ``--mix``.

    Returns:
        False for an empty value or one of the usual "off" spellings
        (``0``, ``false``, ``f``, ``no``, ``n``, ``off``; case-insensitive).
        True for anything else, so invocations that already enabled the
        flag (e.g. ``--mix True``) keep working unchanged.
    """
    return value.strip().lower() not in ('', '0', 'false', 'f', 'no', 'n', 'off')


parser = argparse.ArgumentParser(description="1D Convolutional Network on Histone Modification Signals.")
parser.add_argument('--epochs', default=20, type=int)
parser.add_argument('--batch_size', default=100, type=int)
parser.add_argument('--lr', default=0.001, type=float,
                    help="Initial learning rate")
parser.add_argument('--lr_decay', default=0.9, type=float,
                    help="The value multiplied by lr at each epoch. Set a larger value for larger epochs")
parser.add_argument('--data_dir', default='./data',
                    help="The directory of the data files")
parser.add_argument('--debug', action='store_true',
                    help="Save weights by TensorBoard")
parser.add_argument('--save_dir', default='./without_spat')
# nargs='?' with const=True lets a bare ``--mix`` enable the flag, while
# ``--mix <value>`` is still accepted and routed through _parse_mix_flag.
parser.add_argument('--mix', default=False, nargs='?', const=True, type=_parse_mix_flag,
                    help="Train on the data by mixing all cell types")
# parser.add_argument('-w', '--weights', default=None,
#                     help="The path of the saved weights. Should be specified when testing")

# Parse the process command line once; the rest of the script reads ``args``.
args = parser.parse_args()
print(args)
# Make sure the output directory exists. exist_ok=True avoids the
# check-then-create race of testing os.path.exists() first, and is a no-op
# when the directory is already there.
os.makedirs(args.save_dir, exist_ok=True)
# Layout of the per-cell ``test.csv`` files as consumed by the mixed-data
# evaluation below.
# NOTE(review): presumably each sample is one gene described by 100 bins --
# confirm against the data-preparation code.
_BINS_PER_SAMPLE = 100        # consecutive CSV rows that form one sample
_LABEL_COLUMN = 7             # CSV column holding the class label
_DROPPED_COLUMNS = [0, 1, 7]  # columns removed before building the features


def _list_cells(data_dir):
    """Return the sub-directory names of ``data_dir`` in sorted order.

    ``os.listdir`` yields entries in arbitrary order and also reports plain
    files. Sorting makes the processing order (and the order of the result
    rows) reproducible; the directory filter skips stray files that would
    otherwise abort the run when treated as a cell type.
    """
    return sorted(
        entry for entry in os.listdir(data_dir)
        if os.path.isdir(os.path.join(data_dir, entry))
    )


def _print_cell_header(index, cell):
    """Print the banner that opens the log section of one cell type."""
    print('-'*50 + 'Start'+ '-'*50)
    print("Cell Number: ", index)
    print("Cell Type: "+cell)


def _print_cell_footer():
    """Print the banner and the three spacer lines that close a cell's log section."""
    print('-'*50 + ' End '+ '-'*50)
    for _ in range(3):
        print(' ')


def _write_scores(scores, csv_path):
    """Write one ``cell, score`` row per entry of ``scores`` to ``csv_path``.

    The file is opened in a ``with`` block so it is flushed and closed even
    if writing fails, and with ``newline=''`` as the csv module requires to
    avoid doubled line endings on some platforms.
    """
    with open(csv_path, "w", newline='') as handle:
        csv.writer(handle).writerows(scores.items())


def _load_cell_test_set(cell_dir):
    """Read ``test.csv`` from ``cell_dir`` and shape it for the network.

    Args:
        cell_dir: Directory that contains the header-less ``test.csv``.

    Returns:
        ``(x_test, y_test)`` where ``x_test`` has shape
        ``(n_samples, n_inputs, 1)`` and ``y_test`` is the one-hot encoding
        produced by ``to_categorical``.
    """
    test_data = pd.read_csv(os.path.join(cell_dir, 'test.csv'), header=None)
    labels = np.array(test_data[_LABEL_COLUMN], dtype=float)
    features = np.array(test_data.drop(_DROPPED_COLUMNS, axis=1), dtype=float)

    # One label per sample: only the label of the first row in each group of
    # _BINS_PER_SAMPLE rows is kept (presumably all rows of a sample carry
    # the same label -- TODO confirm).
    n_samples = labels.shape[0] // _BINS_PER_SAMPLE
    labels = labels.reshape((n_samples, _BINS_PER_SAMPLE))[:, 0]

    # Max-normalise every CSV row on its own. Row-wise normalisation does not
    # depend on how rows are grouped, so a single call on the flat table gives
    # the same values as normalising each sample's slice in a Python loop.
    features = normalize(features, axis=1, norm='max')
    features = features.reshape((n_samples, _BINS_PER_SAMPLE, features.shape[1]))

    # Extra inputs: per-sample sum over the bins for each feature column,
    # max-normalised per sample and appended after the flattened bins.
    totals = normalize(features.sum(1), axis=1, norm='max')
    flat = features.reshape((n_samples, -1))
    inputs = np.c_[flat, totals].reshape((n_samples, -1, 1))
    return inputs, to_categorical(labels)


# If the data is not to be mixed: train and evaluate one model per cell type
if not args.mix:
    scores = {}
    # Iterate over all cells individually
    for count, cell in enumerate(_list_cells(args.data_dir), start=1):
        _print_cell_header(count, cell)
        # Load the individual cell data
        (x_train, y_train), (x_val, y_val), (x_test, y_test) = load_data(
            os.path.join(args.data_dir, cell, 'classification'))
        filepath = os.path.join(args.save_dir, cell)
        os.makedirs(filepath, exist_ok=True)
        # define model
        model = Conv1DNet(input_shape=x_train.shape[1:])
        model.summary()
        plot_model(model, os.path.join(filepath, 'model.png'), show_shapes=True)
        # Train the model
        model = train(model=model, data=((x_train, y_train), (x_val, y_val)), args=args, dirs=filepath)
        # Test the model that was saved to disk for this cell
        # (model.h5 is expected to have been written during train()).
        test_model = models.load_model(os.path.join(filepath, 'model.h5'))
        auc = test(model=test_model, data=(x_test, y_test), filepath=filepath)
        scores[cell] = auc
        _print_cell_footer()
    # write the results to a file (relative to the current working directory)
    _write_scores(scores, "output.csv")
# If training on the complete mixed data: one model, evaluated per cell type
else:
    print('-'*50+' Loading Data '+'-'*50)
    # Load the complete mixed data
    (x_train, y_train), (x_val, y_val), (x_test, y_test) = load_data_mix(args)
    print('-'*50+' Finished loading Data '+'-'*50)
    # define the model
    model = Conv1DNet(input_shape=x_train.shape[1:])
    model.summary()
    plot_model(model, os.path.join(args.save_dir, 'model.png'), show_shapes=True)
    # Train the model
    model = train(model=model, data=((x_train, y_train), (x_val, y_val)), args=args, dirs=args.save_dir)
    # NOTE(review): unlike the per-cell branch, the model returned by train()
    # is evaluated directly instead of reloading the saved model.h5 --
    # confirm this difference is intended.
    print('-'*50+' Testing Begins '+'-'*50)
    scores = {}
    for count, cell in enumerate(_list_cells(args.data_dir), start=1):
        _print_cell_header(count, cell)
        # Load data and transform it such that it can be fed to the model
        x_test, y_test = _load_cell_test_set(
            os.path.join(args.data_dir, cell, 'classification'))
        cell_out_dir = os.path.join(args.save_dir, cell)
        os.makedirs(cell_out_dir, exist_ok=True)
        # Test the model
        auc = test(model=model, data=(x_test, y_test), filepath=cell_out_dir)
        scores[cell] = auc
        _print_cell_footer()
    # write the results to a file
    _write_scores(scores, os.path.join(args.save_dir, 'output-mix.csv'))