-
Notifications
You must be signed in to change notification settings - Fork 0
Expand file tree
/
Copy pathmainTrainClassifier.py
More file actions
86 lines (67 loc) · 3.49 KB
/
mainTrainClassifier.py
File metadata and controls
86 lines (67 loc) · 3.49 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
import os
import numpy as np
from utils.DatasetOptions import DatasetOptions
from utils.Dataset import Dataset
from utils.Results import Results
from learning.ClassifierRF import ClassifierRF
from learning.ClassifierRF import OptionsRF
from learning.ClassifierLogisticRegression import ClassifierLogisticRegression
from learning.ClassifierLogisticRegression import OptionsLogisticRegression
from learning.ClassifierSVM import OptionsSVM
from learning.ClassifierSVM import ClassifierSVM
import helpers.constants as constantsPATREC
if __name__ == '__main__':
    # Train a classifier on balanced subsets of the PATREC dataset over
    # several runs, report per-run train/eval AUC, and persist models and
    # aggregated results.

    # --- Paths -------------------------------------------------------------
    # Hard-coded remote mount of the project root; the commented line below
    # derives it from this file's location instead (use when running locally).
    # dirProject = os.path.dirname(os.path.dirname(os.path.abspath(__file__))) + '/'
    dirProject = '/home/thomas/fusessh/scicore/projects/patrec'
    dirData = os.path.join(dirProject, 'data')
    dirResultsBase = os.path.join(dirProject, 'results')
    dirModelsBase = os.path.join(dirProject, 'classifiers')

    # --- Dataset configuration --------------------------------------------
    # 2012-2015 data, categorical encoding, very light grouping, filtered to
    # the 'EntlassBereich_Gyn' discharge unit; no feature reduction.
    dict_options_dataset_training = {
        'dir_data': dirData,
        'data_prefix': 'patrec',
        'dataset': '20122015',
        'grouping': 'verylightgrouping',
        'encoding': 'categorical',
        'newfeatures': {'names': constantsPATREC.NEW_FEATURES},
        'featurereduction': None,
        'filtering': 'EntlassBereich_Gyn'
    }

    options_training = DatasetOptions(dict_options_dataset_training)
    dataset_training = Dataset(dataset_options=options_training)
    early_readmission_flagname = options_training.getEarlyReadmissionFlagname()
    print('dataset filename: ' + str(dataset_training.getFilename()))

    # --- Classifier configuration -----------------------------------------
    # Hoist the filename options shared by all classifier configurations
    # (it was previously recomputed for each classifier).
    filename_options = options_training.getFilenameOptions(filteroptions=True)

    dict_opt_rf = {'n_estimators': 500, 'max_depth': 50}
    options_rf = OptionsRF(dirModelsBase, filename_options, options_clf=dict_opt_rf)
    clf_rf = ClassifierRF(options_rf)

    dict_opt_lr = {'penalty': 'l1', 'C': 0.5}
    options_lr = OptionsLogisticRegression(dirModelsBase, filename_options, options_clf=dict_opt_lr)
    clf_lr = ClassifierLogisticRegression(options_lr)

    dict_options_svm = {'kernel': 'rbf', 'C': 1.0}
    options_svm = OptionsSVM(dirModelsBase, filename_options, options_clf=dict_options_svm)
    clf_svm = ClassifierSVM(options_svm)

    # Classifier actually used for this experiment. RF and SVM above are
    # kept as configured drop-in alternatives; NOTE(review): their Options
    # constructors may have side effects (e.g. model-directory setup) —
    # confirm before removing the unused instantiations.
    options_clf = options_lr
    clf = clf_lr

    # --- Training / evaluation loop ---------------------------------------
    results_all_runs_train = Results(dirResultsBase, options_training, options_clf, 'train')
    results_all_runs_eval = Results(dirResultsBase, options_training, options_clf, 'eval')

    num_runs = 10
    eval_aucs = []
    for run in range(num_runs):
        print('')
        # Each run draws a fresh balanced train/eval split.
        [df_balanced_train, df_balanced_eval] = dataset_training.getBalancedSubsetTrainingAndTesting()
        clf.train(df_balanced_train, early_readmission_flagname)
        results_train = clf.predict(df_balanced_train, early_readmission_flagname)
        results_eval = clf.predict(df_balanced_eval, early_readmission_flagname)
        results_all_runs_train.addResultsSingleRun(results_train)
        results_all_runs_eval.addResultsSingleRun(results_eval)

        auc_train = results_train.getAUC()
        auc_eval = results_eval.getAUC()
        print('train auc: ' + str(auc_train))
        print('eval auc: ' + str(auc_eval))

        # Persist the trained model and its learned features for this run.
        clf.save(run)
        clf.saveLearnedFeatures(run)
        eval_aucs.append(auc_eval)

    # Write aggregated results over all runs, then report the mean eval AUC.
    results_all_runs_train.writeResultsToFileDataset()
    results_all_runs_eval.writeResultsToFileDataset()

    print('')
    print('mean eval auc: ' + str(np.mean(np.array(eval_aucs))))
    print('')