forked from shibing624/python-tutorial
-
Notifications
You must be signed in to change notification settings - Fork 0
Expand file tree
/
Copy pathtest.py
More file actions
92 lines (75 loc) · 2.92 KB
/
test.py
File metadata and controls
92 lines (75 loc) · 2.92 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
# -*- coding: utf-8 -*-
# Author: XuMing <[email protected]>
# Date: 18/1/25
# Brief:
import unittest
from util import load_sample_data
from feature import Feature
from lr import LR
from xgb import XGB
from xgb_lr import XGBLR
# --- Sample data sets used by the tests ---
train_file = "./data/train.data.sample"
test_file = "./data/test.data.sample"

# --- Arguments handed to Feature.fit (in this order, before the data) ---
max_feature_cnt = 40
feature_max_df = 0.55
feature_min_df = 3
ngram_range = (1, 2)

# --- Model file names; each one lives directly under model_path ---
model_path = './data/'
tfidf_model_name = '{}tfidf_feature.model'.format(model_path)
best_feature_model_name = '{}best_feature.model'.format(model_path)
xgb_model_name = '{}xgb.model'.format(model_path)
lr_model_name = '{}lr.model'.format(model_path)
xgblr_xgb_model_name = '{}xgblr_xgb.model'.format(model_path)
xgblr_lr_model_name = '{}xgblr_lr.model'.format(model_path)
one_hot_encoder_model_name = '{}xgblr_ont_hot_encoder.model'.format(model_path)
class ClassificationTest(unittest.TestCase):
    """Test case for the classification pipeline.

    Exercises sample-data loading, feature fitting/transforming, and the
    train/test entry points of the XGB, LR and XGBLR wrappers, using the
    module-level file names and feature settings.
    """

    @classmethod
    def setUpClass(cls):
        """No class-level fixture is needed."""
        pass

    @classmethod
    def tearDownClass(cls):
        """No class-level cleanup is needed."""
        pass

    def test_init(self):
        """Test the initialisation step: the sample training data loads
        without raising and is non-empty."""
        print("test_init")
        data_x, _ = load_sample_data(train_file)
        # assertGreater reports the actual length on failure, unlike
        # comparing a boolean against True.
        self.assertGreater(len(data_x), 0)

    def model_train(self, train_file):
        """Fit the features on ``train_file`` and train all three models.

        Not collected as a test by itself (no ``test_`` prefix); it is
        driven by ``test_models``.

        :param train_file: path passed to ``load_sample_data``.
        """
        train_x, train_y = load_sample_data(train_file)

        features = Feature(tfidf_model_name, best_feature_model_name)
        features.fit(max_feature_cnt, feature_max_df,
                     feature_min_df, ngram_range, train_x, train_y)
        model_train_x_feature = features.transform(train_x)

        # xgboost
        print('train a single xgb model...')
        # Must be XGB, not LR: model_test evaluates XGB(xgb_model_name), so
        # the model trained under that name has to be of the same type.
        xgb_clf = XGB(xgb_model_name)
        xgb_clf.train_model(model_train_x_feature, train_y)
        print('train a single xgb model done.\n')

        # lr
        print('train a single lr model...')
        lr_clf = LR(lr_model_name)
        lr_clf.train_model(model_train_x_feature, train_y)
        print('train a single LR model done.\n')

        # xgboost+lr
        print('train a xgboost+lr model...')
        xgb_lr_clf = XGBLR(xgblr_xgb_model_name, xgblr_lr_model_name,
                           one_hot_encoder_model_name)
        xgb_lr_clf.train_model(model_train_x_feature, train_y)
        print('train a xgboost+lr model done.\n')

    def model_test(self, test_file):
        """Run ``test_model`` of each classifier on ``test_file``.

        The feature models are loaded via ``Feature.load_model`` rather than
        refitted -- presumably from the files written during
        ``model_train``; verify against ``feature.py``.

        :param test_file: path passed to ``load_sample_data``.
        """
        test_x, test_y = load_sample_data(test_file)

        features = Feature(tfidf_model_name, best_feature_model_name)
        features.load_model()
        model_test_x_feature = features.transform(test_x)

        xgb_clf = XGB(xgb_model_name)
        xgb_clf.test_model(model_test_x_feature, test_y)

        lr_clf = LR(lr_model_name)
        lr_clf.test_model(model_test_x_feature, test_y)

        xgb_lr_clf = XGBLR(xgblr_xgb_model_name, xgblr_lr_model_name,
                           one_hot_encoder_model_name)
        xgb_lr_clf.test_model(model_test_x_feature, test_y)

    def test_models(self):
        """Train every model on the training sample, then run each one on
        the test sample; passes if neither stage raises."""
        self.model_train(train_file)
        self.model_test(test_file=test_file)
if __name__ == '__main__':
unittest.main()