forked from shibing624/python-tutorial
-
Notifications
You must be signed in to change notification settings - Fork 0
Expand file tree
/
Copy pathtrain.py
More file actions
52 lines (44 loc) · 1.71 KB
/
train.py
File metadata and controls
52 lines (44 loc) · 1.71 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
# -*- coding: utf-8 -*-
# Author: XuMing <[email protected]>
# Brief: Train two single LR models and an XGBoost+LR model on features extracted from the training file.
from feature import Feature
from lr import LR
from xgb_lr import XGBLR
from xgb_util import load_sample_data
# ---- Input data -----------------------------------------------------------
# Training file handed to load_sample_data, and the field separator used
# when reading it.
train_file = './data/training_seg.txt'
sep = '\t'

# ---- Feature extraction settings (forwarded to Feature.fit) ----------------
max_feature_cnt = 1000
feature_max_df = 0.95
feature_min_df = 3
ngram_range = (1, 2)

# ---- Model file locations --------------------------------------------------
# Prefix that every model file name below starts with.
model_path = './data/'


def _model_file(file_name):
    """Return *file_name* prefixed with ``model_path``."""
    return model_path + file_name


tfidf_model_name = _model_file('tfidf_feature.model')
best_feature_model_name = _model_file('best_feature.model')
xgb_model_name = _model_file('xgb.model')
lr_model_name = _model_file('lr.model')
xgblr_xgb_model_name = _model_file('xgblr_xgb.model')
xgblr_lr_model_name = _model_file('xgblr_lr.model')
# NOTE: the "ont_hot" spelling is part of the file name and is kept as-is so
# that anything else referring to this path keeps working.
one_hot_encoder_model_name = _model_file('xgblr_ont_hot_encoder.model')
def model_train(train_file):
    """Fit the feature extractor and train three classifiers on *train_file*.

    Steps, in order:

    1. Read samples and labels with ``load_sample_data`` (using the
       module-level ``sep`` and ``has_pos=True``).
    2. Fit a ``Feature`` object with the module-level feature settings and
       transform the samples into the feature representation used for
       training.
    3. Train, one after another, the classifiers listed in ``stages`` below,
       printing a progress message before and after each.

    :param train_file: path of the training data file passed to
        ``load_sample_data``.
    :return: None
    """
    samples, labels = load_sample_data(train_file, sep=sep, has_pos=True)

    extractor = Feature(tfidf_model_name, best_feature_model_name)
    extractor.fit(
        max_feature_cnt,
        feature_max_df,
        feature_min_df,
        ngram_range,
        samples,
        labels,
    )
    sample_features = extractor.transform(samples)

    # Each stage: (start message, classifier factory, completion message).
    # Factories are lambdas so that every classifier is constructed only
    # after its start message has been printed.
    stages = (
        # NOTE(review): this stage is announced as "xgb" and uses
        # xgb_model_name, but the classifier built is LR -- confirm whether
        # a dedicated XGBoost classifier was intended here.
        (
            'train a single xgb model...',
            lambda: LR(xgb_model_name),
            'train a single xgb model done.\n',
        ),
        (
            'train a single lr model...',
            lambda: LR(lr_model_name),
            'train a single LR model done.\n',
        ),
        (
            'train a xgboost+lr model...',
            lambda: XGBLR(xgblr_xgb_model_name, xgblr_lr_model_name, one_hot_encoder_model_name),
            'train a xgboost+lr model done.\n',
        ),
    )

    for start_msg, build_classifier, done_msg in stages:
        print(start_msg)
        classifier = build_classifier()
        classifier.train_model(sample_features, labels)
        print(done_msg)
if __name__ == '__main__':
    # Script entry point: train every model on the default training file.
    model_train(train_file)