Skip to content

Commit 617e0dd

Browse files
committed
the 12th chapter
1 parent 33318be commit 617e0dd

45 files changed

Lines changed: 68929 additions & 0 deletions

Some content is hidden

Large Commits have some content hidden by default. Use the searchbox below for content that may be hidden.

chapter12/.ipynb_checkpoints/1_1pagetypeAnalysis-checkpoint.ipynb

Lines changed: 1759 additions & 0 deletions
Large diffs are not rendered by default.

chapter12/.ipynb_checkpoints/1_2clicktimesAnalysis-checkpoint.ipynb

Lines changed: 1713 additions & 0 deletions
Large diffs are not rendered by default.

chapter12/.ipynb_checkpoints/1_3pagerankAnalysis-checkpoint.ipynb

Lines changed: 966 additions & 0 deletions
Large diffs are not rendered by default.

chapter12/.ipynb_checkpoints/2_2dataChange-checkpoint.ipynb

Lines changed: 2115 additions & 0 deletions
Large diffs are not rendered by default.
Lines changed: 98 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,98 @@
1+
{
2+
"cells": [
3+
{
4+
"cell_type": "code",
5+
"execution_count": 1,
6+
"metadata": {
7+
"collapsed": true
8+
},
9+
"outputs": [],
10+
"source": [
11+
"# -*- coding:utf-8 -*-\n",
12+
"# 属性规约:确定模型构建中需要的属性\n",
13+
"import pandas as pd\n",
14+
"import numpy as np\n",
15+
"from pandas import Series,DataFrame\n",
16+
"from sqlalchemy import create_engine\n",
17+
"import MySQLdb as msd"
18+
]
19+
},
20+
{
21+
"cell_type": "code",
22+
"execution_count": null,
23+
"metadata": {},
24+
"outputs": [
25+
{
26+
"name": "stderr",
27+
"output_type": "stream",
28+
"text": [
29+
"D:\\Anaconda2\\lib\\site-packages\\pymysql-0.7.11-py2.7.egg\\pymysql\\cursors.py:165: Warning: (1366, u\"Incorrect string value: '\\\\xD6\\\\xD0\\\\xB9\\\\xFA\\\\xB1\\\\xEA...' for column 'VARIABLE_VALUE' at row 480\")\n",
30+
" result = self._query(query)\n",
31+
"D:\\Anaconda2\\lib\\site-packages\\pandas\\io\\sql.py:1168: UserWarning: The provided table name 'Allformodel_realIP' is not found exactly as such in the database after writing the table, possibly due to case sensitivity issues. Consider using lower case table names.\n",
32+
" warnings.warn(msg, UserWarning)\n"
33+
]
34+
}
35+
],
36+
"source": [
37+
"engine = create_engine('mysql+pymysql://root:@127.0.0.1:3306/jing?charset=utf8')\n",
38+
"sql = pd.read_sql('changed_six', engine, chunksize = 10000)\n",
39+
"\n",
40+
"for i in sql:\n",
41+
" j = i[['realIP','fullURL']].copy()\n",
42+
" j.to_sql('Allformodel_realIP', engine, index=False,if_exists = 'append')\n"
43+
]
44+
},
45+
{
46+
"cell_type": "code",
47+
"execution_count": null,
48+
"metadata": {},
49+
"outputs": [],
50+
"source": [
51+
"# 获取后续建模需要的数据 咨询(ask)和婚姻(hunyin)数据\n",
52+
"# 读取数据库数据 \n",
53+
"engine = create_engine('mysql+pymysql://root:@127.0.0.1:3306/jing?charset=utf8')\n",
54+
"sql = pd.read_sql('changed_six', engine, chunksize = 10000)\n",
55+
"l1 = 0\n",
56+
"l2 = 0\n",
57+
"for i in sql:\n",
58+
" zixun = i[['realIP','fullURL']][i['fullURL'].str.contains('(ask)|(askzt)')].copy()\n",
59+
"# l1 = len(zixun) + l1\n",
60+
" hunyin = i[['realIP','fullURL']][i['fullURL'].str.contains('hunyin')].copy() \n",
61+
"# l2 = len(hunyin) + l2\n",
62+
" zixun.to_sql('zixunformodel', engine, index=False,if_exists = 'append')\n",
63+
" hunyin.to_sql('hunyinformodel', engine, index=False,if_exists = 'append')\n",
64+
"# print l1,l2"
65+
]
66+
},
67+
{
68+
"cell_type": "code",
69+
"execution_count": null,
70+
"metadata": {
71+
"collapsed": true
72+
},
73+
"outputs": [],
74+
"source": []
75+
}
76+
],
77+
"metadata": {
78+
"kernelspec": {
79+
"display_name": "Python 2",
80+
"language": "python",
81+
"name": "python2"
82+
},
83+
"language_info": {
84+
"codemirror_mode": {
85+
"name": "ipython",
86+
"version": 2
87+
},
88+
"file_extension": ".py",
89+
"mimetype": "text/x-python",
90+
"name": "python",
91+
"nbconvert_exporter": "python",
92+
"pygments_lexer": "ipython2",
93+
"version": "2.7.13"
94+
}
95+
},
96+
"nbformat": 4,
97+
"nbformat_minor": 2
98+
}

0 commit comments

Comments
 (0)