Skip to content

Commit 8ca0c17

Browse files
committed
2016.12.7 lesson 2 small edit
1 parent 43bda18 commit 8ca0c17

1 file changed

Lines changed: 23 additions & 22 deletions

File tree

python_basic/python_basic_lesson_02.ipynb

Lines changed: 23 additions & 22 deletions
Original file line numberDiff line numberDiff line change
@@ -28,7 +28,10 @@
2828
" * list 用法\n",
2929
" * dict 用法\n",
3030
" * tuple 用法\n",
31-
"* 随机数介绍"
31+
"* 随机数介绍\n",
32+
"* 举例\n",
33+
" * 中文分词介绍\n",
34+
" * 小程序练习"
3235
]
3336
},
3437
{
@@ -1064,49 +1067,47 @@
10641067
},
10651068
{
10661069
"cell_type": "code",
1067-
"execution_count": 22,
1070+
"execution_count": 34,
10681071
"metadata": {
10691072
"collapsed": false
10701073
},
10711074
"outputs": [
1072-
{
1073-
"name": "stderr",
1074-
"output_type": "stream",
1075-
"text": [
1076-
"Building prefix dict from the default dictionary ...\n",
1077-
"Dumping model to file cache /var/folders/j8/7nj196c56plf7tg3rtjxjwhr0000gn/T/jieba.cache\n",
1078-
"Loading model cost 2.197 seconds.\n",
1079-
"Prefix dict has been built succesfully.\n"
1080-
]
1081-
},
10821075
{
10831076
"name": "stdout",
10841077
"output_type": "stream",
10851078
"text": [
1086-
"Full Mode: 今天/ 上海/ 的/ 天气/ 怎么样\n",
1079+
"Full Mode: 今天/ 天上/ 上海/ 的/ 天气/ 怎么/ 怎么样\n",
10871080
"Default Mode: 明天/ 纽约/ 下雨/ 么\n",
10881081
"现在, 天气, 怎么样\n",
1089-
"2016, 年, 第一季度, 支付, 事业部, 交易量, 报表\n",
1082+
"小明, 硕士, 毕业, 于, 中国科学院, 计算所, ,, 后, 在, 日本京都大学, 深造\n",
10901083
"小明, 硕士, 毕业, 于, 中国, 科学, 学院, 科学院, 中国科学院, 计算, 计算所, ,, 后, 在, 日本, 京都, 大学, 日本京都大学, 深造\n"
10911084
]
10921085
}
10931086
],
10941087
"source": [
10951088
"import jieba\n",
10961089
"\n",
1097-
"seg_list = jieba.cut(\"今天上海的天气怎么样\", cut_all=False)\n",
1098-
"print(\"Full Mode: \" + \"/ \".join(seg_list)) # 全模式\n",
1090+
"# 全模式\n",
1091+
"# 把句子中所有的可以成词的词语都扫描出来,速度非常快,但是不能解决歧义\n",
1092+
"seg_list = jieba.cut(\"今天上海的天气怎么样\", cut_all = True)\n",
1093+
"print(\"Full Mode: \" + \"/ \".join(seg_list)) \n",
10991094
"\n",
1100-
"seg_list = jieba.cut(\"明天纽约下雨么\", cut_all=False)\n",
1101-
"print(\"Default Mode: \" + \"/ \".join(seg_list)) # 精确模式\n",
1095+
"# 精确模式\n",
1096+
"# 试图将句子最精确地切开,适合文本分析\n",
1097+
"seg_list = jieba.cut(\"明天纽约下雨么\", cut_all = False)\n",
1098+
"print(\"Default Mode: \" + \"/ \".join(seg_list)) \n",
11021099
"\n",
1103-
"seg_list = jieba.cut(\"现在天气怎么样\") # 默认是精确模式\n",
1100+
"# 默认是精确模式\n",
1101+
"seg_list = jieba.cut(\"现在天气怎么样\") \n",
11041102
"print(\", \".join(seg_list))\n",
11051103
"\n",
1106-
"seg_list = jieba.cut(\"2016年第一季度支付事业部交易量报表\") # 默认是精确模式\n",
1104+
"# 默认是精确模式\n",
1105+
"seg_list = jieba.cut(\"小明硕士毕业于中国科学院计算所,后在日本京都大学深造\") \n",
11071106
"print(\", \".join(seg_list))\n",
11081107
"\n",
1109-
"seg_list = jieba.cut_for_search(\"小明硕士毕业于中国科学院计算所,后在日本京都大学深造\") # 搜索引擎模式\n",
1108+
"# 搜索引擎模式\n",
1109+
"# 在精确模式的基础上,对长词再次切分,提高召回率,适合用于搜索引擎分词 \n",
1110+
"seg_list = jieba.cut_for_search(\"小明硕士毕业于中国科学院计算所,后在日本京都大学深造\") \n",
11101111
"print(\", \".join(seg_list))"
11111112
]
11121113
},
@@ -1248,7 +1249,7 @@
12481249
"name": "python",
12491250
"nbconvert_exporter": "python",
12501251
"pygments_lexer": "ipython3",
1251-
"version": "3.5.1"
1252+
"version": "3.4.4"
12521253
}
12531254
},
12541255
"nbformat": 4,

0 commit comments

Comments
 (0)