forked from alicelmx/DataAnalysisbyPython
-
Notifications
You must be signed in to change notification settings - Fork 0
Expand file tree
/
Copy pathcommonlast.py
More file actions
83 lines (68 loc) · 1.98 KB
/
commonlast.py
File metadata and controls
83 lines (68 loc) · 1.98 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
#coding:utf-8
# -*- coding: gbk -*-
"""
Created on Fri Mar 03 14:39:34 2017
@author: Zhu Wen Jing
"""
import re
# import chardet
# Input files to compare, and the four result files derived from them.
FIRST_INPUT = r'C:\Users\Zhu Wen Jing\Desktop\PythonStudy\WP_Ict_test.txt'
SECOND_INPUT = r'C:\Users\Zhu Wen Jing\Desktop\PythonStudy\WP_Ict_test2.txt'
ONLY_FIRST_OUTPUT = r'C:\Users\Zhu Wen Jing\Desktop\PythonStudy\WP_Ict_test-WP_Ict_test2.txt'
ONLY_SECOND_OUTPUT = r'C:\Users\Zhu Wen Jing\Desktop\PythonStudy\WP_Ict_test2-WP_Ict_test.txt'
COMMON_OUTPUT = r'C:\Users\Zhu Wen Jing\Desktop\PythonStudy\WP_Ict_test_WP_Ict_test2.txt'
UNION_OUTPUT = r'C:\Users\Zhu Wen Jing\Desktop\PythonStudy\WP_Ict_test+WP_Ict_test2.txt'


def read_tokens(path):
    """Return the tokens of the text file at *path*, minus each line's first one.

    Every line is split on whitespace and its first field is dropped (the
    original author's note describes that field as the leading "end"
    marker / sequence number).  The remaining fields of all lines are
    returned as one flat list, in file order.

    Blank lines contribute nothing.  The previous ``del fields[0]`` raised
    IndexError on them.
    """
    tokens = []
    # ``with`` closes the file even if reading fails part-way through.
    with open(path, 'r') as handle:
        for line in handle:
            # Slicing an empty list is safe, unlike deleting index 0.
            tokens.extend(line.split()[1:])
    return tokens


def count_wiki(tokens):
    """Return how many times the substring '/wiki' occurs across *tokens*.

    Equivalent to the former ``str(tokens).count('/wiki')`` but counts on
    the tokens themselves instead of on the list's printed representation.
    """
    return sum(token.count('/wiki') for token in tokens)


def write_tokens(path, tokens):
    """Write every item of *tokens* to *path*, each followed by one space.

    Items are written in the iteration order of *tokens*; for a set that
    order is whatever the set yields.
    """
    with open(path, 'w') as handle:
        for token in tokens:
            handle.write(token + ' ')


def load_and_report(path):
    """Read the tokens of *path*, print them and their '/wiki' count.

    Prints, in order: the '1' banner, the token list, the '2' banner and
    the number of '/wiki' occurrences.  Returns the token list.
    """
    tokens = read_tokens(path)
    print('1————————————————————————')
    print(tokens)
    print('2————————————————————————')
    # Count the occurrences of the '/wiki' tag in this text.
    print(count_wiki(tokens))
    return tokens


def main():
    """Compare the token sets of the two input files.

    Prints the per-file report for both inputs, then six set sizes, then
    writes the two differences, the intersection and the union to the four
    output files.
    """
    first_set = set(load_and_report(FIRST_INPUT))
    second_set = set(load_and_report(SECOND_INPUT))

    # ``a.difference(b)`` and ``a - b`` are the same operation, so the 1st/3rd
    # and 4th/6th numbers are equal; all six lines are kept so the printed
    # output stays exactly as before.
    print(len(first_set.difference(second_set)))
    print(len(first_set.intersection(second_set)))
    print(len(first_set - second_set))
    print(len(second_set.difference(first_set)))
    print(len(second_set.intersection(first_set)))
    print(len(second_set - first_set))

    write_tokens(ONLY_FIRST_OUTPUT, first_set.difference(second_set))
    write_tokens(ONLY_SECOND_OUTPUT, second_set.difference(first_set))
    write_tokens(COMMON_OUTPUT, first_set.intersection(second_set))
    write_tokens(UNION_OUTPUT, first_set.union(second_set))


if __name__ == '__main__':
    main()