-
Notifications
You must be signed in to change notification settings - Fork 0
Expand file tree
/
Copy pathsearch.py
More file actions
138 lines (132 loc) · 5.02 KB
/
search.py
File metadata and controls
138 lines (132 loc) · 5.02 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
#python搜索关键词 统计数量脚本
import os
import xlrd
import xlwt
from xlutils.copy import copy
from docx import Document
# List the entries of a directory whose names end with one of the given suffixes.
def ListFilesToTxt(dir, extype):
    """Return the names in ``dir`` that end with any suffix listed in ``extype``.

    Args:
        dir: Directory to scan (passed straight to ``os.listdir``). The name
            shadows the builtin but is kept so keyword callers keep working.
        extype: Whitespace-separated suffixes, e.g. ``"docx xlsx"``. Matching
            is a plain ``str.endswith`` test, so no leading dot is implied.

    Returns:
        A list of bare entry names (not joined with ``dir``), in
        ``os.listdir`` order, each name appearing at most once.

    Raises:
        OSError: Propagated from ``os.listdir`` (missing directory, etc.).
    """
    # split() without an argument drops empty tokens produced by repeated or
    # trailing spaces; an empty suffix would otherwise match every name.
    suffixes = tuple(extype.split())
    names = os.listdir(dir)
    if not suffixes:
        # An empty filter has always meant "no filtering"; keep that.
        return names
    # A single endswith(tuple) test adds a name once even when several
    # suffixes match it (e.g. "docx" and "x").
    return [name for name in names if name.endswith(suffixes)]
# Write per-case statistics into the matching rows of an existing workbook.
def write_excel_xls_append(path, value, name_col=1, first_stat_col=13):
    """Fill statistics cells of the first sheet, matching rows by case name.

    The workbook at ``path`` is opened with xlrd, copied into a writable
    xlwt workbook via ``xlutils.copy.copy``, updated, and saved back to the
    same ``path``.

    Args:
        path: Path of the workbook to read and overwrite.
        value: Sequence of records; ``record[0]`` is the case name to look
            up and ``record[1:]`` are the cells to write, left to right.
        name_col: Zero-based column holding the case name in the sheet.
        first_stat_col: Zero-based column receiving ``record[1]``; the
            default 13 is spreadsheet column "N".

    Returns:
        None. The result is the rewritten file plus a confirmation line on
        stdout.

    NOTE(review): xlrd 2.x is understood to read only legacy ``.xls`` files
    and xlwt to write only that format -- confirm the installed versions
    before pointing this at an ``.xlsx`` path.
    """
    # Index the records by case name. setdefault keeps the first record for
    # a duplicated name, which is what a top-to-bottom search would find.
    stats_by_name = {}
    for record in value:
        if record:  # an empty record carries no name to match on
            stats_by_name.setdefault(record[0], record[1:])

    workbook = xlrd.open_workbook(path)
    worksheet = workbook.sheet_by_index(0)
    # xlrd sheets are read-only; the xlutils copy is the writable twin.
    new_workbook = copy(workbook)
    new_worksheet = new_workbook.get_sheet(0)

    # Row 0 is skipped: it is treated as the header row.
    for row in range(1, worksheet.nrows):
        stats = stats_by_name.get(worksheet.cell_value(row, name_col))
        if stats is None:
            continue
        for offset, cell in enumerate(stats):
            new_worksheet.write(row, first_stat_col + offset, cell)

    new_workbook.save(path)
    print("xls格式表格【追加】写入数据成功!")
# Technology categories and the keywords that trigger each one, listed in
# the order their flags are written to the spreadsheet (left to right).
_KEYWORD_GROUPS = (
    ('云计算', ('云计算',)),
    ('大数据', ('大数据', '数据挖掘', '数据分析')),
    ('人工智能', ('人工智能', '图像识别', '人脸识别', '语音识别',
                  '深度学习', '机器学习', '联邦学习', '语义分析')),
    ('物联网', ('物联网',)),
    ('机器人/无人机', ('机器人', '无人机')),
    ('区块链', ('区块链', '以太坊', '智能合约', '比特币', '加密货币', '去中心化')),
    ('AR/VR', ('VR', 'AR', '虚拟现实', '增强现实')),
    ('移动通信', ('移动通信', '移动互联')),
)


def _table_text(table):
    """Concatenate the second-column text of every row after the first."""
    parts = []
    for row_idx in range(1, len(table.rows)):
        try:
            parts.append(table.cell(row_idx, 1).text)
        except IndexError:
            # This row has no second column; there is nothing to search.
            continue
    return "".join(parts)


def _keyword_flags(text):
    """Return one flag per keyword group: 1 if any keyword occurs, else " "."""
    return [
        1 if any(word in text for word in words) else " "
        for _label, words in _KEYWORD_GROUPS
    ]


def main():
    """Scan the .docx files in the current directory and record keyword hits.

    For each document that contains at least one table, the case name is
    read from cell (0, 1) of the first table, the remaining second-column
    text is searched for the keyword groups, and the resulting record
    ``[case_name, flag, flag, ...]`` is collected. All records are then
    written into the summary workbook and echoed to stdout.
    """
    dir_path = "./"  # directory to scan
    all_files = ListFilesToTxt(dir_path, "docx")
    print("找到docx文件数量:")
    print(len(all_files))

    dic_word = []
    for file_name in all_files:
        # Word leaves "~$..." owner/lock files next to open documents; they
        # are not real .docx packages and cannot be parsed.
        if file_name.startswith("~$"):
            continue
        # The listing holds bare names, so anchor them to the scanned
        # directory rather than relying on the current working directory.
        document = Document(os.path.join(dir_path, file_name))
        tables = document.tables
        if not tables:
            continue
        table0 = tables[0]  # only the first table is inspected
        record = [table0.cell(0, 1).text]  # case name
        record.extend(_keyword_flags(_table_text(table0)))
        dic_word.append(record)

    # os.path.join avoids the invalid "\案" escape of a hand-written
    # backslash path and picks the right separator on every platform.
    # NOTE(review): the target has an .xlsx extension but is processed with
    # xlrd/xlwt -- confirm the file is really in a format those can handle.
    xlsx_path = os.path.join(".", "案例分析-阶段-汇总test1.xlsx")
    write_excel_xls_append(xlsx_path, dic_word)
    print("一共找到匹配的数据总数:")
    print(len(dic_word))
    print(dic_word)


if __name__ == '__main__':
    main()