forked from PantsuDango/Dango-Translator
-
Notifications
You must be signed in to change notification settings - Fork 0
Expand file tree
/
Copy pathbaidufanyi.py
More file actions
113 lines (83 loc) · 3.54 KB
/
baidufanyi.py
File metadata and controls
113 lines (83 loc) · 3.54 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
# 面向对象
# 百度翻译 -- 网页版(自动获取token,sign)
import requests
import js2py
import json
import re
from traceback import print_exc
class BaiduWeb:
    """Scraper for the web version of Baidu Translate.

    One instance translates one piece of text: construct it with the text,
    then call :meth:`run`. The token and the request signature (``sign``)
    required by the web API are obtained automatically.
    """

    # Seconds to wait on each HTTP request before giving up, so that a
    # stalled connection cannot block the caller forever.
    timeout = 10

    def __init__(self, query_str):
        """Create the HTTP session and remember the text to translate.

        Args:
            query_str: The text to translate.
        """
        self.session = requests.Session()
        headers = {
            "User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/72.0.3626.121 Safari/537.36",
        }
        # Replaces the session's default header set with just this User-Agent.
        self.session.headers = headers
        self.baidu_url = "https://www.baidu.com/"
        self.root_url = "https://fanyi.baidu.com/"
        self.lang_url = "https://fanyi.baidu.com/langdetect"
        self.trans_url = "https://fanyi.baidu.com/v2transapi"
        self.query_str = query_str

    @staticmethod
    def _inject_gtk(js_source, gtk):
        """Return ``js_source`` with every ``window[l]`` replaced by the quoted gtk."""
        # Plain string replacement: the target is a literal, and gtk must be
        # inserted verbatim (a regex replacement template would interpret
        # backslashes in it).
        return js_source.replace("window[l]", '"' + gtk + '"')

    @staticmethod
    def _join_translations(trans_json):
        """Return the ``dst`` text of every translated segment, newline-joined.

        Raises:
            IndexError: If the response contains no translated segment.
            KeyError: If the response lacks the expected keys.
        """
        segments = trans_json["trans_result"]["data"]
        if not segments:
            raise IndexError("translation response contains no segments")
        return "\n".join(segment["dst"] for segment in segments)

    def get_token_gtk(self):
        """Fetch the token and gtk (used to build the sign) from the home page.

        Returns:
            A ``(token, gtk)`` tuple of strings.

        Raises:
            IndexError: If either value cannot be found in the page.
        """
        # The page is requested twice and only the second response is parsed.
        # NOTE(review): presumably the first request only establishes cookies
        # so that the second one carries a valid token -- confirm.
        self.session.get(self.root_url, timeout=self.timeout)
        resp = self.session.get(self.root_url, timeout=self.timeout)
        html_str = resp.content.decode()
        token = re.findall(r"token: '(.*?)'", html_str)[0]
        gtk = re.findall(r"window.gtk = '(.*?)'", html_str)[0]
        return token, gtk

    def generate_sign(self, gtk):
        """Compute the request sign for ``self.query_str``.

        Loads the signing script from ``config\\webtrans.js``, substitutes the
        gtk value into it, and evaluates its ``e`` function on the query text.

        Args:
            gtk: The gtk string returned by :meth:`get_token_gtk`.

        Returns:
            The sign value produced by the script.
        """
        # Windows-style path, resolved against the current working directory.
        with open('.\\config\\webtrans.js', encoding='utf8') as f:
            js_data = f.read()
        # Prepare the JS evaluation environment and load the patched script.
        context = js2py.EvalJs()
        context.execute(self._inject_gtk(js_data, gtk))
        return context.e(self.query_str)

    def lang_detect(self):
        """Detect the source language and choose the target, e.g. zh --> en.

        Returns:
            A ``(lan, to)`` tuple: the detected language code, and ``"en"``
            when the text is Chinese or ``"zh"`` otherwise.
        """
        lang_resp = self.session.post(
            self.lang_url,
            data={"query": self.query_str},
            timeout=self.timeout,
        )
        # Example response: {"error":0,"msg":"success","lan":"zh"}
        lan = lang_resp.json()['lan']
        to = "en" if lan == "zh" else "zh"
        return lan, to

    def parse_url(self, post_data):
        """Send the translation request and store the text in ``self.result``.

        Args:
            post_data: Form fields for the translation endpoint.

        Raises:
            KeyError, IndexError: If the response has no translation result.
        """
        trans_resp = self.session.post(
            self.trans_url, data=post_data, timeout=self.timeout
        )
        # Keep every translated segment rather than only the first one.
        self.result = self._join_translations(trans_resp.json())

    def run(self):
        """Translate ``self.query_str`` and return the translated text.

        Any failure is caught: the traceback is printed and a fixed fallback
        message is returned instead. The outcome is also stored in
        ``self.result``.
        """
        try:
            # 1. Get the Baidu home page cookies (without them the API
            #    always reports error 998).
            self.session.get(self.baidu_url, timeout=self.timeout)
            # 2. Get the Baidu Translate token and gtk (used to build the sign).
            token, gtk = self.get_token_gtk()
            # 3. Generate the sign.
            sign = self.generate_sign(gtk)
            # 4. Determine the translation direction, e.g. zh --> en.
            lan, to = self.lang_detect()
            # 5. Send the request and store the result.
            post_data = {
                "from": lan,
                "to": to,
                "query": self.query_str,
                "transtype": "realtime",
                "simple_means_flag": 3,
                "sign": sign,
                "token": token,
            }
            self.parse_url(post_data)
        except Exception:
            # Deliberate best-effort boundary: report the error and fall back
            # to a fixed user-facing message instead of propagating.
            print_exc()
            self.result = '网页百度:我抽风啦!'
        return self.result
if __name__ == '__main__':
    # Manual smoke test: translate a sample Japanese sentence and print
    # whatever run() returns (the translation or the fallback message).
    sample_text = '一歩ひくと见えてくる 何かの中にどっぷり浸かっていると何がなんだか分からなくなってしまうことがある。'
    translator = BaiduWeb(sample_text)
    print(translator.run())