python
项目一:爬虫
项目背景
公司网站被黑客攻击了,导致百度能搜索到30万条记录。
解决方法:上传死链接到百度,百度可以自动消除。
由于链接太多,手动处理太慢。第一次自动化用的是控制台JS语句+键盘模拟器处理。缺点:键盘模拟器点击位置容易出错。改用Python后速度快多了。
项目代码
import requests # 发送请求
import random # 随机
from bs4 import BeautifulSoup # 解析页面
from time import sleep # 等待间隔
# Request headers sent with every Baidu search request so that the crawler
# looks like a regular desktop browser session.
headers = {
    "User-Agent": "Mozilla/5.0 (Windows NT 10.0; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/59.0.3071.115 Safari/537.36",
    "Accept": "text/html,application/xhtml+xml,application/xml;q=0.9,image/avif,image/webp,image/apng,*/*;q=0.8,application/signed-exchange;v=b3;q=0.9",
    "Accept-Language": "zh-CN,zh;q=0.9,en-US;q=0.8,en;q=0.7",
    "Connection": "keep-alive",
    # "br" is intentionally not advertised: requests only decodes Brotli when
    # an optional Brotli package is installed, otherwise the response body
    # would come back as undecoded bytes.
    "Accept-Encoding": "gzip, deflate",
    "Host": "www.baidu.com",
    # The Cookie must be replaced with one from your own logged-in session.
    # NOTE(review): this is a hard-coded session credential (BDUSS); consider
    # loading it from configuration instead of keeping it in source.
    "Cookie": "BIDUPSID=76DBD5A86BE0BD7625AADFB8B90EAD27; PSTM=1640784058; __yjs_duid=1_468e8de14fcb94e78193b0ae56115b251641473993177; BAIDUID=76DBD5A86BE0BD76327198379EDC9366:SL=0:NR=10:FG=1; BDUSS=VYb35OUFZLeU53SEM0WVFTU2loVy1oRG10SjRwYmx3REdVYVY3blYyMFpMenhqRVFBQUFBJCQAAAAAAAAAAAEAAAA73gUCbXRoc29sYXIAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAABmiFGMZohRjRH; BDUSS_BFESS=VYb35OUFZLeU53SEM0WVFTU2loVy1oRG10SjRwYmx3REdVYVY3blYyMFpMenhqRVFBQUFBJCQAAAAAAAAAAAEAAAA73gUCbXRoc29sYXIAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAABmiFGMZohRjRH; BDSFRCVID=XuLOJeCmHRbrH05jabgLKwTWk2KK0gOTHllnJB-JkWIwb-KVJeC6EG0Ptf8g0KubuTkzogKK0gOTH6KF_2uxOjjg8UtVJeC6EG0Ptf8g0M5; H_BDCLCKID_SF=tbCeoK0-tDt3qn7I5KIhDjo-qxbXqhvtBgOZ0lOEWUoDqlPxjt6H-4-TypJnL-PHW20j0h7mWnRSDlctqf6O3UFW-mciLfT-0bc4KKJxbnLWeIJo5t5h3-PhhUJiB5OMBan7_qvIXKohJh7FM4tW3J0ZyxomtfQxtNRJ0DnjtpChbRO4-TF5j5vyjU5; BDORZ=B490B5EBF6F3CD402E515D22BCDA1598; H_PS_PSSID=37377_36555_37355_37300_36885_37402_37406_36789_37260_26350_37364; BAIDUID_BFESS=76DBD5A86BE0BD76327198379EDC9366:SL=0:NR=10:FG=1; BDSFRCVID_BFESS=XuLOJeCmHRbrH05jabgLKwTWk2KK0gOTHllnJB-JkWIwb-KVJeC6EG0Ptf8g0KubuTkzogKK0gOTH6KF_2uxOjjg8UtVJeC6EG0Ptf8g0M5; H_BDCLCKID_SF_BFESS=tbCeoK0-tDt3qn7I5KIhDjo-qxbXqhvtBgOZ0lOEWUoDqlPxjt6H-4-TypJnL-PHW20j0h7mWnRSDlctqf6O3UFW-mciLfT-0bc4KKJxbnLWeIJo5t5h3-PhhUJiB5OMBan7_qvIXKohJh7FM4tW3J0ZyxomtfQxtNRJ0DnjtpChbRO4-TF5j5vyjU5; BDRCVFR[feWj1Vr5u3D]=I67x6TjHwwYf0; delPer=0; PSINO=1; BA_HECTOR=00ala405a100aha12l0kr68j1hitloe19; ZFY=q2Ougyzq8HanC6MrzZ:Al5LCZbJiQLnlR97TmNk4rMn0:C",
}
def main(page=1, v_keyword='site:www.tusenergy.com'):
    """Fetch one page of Baidu search results and report how many were parsed.

    Args:
        page: Page index used to compute the ``pn`` offset (``page * 10``).
            The progress message shows ``page + 1``, so the default of 1 is
            reported as page 2 (presumably 10 results per page, zero-based).
        v_keyword: Search expression sent as the ``wd`` query parameter.

    Raises:
        requests.exceptions.RequestException: If the request fails or does
            not complete within the timeout.

    The function prints its progress, the HTTP status code, the parsed
    document and the number of matching result elements; it returns nothing.
    Request headers come from the module-level ``headers`` dict.
    """
    print('开始爬取第{}页'.format(page + 1))

    # Random pause before the request so successive runs are not evenly spaced.
    wait_seconds = random.uniform(1, 2)
    print('开始等待{}秒'.format(wait_seconds))
    sleep(wait_seconds)

    # Let requests build the query string so the keyword is URL-encoded, and
    # bound the wait with a timeout so a stalled connection cannot hang forever.
    r = requests.get(
        'https://www.baidu.com/s',
        params={'wd': v_keyword, 'pn': page * 10},
        headers=headers,
        timeout=10,
    )
    html = r.text
    print('响应码是:{}'.format(r.status_code))

    soup = BeautifulSoup(html, 'html.parser')
    print(soup)

    # NOTE(review): a multi-class string passed to class_ matches only elements
    # whose class attribute is exactly this string, in this order — confirm
    # this is intended for Baidu's current markup.
    result_list = soup.find_all(class_='result c-container new-pmd')
    print('正在爬取:{},共查询到{}个结果'.format(r.url, len(result_list)))
# Run the crawler only when this file is executed as a script, not on import.
if __name__ == '__main__':
    main()
项目二:k3cloud
费用单自报销模块
自动获取CEO、CFO、财务、出纳,实现一次更改,多次使用。
代码分析链接:二叉树
基础资料
供应商审核后,自动分配到指定公司
代码分析链接:基础资料API
Loading...
