python

项目一:爬虫

项目背景

公司网站被黑客攻击了,导致百度能搜索到30万条记录。

解决方法:上传死链接到百度,百度可以自动消除。

由于链接太多,手动处理太慢。第一次自动化用的是控制台 JS 语句 + 键盘模拟器处理,缺点是键盘模拟器点击位置容易出错。改用 Python 后速度快多了。

项目代码

import requests  # 发送请求
import random  # 随机
from bs4 import BeautifulSoup  # 解析页面
from time import sleep  # 等待间隔

# HTTP request headers sent with every Baidu search request; they mimic a
# desktop Chrome browser session.
headers = {
    "User-Agent": "Mozilla/5.0 (Windows NT 10.0; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/59.0.3071.115 Safari/537.36",
    "Accept": "text/html,application/xhtml+xml,application/xml;q=0.9,image/avif,image/webp,image/apng,*/*;q=0.8,application/signed-exchange;v=b3;q=0.9",
    "Accept-Language": "zh-CN,zh;q=0.9,en-US;q=0.8,en;q=0.7",
    "Connection": "keep-alive",
    # "br" (Brotli) is deliberately not advertised: requests/urllib3 only
    # decode Brotli when an optional Brotli library is installed, and without
    # it a Brotli-compressed reply would come back as unreadable bytes.
    "Accept-Encoding": "gzip, deflate",
    "Host": "www.baidu.com",
    # The Cookie must be replaced with a current one before running.
    # NOTE(review): this value appears to be a logged-in session cookie
    # (it contains BDUSS); consider loading it from configuration or an
    # environment variable instead of keeping it in the source file.
    "Cookie": "BIDUPSID=76DBD5A86BE0BD7625AADFB8B90EAD27; PSTM=1640784058; __yjs_duid=1_468e8de14fcb94e78193b0ae56115b251641473993177; BAIDUID=76DBD5A86BE0BD76327198379EDC9366:SL=0:NR=10:FG=1; BDUSS=VYb35OUFZLeU53SEM0WVFTU2loVy1oRG10SjRwYmx3REdVYVY3blYyMFpMenhqRVFBQUFBJCQAAAAAAAAAAAEAAAA73gUCbXRoc29sYXIAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAABmiFGMZohRjRH; BDUSS_BFESS=VYb35OUFZLeU53SEM0WVFTU2loVy1oRG10SjRwYmx3REdVYVY3blYyMFpMenhqRVFBQUFBJCQAAAAAAAAAAAEAAAA73gUCbXRoc29sYXIAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAABmiFGMZohRjRH; BDSFRCVID=XuLOJeCmHRbrH05jabgLKwTWk2KK0gOTHllnJB-JkWIwb-KVJeC6EG0Ptf8g0KubuTkzogKK0gOTH6KF_2uxOjjg8UtVJeC6EG0Ptf8g0M5; H_BDCLCKID_SF=tbCeoK0-tDt3qn7I5KIhDjo-qxbXqhvtBgOZ0lOEWUoDqlPxjt6H-4-TypJnL-PHW20j0h7mWnRSDlctqf6O3UFW-mciLfT-0bc4KKJxbnLWeIJo5t5h3-PhhUJiB5OMBan7_qvIXKohJh7FM4tW3J0ZyxomtfQxtNRJ0DnjtpChbRO4-TF5j5vyjU5; BDORZ=B490B5EBF6F3CD402E515D22BCDA1598; H_PS_PSSID=37377_36555_37355_37300_36885_37402_37406_36789_37260_26350_37364; BAIDUID_BFESS=76DBD5A86BE0BD76327198379EDC9366:SL=0:NR=10:FG=1; BDSFRCVID_BFESS=XuLOJeCmHRbrH05jabgLKwTWk2KK0gOTHllnJB-JkWIwb-KVJeC6EG0Ptf8g0KubuTkzogKK0gOTH6KF_2uxOjjg8UtVJeC6EG0Ptf8g0M5; H_BDCLCKID_SF_BFESS=tbCeoK0-tDt3qn7I5KIhDjo-qxbXqhvtBgOZ0lOEWUoDqlPxjt6H-4-TypJnL-PHW20j0h7mWnRSDlctqf6O3UFW-mciLfT-0bc4KKJxbnLWeIJo5t5h3-PhhUJiB5OMBan7_qvIXKohJh7FM4tW3J0ZyxomtfQxtNRJ0DnjtpChbRO4-TF5j5vyjU5; BDRCVFR[feWj1Vr5u3D]=I67x6TjHwwYf0; delPer=0; PSINO=1; BA_HECTOR=00ala405a100aha12l0kr68j1hitloe19; ZFY=q2Ougyzq8HanC6MrzZ:Al5LCZbJiQLnlR97TmNk4rMn0:C"
}

def main(page=1, keyword='site:www.tusenergy.com', timeout=10):
    """Fetch one Baidu search-result page and report how many results it holds.

    The function waits a random 1-2 seconds, requests the result page for
    ``keyword`` using the module-level ``headers``, prints the HTTP status
    code and the parsed page, and finally prints the number of elements
    matched as search results.

    Args:
        page: Zero-based result-page index; the ``pn`` query parameter is
            ``page * 10`` and the progress message shows ``page + 1``.
        keyword: Search expression placed in the ``wd`` query parameter.
        timeout: Seconds to wait for the server before giving up, passed to
            ``requests.get`` so a stalled connection cannot hang the script.

    Raises:
        requests.exceptions.RequestException: If the request fails or times
            out (raised by ``requests.get``).
    """
    # Local import keeps this function self-contained.
    from urllib.parse import quote

    print('开始爬取第{}页'.format(page + 1))

    # Pause for a random interval before issuing the request.
    wait_seconds = random.uniform(1, 2)
    print('开始等待{}秒'.format(wait_seconds))
    sleep(wait_seconds)

    # Percent-encode the keyword so characters such as spaces, '&' or '#'
    # cannot break the query string; ':' is kept literal so the default
    # "site:" query produces the same URL as plain concatenation.
    url = 'https://www.baidu.com/s?wd=' + quote(keyword, safe=':') + '&pn=' + str(page * 10)

    r = requests.get(url, headers=headers, timeout=timeout)
    html = r.text
    print('响应码是:{}'.format(r.status_code))

    soup = BeautifulSoup(html, 'html.parser')
    # Dump the parsed page so the raw response can be inspected.
    print(soup)

    # Select elements by this class string; presumably these are Baidu's
    # organic result containers -- verify against the current page markup.
    result_list = soup.find_all(class_='result c-container new-pmd')
    print('正在爬取:{},共查询到{}个结果'.format(url, len(result_list)))

# Start the crawl only when this file is executed directly as a script;
# importing it as a module does not trigger any request.
if __name__ == "__main__":
    main()

项目二:k3cloud

费用单自报销模块

自动获取 CEO、CFO、财务、出纳,实现一次更改,多次使用。

代码分析链接:二叉树(在新窗口打开)

基础资料

供应商审核后,自动分配到指定公司

代码分析链接:基础资料API(在新窗口打开)

Loading...