最新python request使用异步协程async/await详解

【注意:此文章为博主原创文章!转载需注意,请带原文链接,至少也要是txt格式!】
在看本文之前,你必须要弄明白async/await的原理,可以参考:超级详细解释 Python 中的 async await 概念 虽然你也可以直接看本文,但是建议你还是先理解理解概念。
这里首先要确保你的python是3.9+的最新版本,其次请一定安装aiohttp,也要确保是最新版本。
首先我们通过一个大量的请求来做一个实验,【关于单一异步协程】代码如下:
#!/usr/bin/python3
# -*- coding: utf-8 -*-
# @File : 协程.py
# @Time : 2021/8/19
# @Desc : Baseline: async requests issued strictly one after another
#         (each request is awaited before the next one starts).
import asyncio
import time
import aiohttp
import ujson

# Browser-like request headers so the target server treats us as a normal client.
headers = {
    "Accept": "text/html,application/xhtml+xml,application/xml;q=0.9,image/webp,image/apng,*/*;q=0.8,application/signed-exchange;v=b3;q=0.9",
    "Connection": "close",
    "User-Agent": "Mozilla/5.0 (Windows NT 11.1; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/95.0.4515.131 Safari/537.36 Edg/95.0.902.73",
    "Sec-Fetch-Site": "none", "Sec-Fetch-Dest": "document", "Accept-Encoding": "gzip, deflate",
    "Sec-Fetch-Mode": "navigate",
    "sec-ch-ua": "\"Chromium\";v=\"92\", \" Not A;Brand\";v=\"99\", \"Microsoft Edge\";v=\"92\"",
    "sec-ch-ua-mobile": "?0", "Cache-Control": "max-age=0", "Upgrade-Insecure-Requests": "1", "Sec-Fetch-User": "?1",
    "Accept-Language": "zh-CN,zh;q=0.9,en;q=0.8,en-GB;q=0.7,en-US;q=0.6"}
cookies = {"siteToken": "abc", "siteUserId": "efd"}


async def main():
    """Post one request per shop id (100000-100499) and print matches.

    Requests are sequential: the coroutine awaits each response fully
    before issuing the next request.
    """
    async with aiohttp.ClientSession(headers=headers, cookies=cookies) as session:
        for num in range(100000, 100500):
            paramsPost = {"pregnantCycle": "", "s": "{}".format(num)}
            url = 'https://www.baidu.com/?queryMemberList'  # no placeholders -> plain string
            # Fix: the JSON parse used to sit OUTSIDE the try, so any error
            # page / non-JSON body crashed the entire run. Keep the whole
            # response handling inside the try.
            try:
                async with session.post(url, data=paramsPost, timeout=20) as resp:
                    status = resp.status
                    data = ujson.loads(await resp.text())
                    if status == 200 and data['data']['list']:
                        print("【当前商铺ID:{} --- 会员总数量:{}】\n[举例其中一条数据]:会员昵称--{},"
                              "会员真实姓名--{},会员手机--{}".format(num, data['data']['pageCond']['count'],
                                                           data['data']['list'][0]['nickname'],
                                                           data['data']['list'][0]['truename'],
                                                           data['data']['list'][0]['mobile']))
            except Exception as e:
                print(e)
            if num % 100 == 0:
                print("已遍历至商户ID:{}".format(num))


if __name__ == '__main__':
    tt = time.time()
    asyncio.run(main())
    print(time.time() - tt)
上面的脚本最终运行时间:193.5414 秒。注意上面的代码,结果默认是str格式,转换json一定要用ujson,处理字符速度贼快。
利用 asyncio 提高性能,在上面的案例中,我们await在每个单独的 HTTP 请求之后使用,这并不理想。我们可以改为“同时”将所有这些请求作为异步任务运行,然后在最后检查结果,使用asyncio.ensure_future和asyncio.gather。
如果实际发出请求的代码被分解为自己的协程函数,我们可以创建一个任务列表,由每个请求的任务组成。然后我们可以将这个列表解压到一个gather调用中,该调用将它们一起运行。当我们await调用 时asyncio.gather,我们将返回所有传入的任务的可迭代对象,并保持它们在列表中的顺序。
【关于多任务异步协程】代码如下:
# -*- coding:utf-8 -*-
# __author__ = shrimp
# __Blog__ = https://woj.app
# __Date__ = 2021/8/19
# __ver__ = python3
# Concurrent version: all 500 requests are scheduled as tasks and
# collected with asyncio.gather.
import asyncio
import time
import aiohttp
import ujson

# Browser-like request headers so the target server treats us as a normal client.
headers = {
    "Accept": "text/html,application/xhtml+xml,application/xml;q=0.9,image/webp,image/apng,*/*;q=0.8,application/signed-exchange;v=b3;q=0.9",
    "Connection": "close",
    "User-Agent": "Mozilla/5.0 (Windows NT 11.1; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/95.0.4515.131 Safari/537.36 Edg/95.0.902.73",
    "Sec-Fetch-Site": "none", "Sec-Fetch-Dest": "document", "Accept-Encoding": "gzip, deflate",
    "Sec-Fetch-Mode": "navigate",
    "sec-ch-ua": "\"Chromium\";v=\"92\", \" Not A;Brand\";v=\"99\", \"Microsoft Edge\";v=\"92\"",
    "sec-ch-ua-mobile": "?0", "Cache-Control": "max-age=0", "Upgrade-Insecure-Requests": "1", "Sec-Fetch-User": "?1",
    "Accept-Language": "zh-CN,zh;q=0.9,en;q=0.8,en-GB;q=0.7,en-US;q=0.6"}
cookies = {"siteToken": "abc", "siteUserId": "efd"}


async def get_pokemon(session, url, paramsPost):
    """POST one request; return (parsed JSON body, HTTP status)."""
    async with session.post(url, data=paramsPost, timeout=20) as resp:
        status = resp.status
        data = ujson.loads(await resp.text())
        return data, status


async def main():
    """Fan out all requests concurrently, then process results in order."""
    task_list = []
    async with aiohttp.ClientSession(headers=headers, cookies=cookies) as session:
        for num in range(100000, 100500):
            paramsPost = {"pregnantCycle": "", "s": "{}".format(num)}
            url = 'https://www.baidu.com/?queryMemberList'
            # create_task (modern replacement for ensure_future) schedules the
            # coroutine immediately; all requests end up in flight together.
            task_list.append(asyncio.create_task(get_pokemon(session, url, paramsPost)))
        # return_exceptions=True keeps one failed request (timeout, bad JSON)
        # from cancelling the whole batch.
        results = await asyncio.gather(*task_list, return_exceptions=True)
    # gather preserves submission order, so zipping with the same range
    # restores the shop id for each result.
    # Fix: the original formatted the leftover loop variable `num` here,
    # which printed 100499 for every single row.
    for num, result in zip(range(100000, 100500), results):
        if isinstance(result, Exception):
            print(result)
            continue
        data, status = result
        try:
            if status == 200 and data['data']['list']:
                print("【当前商铺ID:{} --- 会员总数量:{}】\n[举例其中一条数据]:会员昵称--{},"
                      "会员真实姓名--{},会员手机--{}".format(num, data['data']['pageCond']['count'],
                                                   data['data']['list'][0]['nickname'],
                                                   data['data']['list'][0]['truename'],
                                                   data['data']['list'][0]['mobile']))
        except Exception as e:
            print(e)


if __name__ == '__main__':
    tt = time.time()
    asyncio.run(main())
    print(time.time() - tt)
上面的脚本最终运行时间:19.5669 秒,运行了多次,就在这个结果附近。
这里经过调试发现,上面的循环共运行500次,当第一次进行session.post请求时,必然需要等待对方系统返回相应的数据,这个时候任务会进行下一个请求。 大家可以这么理解,我请求1之后,你愿意多久返回数据就多久,你慢慢返回数据,我直接去请求2,如果此时1返回数据了,我就先回来处理1的数据,2的请求继续等待,以此类推。【个人理解,其实也就是瞬间发出去500个请求,然后看哪个返回数据,就先处理哪个任务,毕竟发送请求不怎么耗费IO】
有点夸张,快了10倍。本着求真务实的原则,我又用多线程(设置40个线程),不用协程,脚本如下:
#!/usr/bin/python3
# -*- coding: utf-8 -*-
# @File : 多线程.py
# @Time : 2021/8/19
# Thread-pool version for comparison with the coroutine scripts.
import concurrent.futures
import time
import requests

# NOTE(review): this Session is shared by all 40 worker threads; requests is
# commonly used this way, but Session is not documented as fully thread-safe
# — confirm for production use.
session = requests.Session()
headers = {
    "Accept": "text/html,application/xhtml+xml,application/xml;q=0.9,image/webp,image/apng,*/*;q=0.8,application/signed-exchange;v=b3;q=0.9",
    "Connection": "close",
    "User-Agent": "Mozilla/5.0 (Windows NT 11.1; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/95.0.4515.131 Safari/537.36 Edg/95.0.902.73",
    "Sec-Fetch-Site": "none", "Sec-Fetch-Dest": "document", "Accept-Encoding": "gzip, deflate",
    "Sec-Fetch-Mode": "navigate",
    "sec-ch-ua": "\"Chromium\";v=\"92\", \" Not A;Brand\";v=\"99\", \"Microsoft Edge\";v=\"92\"",
    "sec-ch-ua-mobile": "?0", "Cache-Control": "max-age=0", "Upgrade-Insecure-Requests": "1", "Sec-Fetch-User": "?1",
    "Accept-Language": "zh-CN,zh;q=0.9,en;q=0.8,en-GB;q=0.7,en-US;q=0.6"}
cookies = {"siteToken": "abc", "siteUserId": "efd"}


def main():
    # Standard entry point: kick off the threaded crawl.
    ret = async_task(0)
    print(ret)


def async_task(x):
    """Fan the 500 shop-id requests out over a 40-thread pool."""
    task_list = []
    with concurrent.futures.ThreadPoolExecutor(max_workers=40) as pool:
        for i in range(100000, 100500):
            task_list.append(pool.submit(long_time_task, x, i))
        for future in concurrent.futures.as_completed(task_list):
            # as_completed only yields finished futures, so .result() does not
            # block here; it also re-raises any exception from the worker.
            future.result()


def long_time_task(a1, shop_id):
    """Fetch one shop's member list and print matches.

    `a1` is an unused placeholder kept for the pool.submit call shape.
    Fix: the original built paramsGet from the undefined name `num`,
    raising NameError in every worker; the intended value is the shop id.
    (Also renamed the parameter from `id`, which shadowed the builtin.)
    """
    paramsGet = {"pregnantCycle": "", "s": "{}".format(shop_id)}
    response = session.get("https://www.baidu.com/?queryMemberList", params=paramsGet,
                           headers=headers, cookies=cookies, timeout=20)
    try:
        body = response.json()  # parse once instead of five separate calls
        if response.status_code == 200 and body['data']['list']:
            print("【当前商铺ID:{} --- 会员总数量:{}】\n[举例其中一条数据]:会员昵称--{},"
                  "会员真实姓名--{},会员手机--{}".format(shop_id, body['data']['pageCond']['count'],
                                               body['data']['list'][0]['nickname'],
                                               body['data']['list'][0]['truename'],
                                               body['data']['list'][0]['mobile']))
        if shop_id % 500 == 0:
            print("已遍历至商户ID:{}".format(shop_id))
    except Exception as e:
        print(e)
    return


if __name__ == '__main__':
    tt = time.time()
    main()
    print(time.time() - tt)
这个纯粹就是多线程,大家猜猜运行时长是多少???
运行多次结果都在13--19秒之间。。。
此刻的我,破口大骂,我CTMD协程,浪费感情么不是。。。你瞬间发N多个请求,对方不封你IP?因为你没办法控制协程任务数量啊!!!但是多线程你能啊,所以,,,老子要你有个乱用?!
如果多线程稍微设置多一点,是不是就和协程一样了,其实不然,对本机器所耗费的资源还是不同的。协程相对来说能比多线程节省一些资源。
布施恩德可便相知重
微信扫一扫打赏
支付宝扫一扫打赏