直接上代码吧。消费者这块我不知道怎么实现。大神帮我瞅瞅思路。
[Python] 纯文本查看 复制代码
import asyncio
import aiohttp
import queue
import pymysql
from proxy import proxy_pool
import random
async def crawl(session, data, html_queue, pool, proxy_auth):
    """POST one page of search parameters and queue the decoded JSON reply.

    Args:
        session: Client session whose ``post`` returns an async context
            manager (an ``aiohttp.ClientSession`` in this script).
        data: Form fields for a single result page.
        html_queue: Queue with a synchronous ``put``; receives the decoded
            JSON body of the response.
        pool: Non-empty sequence of proxy URLs; one is picked at random for
            each request.
        proxy_auth: Proxy credentials, forwarded unchanged as ``proxy_auth``.
    """
    url = 'http://scxk.nmpa.gov.cn:81/xk/itownet/portalAction.do?method=getXkzsList'
    # aiohttp declares ``data`` as keyword-only on ``ClientSession.post``;
    # passing it positionally raises TypeError before any request is sent.
    async with session.post(
        url,
        data=data,
        proxy=random.choice(pool),
        proxy_auth=proxy_auth,
    ) as response:
        html_queue.put(await response.json())
async def parse(html_queue, conn):
    """Drain every queued page and insert its records into the ``yjzj`` table.

    Pages are taken from ``html_queue`` until it is empty, so a single call
    consumes everything the crawlers have produced so far. Returns
    immediately when the queue is already empty.

    Args:
        html_queue: ``queue.Queue`` holding decoded JSON pages; each page is
            a mapping with a ``'list'`` entry of record mappings.
        conn: DB-API connection providing ``cursor()`` and ``commit()``.

    Raises:
        KeyError: If a page has no ``'list'`` entry or a record lacks one of
            the expected fields.
    """
    sql = 'insert into yjzj(企业名称, 许可证编号, 发证机关, 有效期至, 发证日期) values(%s,%s,%s,%s,%s)'
    while True:
        try:
            # Non-blocking on purpose: a blocking ``get()`` on an empty queue
            # would freeze the whole event loop.
            page = html_queue.get_nowait()
        except queue.Empty:
            break

        # Column order is name, licence number, issuing authority,
        # valid-until (有效期至), issue date (发证日期). The original code named
        # XK_DATE ``validity_date`` and XC_DATE ``issue_date`` but bound them
        # to the opposite columns; the order below matches the column list.
        rows = [
            (
                info['EPS_NAME'],
                info['PRODUCT_SN'],
                info['QF_MANAGER_NAME'],
                info['XK_DATE'],
                info['XC_DATE'],
            )
            for info in page['list']
        ]
        if not rows:
            continue

        cursor = conn.cursor()
        try:
            cursor.executemany(sql, rows)
        finally:
            # Release the cursor even when the insert fails.
            cursor.close()
        # One commit per page instead of one per row.
        conn.commit()
async def main():
    """Crawl result pages 1-50 concurrently, then store them in MySQL.

    All pages are fetched first and queued; the queued pages are then
    written to the database over a single connection, which is closed even
    if storing fails. A failed page is reported on stdout and does not stop
    the others.
    """
    html_queue = queue.Queue()
    pool = proxy_pool()
    proxy_auth = aiohttp.BasicAuth('xxxx', 'xxxx')
    formdata = [{
        'on': 'true',
        # Sent as text like every other form field; the encoded request
        # body is the same as with the bare integer.
        'page': str(i),
        'pageSize': '15',
        'productName': '',
        'conditionType': '1',
        'applyname': '',
        'applysn': ''
    } for i in range(1, 51)]

    connector = aiohttp.TCPConnector(ssl=False)
    async with aiohttp.ClientSession(connector=connector, trust_env=True) as session:
        # gather(..., return_exceptions=True) hands every failure back as a
        # result; asyncio.wait left task exceptions unretrieved.
        results = await asyncio.gather(
            *(crawl(session, data, html_queue, pool, proxy_auth) for data in formdata),
            return_exceptions=True,
        )

    for data, result in zip(formdata, results):
        if isinstance(result, BaseException):
            print(f"page {data['page']} failed: {result!r}")

    conn = pymysql.connect(
        host='xxxx',
        port=3306,
        user='xxxx',
        password='xxxx',
        db='text',
        charset='utf8'
    )
    try:
        # Await the consumer before closing the connection. Looping until
        # the queue is empty works whether one parse() call handles a single
        # page or drains the whole queue.
        while not html_queue.empty():
            await parse(html_queue, conn)
    finally:
        conn.close()
if __name__ == '__main__':
    import sys

    # WindowsSelectorEventLoopPolicy only exists on Windows; referencing it
    # elsewhere raises AttributeError.
    # NOTE(review): presumably selected to avoid proactor-loop problems with
    # aiohttp on Windows — confirm it is still needed.
    if sys.platform.startswith('win'):
        asyncio.set_event_loop_policy(asyncio.WindowsSelectorEventLoopPolicy())
    # asyncio.run creates the loop, runs main() to completion and closes it.
    asyncio.run(main())