import asyncio
import os
import random

import aiofiles
import aiohttp
import requests
from bs4 import BeautifulSoup
def ua():
    """Return a request-header dict carrying a randomly chosen User-Agent.

    Rotating the User-Agent between common desktop browser strings makes
    the scraper's requests look less uniform to the target server.
    """
    agents = (
        'Mozilla/5.0 (Macintosh; U; Intel Mac OS X 10_6_8; en-us) AppleWebKit/534.50 (KHTML, like Gecko) Version/5.1 Safari/534.50',
        'Mozilla/5.0 (Windows; U; Windows NT 6.1; en-us) AppleWebKit/534.50 (KHTML, like Gecko) Version/5.1 Safari/534.50',
        'Mozilla/5.0 (compatible; MSIE 9.0; Windows NT 6.1; Trident/5.0)',
        'Mozilla/4.0 (compatible; MSIE 8.0; Windows NT 6.0; Trident/4.0)',
        'Mozilla/4.0 (compatible; MSIE 7.0; Windows NT 6.0)',
        'Mozilla/4.0 (compatible; MSIE 6.0; Windows NT 5.1)',
        'Mozilla/5.0 (Macintosh; Intel Mac OS X 10.6; rv:2.0.1) Gecko/20100101 Firefox/4.0.1',
        'Mozilla/5.0 (Windows NT 6.1; rv:2.0.1) Gecko/20100101 Firefox/4.0.1',
        'Mozilla/4.0 (compatible; MSIE 7.0; Windows NT 5.1; 360SE)',
        'Mozilla/4.0 (compatible; MSIE 7.0; Windows NT 5.1; Avant Browser)',
    )
    return {'User-Agent': random.choice(agents)}
def main_page(a):
    """Fetch the book's table-of-contents page and collect chapter links.

    Args:
        a: URL of the book's main (index) page.

    Returns:
        list[str]: the ``href`` of every ``<a>`` tag found inside the
        ``div.mulu-list.quanji`` chapter-list containers, in document order.
    """
    # BUG FIX: the second positional argument of requests.get() is
    # `params`, not `headers` — the random User-Agent was silently
    # being sent as a query string instead of a header.
    res = requests.get(a, headers=ua())
    soup = BeautifulSoup(res.text, 'lxml')
    chapter_divs = soup.find_all('div', attrs={'class': 'mulu-list quanji'})
    url_list = []
    for chapter_div in chapter_divs:
        for link in chapter_div.find_all('a'):
            url_list.append(link['href'])
    return url_list
async def aio_down_one(chapter_url, signal):
    """Download one chapter page and save its text to disk, with retries.

    Args:
        chapter_url: URL of a single chapter page.
        signal: an ``asyncio.Semaphore`` bounding concurrent downloads.

    Returns:
        "" on success, or ``chapter_url`` after 10 failed attempts so the
        caller can collect URLs that still need downloading.
    """
    # NOTE(review): get_book_name() and the global `bookname` are defined
    # elsewhere in the project — assumed to yield (chapter number, book
    # sub-directory name); confirm against the rest of the file.
    number, c_name = get_book_name(chapter_url)
    for _attempt in range(10):
        try:
            # Hold the semaphore only for the network I/O, capping the
            # number of simultaneous requests at the semaphore's value.
            async with signal:
                async with aiohttp.ClientSession() as session:
                    async with session.get(chapter_url) as resp:
                        page_source = await resp.text()
            # Parse outside the semaphore so CPU work doesn't block slots.
            soup = BeautifulSoup(page_source, 'html.parser')
            chapter_name = soup.find('h1').text
            p_tags = soup.find('div', attrs={'class': 'neirong'}).find_all('p')
            p_contents = [p.get_text(strip=True) for p in p_tags]
            # Create the chapter directory once; exist_ok avoids the
            # racy exists()/makedirs() pair the original used per paragraph.
            os.makedirs(f'{bookname}/{c_name}', exist_ok=True)
            # BUG FIX: the original reopened the file in 'w' (truncate)
            # mode for every paragraph, so only the LAST paragraph
            # survived. Write the whole chapter in a single pass.
            async with aiofiles.open(f'{bookname}/{c_name}/{number}_{chapter_name}.txt', mode="w",
                                     encoding='utf-8') as f:
                await f.write('\n'.join(p_contents))
            print(chapter_url, "下载完毕!")
            return ""
        except Exception as e:
            print(e)
            print(chapter_url, "下载失败!, 重新下载. ")
    # All 10 attempts failed — report the URL back for re-queueing.
    return chapter_url
async def aio_down(parse_url_list):
    """Download every chapter concurrently, at most 10 at a time.

    Args:
        parse_url_list: iterable of chapter URLs (from ``main_page``).
    """
    # BUG FIX: asyncio.wait() raises ValueError on an empty task set;
    # guard so an empty chapter list is a harmless no-op.
    if not parse_url_list:
        return
    semaphore = asyncio.Semaphore(10)
    tasks = [asyncio.create_task(aio_down_one(url, semaphore))
             for url in parse_url_list]
    # gather() awaits all tasks in order and propagates failures,
    # replacing the legacy asyncio.wait() pattern.
    await asyncio.gather(*tasks)
import aiofiles
import asyncio
async def test():
    """Demonstrate a file-truncation pitfall: reopening a file in 'w'
    mode inside a loop keeps only the LAST write (temp.txt ends up 'B')."""
    for c in 'AB':
        # Opening the file in write mode on EVERY iteration truncates it,
        # so each write clobbers the previous one — scary >﹏<
        async with aiofiles.open('temp.txt',mode='w') as f:
            await f.write(c)
if __name__ == '__main__':
    # Run only the demonstration coroutine, not the scraper itself.
    asyncio.run(test())
# After running, temp.txt contains ONLY the character 'B'!!!!