水平有限,还请大神多多留言指点。
今天的代码如下:
# coding:utf-8
import requests
from bs4 import BeautifulSoup
import os
import time
# Root directory; write_title() switches into it and creates one folder per
# title there.
path = 'f:\\bohaishibei'
# Disabled code kept for reference: it would create the root directory at
# import time when it does not exist yet, and report when it already does.
# if not os.path.exists(path):
#     os.makedirs(path)
#
# else:
#     print('path is exist')
# Fetch the post-title links
def get_title(urls, timeout=10):
    """Download a listing page and return the post-title links found on it.

    Args:
        urls: URL of the listing page to download (a single URL string,
            despite the plural name).
        timeout: Seconds to wait for the server before giving up; without
            a timeout ``requests`` may block indefinitely.

    Returns:
        The ``<a>`` tags matched by ``div > article > header > h2 > a``,
        with the first two matches dropped.

    Raises:
        requests.RequestException: If the request fails, times out, or the
            server answers with an HTTP error status.
    """
    # Send a browser-like User-Agent with the request.
    header = {
        'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/74.0.3729.169 Safari/537.36'
    }
    # The dict has to be passed as ``headers``. Passing it as ``params``
    # (as the code did before) appends it to the URL query string and the
    # default python-requests User-Agent is still sent.
    response = requests.get(urls, headers=header, timeout=timeout)
    # Fail loudly on 4xx/5xx instead of parsing an error page into an
    # empty result.
    response.raise_for_status()
    bsobj = BeautifulSoup(response.text, 'lxml')
    bs_select = bsobj.select('div > article > header > h2 > a')
    # Skip the first two matches, as the original code did.
    # NOTE(review): presumably these are pinned / non-article entries;
    # confirm against the live page.
    return bs_select[2:]
# Characters that are path separators or not allowed in Windows file names.
_UNSAFE_NAME_CHARS = str.maketrans({char: '_' for char in '\\/:*?"<>|'})


def _safe_folder_name(raw_name):
    """Return ``raw_name`` with unsafe characters replaced by ``_``."""
    # Windows also rejects or silently strips trailing spaces and dots.
    return raw_name.translate(_UNSAFE_NAME_CHARS).strip().rstrip('.')


# Create one folder per title
def write_title(t):
    """Create one folder under ``path`` for every title link in ``t``.

    The folder name is the link text with its first 11 characters removed
    and any character unsafe for a file name replaced by ``_``. Existing
    folders are left untouched, so the function can be re-run safely.
    The process working directory is not changed; folders are addressed
    by their full path instead.

    Args:
        t: Iterable of objects exposing a ``text`` attribute, such as the
            tags returned by ``get_title``.
    """
    # NOTE(review): 11 is presumably the length of a fixed prefix in each
    # title (e.g. a site/date tag) -- confirm against real titles.
    prefix_length = 11

    # The root may not exist yet; creating it here avoids a
    # FileNotFoundError on the first run.
    os.makedirs(path, exist_ok=True)

    for titles in t:
        folder_name = _safe_folder_name(titles.text[prefix_length:])
        if not folder_name:
            # Nothing left after removing the prefix; no folder to create.
            continue
        print('正在创建文件夹:{}'.format(folder_name))
        # Pause kept from the original code; it only paces the output.
        time.sleep(1)
        # exist_ok avoids FileExistsError when the script is run again.
        os.makedirs(os.path.join(path, folder_name), exist_ok=True)
    print('**********文件创建完毕**********')
# TODO: download the images and store them in the folder of the matching title
if __name__ == '__main__':
    # Script entry point: read the title links from the category listing
    # page, then create one folder per title.
    title_links = get_title('https://bh.sb/post/category/main/')
    write_title(title_links)
|