爬取3dm娱乐板块趣图，并生成预览网页，python代码

Mario_4420 发表于 2021-2-24 16:04

import os, sys
import time
import requests
from bs4 import BeautifulSoup
from tqdm import trange
# requests.packages.urllib3.disable_warnings()
def print_hi(name):
    """Print a short greeting addressed to *name*.

    Args:
        name: Value interpolated into the greeting text.
    """
    print(f'Hi, {name}')

# Greet only when the file is executed directly, not when it is imported.
if __name__ == '__main__':
    print_hi('My Lord')

# url = input('请输入网址') or 'https://www.3dmgame.com/bagua/3944.html'
# Article id typed by the user; an empty (or whitespace-only) answer falls
# back to '4334'.
url_base = input('请输入网址编码如 https://www.3dmgame.com/bagua/4334.html 中 4334 ').strip() or '4334'
# Today's date (YYYY-MM-DD) is used as the name of the output directory.
time_stamp = time.strftime('%Y-%m-%d', time.localtime())

# exist_ok=True replaces the exists()-then-mkdir() pair, which can fail if
# the directory appears between the check and the creation.
os.makedirs(time_stamp, exist_ok=True)

# Request headers shared by the HTTP calls in this script; the User-Agent
# value is a desktop Chrome identification string.
headers = {
    'User-Agent': (
        'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_11_4) '
        'AppleWebKit/537.36 (KHTML, like Gecko) '
        'Chrome/52.0.2743.116 Safari/537.36'
    ),
}
def start_request(url, timeout=10):
    """Fetch *url* and return the response body decoded as UTF-8.

    Args:
        url: Address of the page to download.
        timeout: Seconds to wait for the server before giving up.

    Returns:
        The page text when the server answers with status 200, otherwise
        ``None``. A message is printed whenever ``None`` is returned.
    """
    try:
        # Without a timeout, requests can block indefinitely on a stalled
        # connection.
        r = requests.get(url, headers=headers, timeout=timeout)
    except requests.RequestException as exc:
        # Report network failures the same way as a bad status: print and
        # return None so the caller can stop cleanly.
        print('{}请求失败：{}'.format(url, exc))
        return None

    if r.status_code != 200:
        print('{}地址有误，可能已超过最后一页'.format(url))
        return None

    # Decode as UTF-8 instead of the charset guessed from the response.
    r.encoding = 'utf-8'
    return r.text

# html = start_request(url_base)

def save_gif(html_text):
    """Download the centred images of one page and build preview markup.

    Each ``<p align="center">`` element of *html_text* is searched for its
    first ``<img>``. The image is saved into the ``time_stamp`` directory
    and an ``<img>`` tag that points at the saved file is generated.

    Args:
        html_text: HTML source of one page, or ``None``/empty when the page
            could not be fetched.

    Returns:
        The generated ``<img>`` tags joined into one string, each preceded
        by a newline; an empty string when there is nothing to process.
    """
    # start_request() returns None for an unavailable page; parsing None
    # would raise, so report "no images" instead.
    if not html_text:
        return ''

    def save_img(res_img_url, name):
        """Download *res_img_url* into the output directory as *name*."""
        # Certificate verification is left at the requests default (on),
        # matching start_request(); verify=False caused an insecure-request
        # warning on every download.
        res_img = requests.get(res_img_url, headers=headers, timeout=30)
        with open(os.path.join(time_stamp, name), 'wb') as f:
            f.write(res_img.content)

    tags = []
    paragraphs = BeautifulSoup(html_text, 'html5lib').find_all('p', attrs={"align": "center"})
    for paragraph in paragraphs:
        img = paragraph.find('img')
        src = img.get('src') if img else None
        if not src:
            # No image in this paragraph, or an <img> without a src.
            continue
        # The last URL segment becomes the file name, with ':', '~' and '-'
        # removed.
        file_name = src.split('/')[-1].replace(':', '').replace('~', '').replace('-', '')
        if '.' not in file_name:
            # No extension in the URL: fall back to .gif.
            file_name += '.gif'
        save_img(src, file_name)
        tags.append('\n<img src="%s"/>' % file_name)
    return ''.join(tags)

def save_html(imgs_html):
    """Write the preview page ``index.html`` into the output directory.

    Args:
        imgs_html: Markup placed inside the page ``<body>`` after the
            loading hint.
    """
    def return_html_str(body):
        """Wrap *body* in the static page skeleton and its stylesheet."""
        css = "img { display:block; width: 100% } \n.tip {font-size: 3rem}"
        return '<html lang="en"><head><title>图</title><meta charset="utf-8"><style>%s</style></head><body><br><p class="tip">图片较多，滑到底部需等待加载。</p><br>%s</body></html>' % (css, body)

    # The with-block closes the file; no explicit close() is needed.
    with open(os.path.join(time_stamp, 'index.html'), 'w', encoding="utf-8") as f:
        f.write(return_html_str(imgs_html))

def request_each_html(url, max_pages=20):
    """Crawl the pages of one article and write the preview page.

    Args:
        url: Article id, e.g. ``'4334'`` for ``.../bagua/4334.html``.
        max_pages: Upper bound on the number of pages to try.
    """
    base = 'https://www.3dmgame.com/bagua/{}'.format(url)
    parts = []
    for i in trange(max_pages):
        # The first page has no suffix; later pages are "<id>_<n>.html"
        # with n starting at 2.
        if i == 0:
            page_url = '{}.html'.format(base)
        else:
            page_url = '{}_{}.html'.format(base, i + 1)

        html = start_request(page_url)
        if html is None:
            # start_request() returns None when a page is unavailable,
            # which is how the end of the article shows up; stop here.
            break
        # Process each page exactly once so images are not downloaded twice.
        parts.append(save_gif(html))

    save_html(''.join(parts))
    print('任务完成')

# Start the crawl for the article id read from the user (url_base).
request_each_html(url_base)

目前会出现证书警告（由下载图片时 requests.get 使用 verify=False 引起），但图片保存及生成浏览页面正常。

页: [1]

吾爱破解 - 52pojie.cn's Archiver

爬取3dm娱乐板块趣图，并生成预览网页，python代码