# Forum post header: last edited by 人生苦短丶 on 2019-11-15 11:04.
# [Python] (forum code-box label: "view as plain text / copy code")
import os
import requests
from lxml import etree
import time
import datetime
import urllib.request
import re
from fake_useragent import UserAgent
import random
# 添加的模块乱七八糟..
# A single random browser User-Agent string is drawn once, when the module
# loads, and stored in the shared ``headers`` dict used by the page requests
# in geturl() and down().
ua = UserAgent(verify_ssl=False)
headers = dict((
    ('User-Agent', ua.random),
    ('Referer', 'https://www.mzitu.com'),
))
def geturl(url, path):
    """Fetch one listing page and hand every gallery link on it to down().

    Args:
        url: Address of the listing page to fetch.
        path: Destination directory, passed through unchanged to down().

    Raises:
        requests.RequestException: If the page cannot be fetched or the
            request times out.
    """
    # A timeout keeps one stalled connection from hanging the whole run.
    r = requests.get(url, headers=headers, timeout=15)
    dom = etree.HTML(r.content)
    if dom is None:
        # Guard in case the parser produced no document root (for example an
        # empty response body); there is nothing to iterate in that case.
        return
    # Each <li> under <ul id="pins"> carries the link to one gallery page.
    for xurl in dom.xpath('//ul[@id="pins"]/li/a/@href'):
        down(xurl, path)
def down(xurl, path):
    """Fetch one gallery page and save its main image into ``path``.

    The image is written as ``<alt text>.jpg``; characters that are not valid
    in file names are replaced with ``_``. Pages without a main image are
    skipped with a message instead of raising.

    Args:
        xurl: Address of the gallery page.
        path: Existing directory in which the image file is created.

    Raises:
        requests.RequestException: If a request fails or times out.
        OSError: If the image file cannot be written.
    """
    # The first run of six digits in the URL is used as the gallery id.
    match = re.search(r'\d{6}', xurl)
    # Copy the shared headers so the per-gallery Referer set below does not
    # leak into the module-level dict used by other requests.
    header = dict(headers)
    if match:
        header['Referer'] = "https://www.mzitu.com/" + match.group()

    r = requests.get(xurl, headers=header, timeout=15)
    dom = etree.HTML(r.content)
    src = [] if dom is None else dom.xpath('//div[@class="main-image"]/p/a/img/@src')
    if not src:
        print("未找到图片,已跳过: %s" % xurl)
        return
    title = dom.xpath('//div[@class="main-image"]/p/a/img/@alt')

    # Fall back to the gallery id (or a fixed name) when the image has no alt
    # text, then replace characters that file systems reject.
    name = str(title[0]) if title else (match.group() if match else 'image')
    name = re.sub(r'[\\/:*?"<>|\r\n]', '_', name).strip() or 'image'

    # Send the same headers with the image request.
    # NOTE(review): presumably the image host checks Referer the same way the
    # pages do - confirm against the live site.
    img = requests.get(src[0], headers=header, timeout=15)
    with open(os.path.join(path, name + '.jpg'), 'wb') as f:
        f.write(img.content)
    print("%s下载成功" % name)
    # Pause between downloads to avoid hammering the server.
    time.sleep(2)
def _read_int(prompt):
    """Prompt until the user types a valid integer and return it."""
    while True:
        try:
            return int(input(prompt))
        except ValueError:
            print("您的输入有误,请重新输入")


def main():
    """Interactively pick a category and page range, then download each page.

    Prompts for a category number (1-4), a destination directory, and a start
    and end page. Every page from the start page through the end page
    (inclusive) is passed to geturl(), and the elapsed time per page is
    printed.
    """
    # Menu number -> URL path segment of the category.
    categories = {1: "xinggan", 2: "japan", 3: "taiwan", 4: "mm"}

    print("1:性感妹子")
    print('*')
    print("2:日本妹子")
    print('*')
    print('3:台湾妹子')
    print('*')
    print('4:清纯妹子')
    print('-------------------------------------------------------------------------')

    # Keep asking until a listed category is chosen, instead of continuing
    # with the bare site URL after an invalid choice.
    choice = _read_int("请输入序列号(如:1):")
    while choice not in categories:
        print("您的输入有误,请重新输入")
        choice = _read_int("请输入序列号(如:1):")
    base_url = "https://www.mzitu.com/" + categories[choice]

    # A folder dragged into a terminal may arrive wrapped in quotes and/or
    # followed by whitespace; strip both so the path is usable as typed.
    path = input("请输入要存放的地址(把要存放的文件夹直接拉进来):").strip().strip('"\'')
    start_page = _read_int("请输入下载的起始页码:")
    end_page = _read_int("请输入下载的终止页码:")
    print("-----------------请稍等...")

    # end_page is inclusive: the page the user names as the last one is
    # downloaded too.
    for page in range(start_page, end_page + 1):
        # Build each page URL from the unchanged base so the "/page/N"
        # suffixes do not pile up from one iteration to the next.
        page_url = base_url + "/page/" + str(page)
        started = datetime.datetime.now()
        print("开始下载第 %s 页的美图 " % page)
        geturl(page_url, path)
        finished = datetime.datetime.now()
        print("用时 %s " % str(finished - started))
# Start the interactive downloader only when this file is run as a script,
# not when it is imported as a module.
if __name__ == "__main__":
    main()
# (stray "|" left over from the forum paste)