最近无聊想回顾下童年,看下粤语动漫,就写了两个爬虫
之前看到论坛有大佬写好调用逍遥一仙的下载器的接口,就直接拿过来用了,忘了叫啥名了。先感谢。
这是第一个,是找到了动漫章节命名规则的那个。
[Python] 纯文本查看 复制代码 #coding=utf-8
import base64
import json
import re
import requests
from lxml import etree
import ast
# HTTP headers sent with the page requests below; the User-Agent presents
# the client as desktop Chrome on macOS.
headers_pc = {
    "User-Agent": "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_13_4) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/66.0.3359.139 Safari/537.36"
}
def getPlayList(AcgUrl):
    """Fetch an anime detail page and queue every listed episode for download.

    For each episode link found on the page, calls ``getM3u8Url`` with the
    absolute play-page URL and a display name built from the page title
    followed by the episode label.

    Args:
        AcgUrl: URL of the anime detail (episode list) page. When empty, the
            previously hard-coded page is used so existing callers that pass
            ``''`` keep working.
    """
    from urllib.parse import urljoin

    detail_url = AcgUrl or "https://www.ktwz.cc/detail/6307.html"
    response = requests.get(detail_url, headers=headers_pc)
    page = etree.HTML(response.content.decode())

    # Select the anchor elements themselves so each href stays paired with
    # its own label even if some anchor lacks text or an href.
    anchors = page.xpath(
        '//div[@class="panel-default"]/ul[@class="dslist-group clearfix"]/li/a'
    )
    title = page.xpath(
        '//div[@class="detail-info"]/div[@class="detail-title"]/h2/text()'
    )
    # A missing heading should not abort the run; fall back to no prefix.
    title_text = title[0] if title else ''

    for anchor in anchors:
        href = anchor.get('href')
        if not href:
            continue
        episode_label = anchor.text or ''
        # urljoin resolves site-relative hrefs against the page actually
        # fetched instead of a fixed host prefix.
        getM3u8Url(urljoin(detail_url, href), title_text + episode_label)
def getM3u8Url(playurl, setNumber):
    """Extract the stream URL from a play page and push it to the downloader.

    The first ``<script>`` inside ``div.player`` is expected to hold an
    assignment of the form ``name = {...}`` whose object has a ``"url"``
    key. That URL is printed and forwarded to ``posttom3u8`` together with
    ``setNumber`` as the title.

    Args:
        playurl: Absolute URL of the episode's play page.
        setNumber: Display name for the episode, used as the download title.
    """
    response = requests.get(playurl, headers=headers_pc)
    page = etree.HTML(response.content.decode())
    scripts = page.xpath('//div[@class="player"]/script[position()=1]')
    if not scripts or not scripts[0].text:
        # Skip this episode rather than aborting the whole playlist.
        print('未找到播放数据: ' + playurl)
        return

    # Split only on the first '=' so '=' characters inside the URL's query
    # string do not truncate the payload.
    payload = scripts[0].text.partition('=')[2].strip().rstrip(';').strip()
    try:
        # The payload is normally JSON (may contain true/false/null).
        urlDict = json.loads(payload)
    except ValueError:
        # Fall back to the original parser for Python-literal-style objects.
        urlDict = ast.literal_eval(payload)

    # json.loads already unescapes "\/"; the replace covers the fallback path.
    m3u8Url = urlDict["url"].replace("\\/", "/")
    print(m3u8Url)
    posttom3u8('', setNumber, m3u8Url)
def posttom3u8(key, title, url):
    """Send one download task to the local downloader service.

    The task text is ``"title,url"``, preceded by a ``"#KEY,<key>"`` line
    when ``key`` is non-empty. It is GBK-encoded, base64-wrapped and POSTed
    as the ``data`` form field to ``http://127.0.0.1:8787/``. The push counts
    as successful when the JSON reply has ``message == 'success'``; the
    outcome is printed either way and nothing is returned.

    Args:
        key: Optional decryption key; falsy values omit the ``#KEY`` line.
        title: Name for the download task.
        url: Stream (m3u8) URL to download.
    """
    if key:
        data = '#KEY,{0}\r\n{1},{2}'.format(key, title, url)
    else:
        data = '{0},{1}'.format(title, url)
    print(data)

    try:
        encoded = base64.b64encode(data.encode('GBK')).decode()
        response = requests.post('http://127.0.0.1:8787/',
                                 data={"data": encoded}).json()
        succeeded = response['message'] == 'success'
    except Exception:
        # Best-effort push: any network, encoding or reply-format failure is
        # reported as a failed push, but Ctrl-C / SystemExit still propagate
        # (the original bare ``except:`` swallowed them).
        succeeded = False

    print('推送成功' if succeeded else '推送失败')
if __name__ == "__main__":
    # Argument: address of the anime's episode-list page to download.
    getPlayList('')
|