import
timeimport requests,os
import
urllib3
import
urllib.request
from startm3u8
import
get_start_m3u8
import
asyncio
import
aiofile
import
aiohttp
# Play page of the first episode; main() passes it to get_start_m3u8().
# 5 episodes in total.
start_url = "https://www.jijikb.com/play/52825-0-1.html"

# Example of a second-level (segment list) playlist URL.
secend_m3u8 = "https://vod4.buycar5.cn/20210617/DmV0P4zD/1000kb/hls/index.m3u8"

# Request headers sent by response() and mov_down().
headers = {
    'Referer': 'https://vod4.buycar5.cn/',
    'host': 'vod4.buycar5.cn',
    'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/91.0.4472.114 Safari/537.36',
}
def response(url):
    """Fetch ``url`` with the module-level ``headers`` and return the response.

    The request is made with certificate verification turned off
    (``verify=False``) and a 20 second timeout.

    Args:
        url: Address to request.

    Returns:
        The ``requests`` response when the status code is 200, with its
        ``encoding`` set to the detected ``apparent_encoding``; otherwise
        ``None`` (a notice is printed).
    """
    # Silence InsecureRequestWarning *before* the request: with verify=False
    # the warning is emitted while the request runs, so disabling it
    # afterwards is too late for the first call.
    urllib3.disable_warnings(urllib3.exceptions.InsecureRequestWarning)
    rep = requests.get(url=url, headers=headers, timeout=20, verify=False)
    if rep.status_code != 200:
        print("----没有响应----")
        return None
    rep.encoding = rep.apparent_encoding
    return rep
def get_second_m3u8_url(url):
    """Download the first-level playlist and return the second-level URL.

    The playlist text is saved to ``first.m3u8`` in the working directory.
    The last line that is neither blank nor a ``#`` comment is taken as a
    path and appended to ``https://vod4.buycar5.cn``.

    Args:
        url: Address of the first-level m3u8 playlist.

    Returns:
        The absolute URL of the second-level playlist, without surrounding
        whitespace.

    Raises:
        ValueError: If the playlist contains no entry line.
    """
    # A timeout keeps a stalled server from hanging the script forever.
    rep = requests.get(url, timeout=20)
    print(rep)
    playlist = rep.text
    with open('first.m3u8', 'w', encoding='utf-8') as f:
        f.write(playlist)

    last_entry = None
    for raw_line in playlist.splitlines():
        # Strip the line ending so it does not leak into the returned URL.
        entry = raw_line.strip()
        if entry and not entry.startswith("#"):
            last_entry = entry

    if last_entry is None:
        raise ValueError("no playlist entry found in %r" % (url,))
    return "https://vod4.buycar5.cn" + last_entry
def get_tc_url(resp):
    """Save a playlist to ``secend.m3u8`` and return its segment entries.

    Args:
        resp: Raw playlist content as bytes.

    Returns:
        A list with every line that is neither blank nor a ``#`` comment, in
        file order, with surrounding whitespace (including the line ending)
        removed. Each entry is also printed.
    """
    with open('secend.m3u8', 'wb') as f:
        f.write(resp)

    tc_urls = []
    # Read back as UTF-8 rather than the locale default encoding.
    with open('secend.m3u8', 'r', encoding='utf-8') as r_f:
        for raw_line in r_f:
            # Drop the trailing newline so callers get usable URLs, and skip
            # blank lines, which are not segments.
            entry = raw_line.strip()
            if not entry or entry.startswith("#"):
                continue
            print(entry)
            tc_urls.append(entry)
    return tc_urls
async def mov_down(url, semaphore):
    """Download one segment and store it under ``mov2/``.

    The file name is the last ``/``-separated component of ``url``. The
    request is sent with the module-level ``headers``.

    Args:
        url: Segment address; surrounding whitespace is ignored.
        semaphore: Semaphore held for the duration of the download to limit
            how many downloads run at once.
    """
    # The URL may carry surrounding whitespace (e.g. the trailing newline of
    # a playlist line); remove it before it reaches the HTTP client.
    url = url.strip()
    tc_name = url.split('/')[-1].strip()
    async with semaphore:
        async with aiohttp.ClientSession() as session:
            print(tc_name, "---正在下载-----")
            async with session.get(url, headers=headers) as rep:
                print(rep.status)
                async with aiofile.async_open("mov2/" + tc_name, 'wb') as p_f:
                    print("-----正在存储------")
                    rep1 = await rep.read()
                    await p_f.write(rep1)
                print(tc_name, '----下载完成---')
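# urllib.request.urlopen(url, data=None, [timeout, ])
# url is the address you want to fetch.
# data is the dictionary-style information submitted to the server, typically
# the username and password when crawling a site that requires login; it may
# be omitted.
# timeout is the timeout for the request; it may also be omitted.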
def main():
    """Resolve the playlists for ``start_url`` and download every segment.

    Steps: make sure ``mov2/`` exists, resolve the first- and second-level
    playlist URLs, fetch the second-level playlist, schedule one
    ``mov_down`` task per entry, wait for all of them, close the event loop
    and print the elapsed time in seconds.
    """
    # Obtain the loop here instead of relying on a module-level ``loop`` that
    # only exists when the file is executed as a script.
    loop = asyncio.get_event_loop()
    semaphore = asyncio.Semaphore(100)  # at most 100 downloads in flight
    start_time = time.time()
    os.makedirs('mov2', exist_ok=True)

    start_m3u8_url = get_start_m3u8(start_url)
    secend_m3u8_url = get_second_m3u8_url(start_m3u8_url)
    print(secend_m3u8_url)
    # urllib is used on purpose: the original author noted that the
    # equivalent requests.get(..., headers=headers) call got no result.
    with urllib.request.urlopen(secend_m3u8_url) as playlist:
        resp = playlist.read()
    tc_urls = get_tc_url(resp)

    tasks = [loop.create_task(mov_down(url, semaphore)) for url in tc_urls]
    # asyncio.wait() raises ValueError when given nothing to wait for.
    if tasks:
        loop.run_until_complete(asyncio.wait(tasks))
    loop.close()
    print(time.time() - start_time)
if __name__ == '__main__':
    # Create the event loop, then run the download pipeline.
    loop = asyncio.get_event_loop()
    main()