分享一个爬取视频的爬虫,仅供学习使用。目前用的是单线程,后期再改造一下。
俗话说,师傅领进门,修行靠个人。这里只提供一个爬取视频的思路,剩下的大家自己研究吧。
使用代码时,需要有一个用于保存视频的文件夹(对应代码中的保存路径)。
import os
import re
import urllib
from urllib.parse import urljoin, urlsplit

import requests
from lxml import etree
# Request headers passed as ``headers=`` to the ``requests.get`` calls below;
# they carry a desktop Chrome User-Agent string.
headers = {
    "User-Agent": (
        "Mozilla/5.0 (Windows NT 10.0; Win64; x64) "
        "AppleWebKit/537.36 (KHTML, like Gecko) "
        "Chrome/77.0.3865.90 Safari/537.36"
    ),
}
# Characters that cannot appear in a Windows file name (path separators,
# reserved punctuation and control characters).
_INVALID_FILENAME_CHARS = re.compile(r'[\\/:*?"<>|\x00-\x1f]')

# Number of bytes written to disk per chunk while streaming a video.
_CHUNK_SIZE = 64 * 1024


def _fetch_dom(url, timeout):
    """Download *url* and return its body parsed as an lxml HTML tree.

    Raises ``requests.RequestException`` on network errors or on a
    non-success HTTP status.
    """
    response = requests.get(url, headers=headers, timeout=timeout)
    response.raise_for_status()
    return etree.HTML(response.content.decode())


def _build_video_name(name, duration, video_url):
    """Return the output file name: title + duration + the URL's extension."""
    duration = duration.strip("-").strip().replace(":", "-")
    # Take the extension from the URL *path* only, so a query string such as
    # "?token=..." never leaks into the file name.
    extension = os.path.splitext(urlsplit(video_url).path)[1]
    # The title comes straight from the page; neutralise anything that would
    # be rejected by the file system or interpreted as a sub-directory.
    stem = _INVALID_FILENAME_CHARS.sub("-", name + duration)
    return stem + extension


def _download(video_url, target_path, timeout):
    """Stream *video_url* to *target_path* without buffering it in memory.

    Data is written to a temporary ``.part`` file that is renamed on success
    and removed on failure, so an interrupted download never leaves a
    truncated file under the final name.
    """
    partial_path = target_path + ".part"
    try:
        with requests.get(video_url, headers=headers, stream=True,
                          timeout=timeout) as response:
            response.raise_for_status()
            with open(partial_path, "wb") as f:
                for chunk in response.iter_content(chunk_size=_CHUNK_SIZE):
                    f.write(chunk)
        os.replace(partial_path, target_path)
    finally:
        # Only still present when something above failed.
        if os.path.exists(partial_path):
            os.remove(partial_path)


def main(save_dir=r"I:\Pchong\pc_video\ae-video", timeout=30):
    """Download every video listed on the 699pic media index page.

    For each list entry the video's detail page is fetched, the first
    ``<source>`` URL inside the player is resolved, and the file is saved as
    ``<title><duration>.<ext>`` inside *save_dir*.

    Args:
        save_dir: Directory the videos are written to; created if missing.
        timeout: Timeout in seconds applied to every HTTP request.

    Raises:
        requests.RequestException: If the index page itself cannot be
            fetched. Failures of individual videos are reported and skipped.
    """
    home_url = 'http://699pic.com/media/'
    dom = _fetch_dom(home_url, timeout)

    # Select each entry's <a> once and read its href/title/duration relative
    # to it. Querying three independent lists and zipping them would silently
    # misalign the fields whenever one entry lacks an <h3> or <span>.
    anchors = dom.xpath(
        '//div[@class="list-item video-list clearfix"]/ul/li/a[@class="video-name fl"]')
    print([anchor.get("href") for anchor in anchors if anchor.get("href")])

    os.makedirs(save_dir, exist_ok=True)

    for anchor in anchors:
        href = anchor.get("href")
        if not href:
            continue
        name = "".join(anchor.xpath("h3/text()"))
        duration = "".join(anchor.xpath("span/text()"))
        # urljoin copes with absolute, root-relative and protocol-relative
        # links alike, unlike plain string concatenation.
        page_url = urljoin(home_url, href)

        try:
            page_dom = _fetch_dom(page_url, timeout)
            sources = page_dom.xpath('//div[@class="video-show"]//video/source/@src')
            if not sources:
                print("未找到视频地址,跳过:", page_url)
                continue
            video_url = urljoin(page_url, sources[0])
            print("视频真实地址:", video_url)

            video_name = _build_video_name(name, duration, video_url)
            print(video_name, "正在下载………")
            _download(video_url, os.path.join(save_dir, video_name), timeout)
        except (requests.RequestException, OSError) as exc:
            # One broken video must not abort the remaining downloads.
            print("下载失败,跳过:", page_url, exc)
            continue

        print(video_name, "下载完成!!")
# Start the scraper only when this file is executed directly, not on import.
if __name__ == "__main__":
    main()
|