本帖最后由 yuyu226 于 2022-12-19 10:32 编辑
话不多说,直接进入主题
_______________________________________________________________________________
def get_html_path():
    """Scan ``./Downloads`` and hand the first collected listing to the parser.

    Rebinds the module-level globals ``train_path``, ``all_path`` and
    ``labels``, walks ``train_path`` recursively, then calls
    ``loop_parse_html`` with ``all_path[0]`` (or an empty list when no file
    was found, instead of failing with ``IndexError``).
    """
    global train_path, all_path, labels
    train_path, all_path, labels = "./Downloads", [], []

    # Collect the file listing of the folder tree.
    def get_label_and_wav(path, file):
        """Walk *path* recursively.

        *file* is the name of the directory being walked ("" for the root).
        Returns the shared ``all_path`` and ``labels`` lists.
        """
        dirs = os.listdir(path)
        for a in dirs:
            entry = os.path.join(str(path), str(a))
            if os.path.isfile(entry):
                # NOTE(review): this stores the whole directory listing once
                # per file rather than the file's own path; only the first
                # stored listing is consumed below. Kept as-is because
                # ``all_path`` is a module-level global.
                all_path.append(dirs)
                if file != "":
                    labels.append(file)
            else:
                get_label_and_wav(entry, a)
        return all_path, labels

    # Walk the tree to gather the listings and labels.
    all_path, labels = get_label_and_wav(train_path, "")
    # Guard the empty case: with no files at all, ``all_path`` stays empty.
    loop_parse_html(paths=all_path[0] if all_path else [])
_______________________________________________________________________________
[Python] 纯文本查看 复制代码
# Parse the links and titles and store them as JSON.
def loop_parse_html(paths):
    """Parse every entry of *paths* ending in ``html`` and write ``urls.json``.

    Args:
        paths: File names relative to the global ``train_path``.

    Side effects:
        Resets the global ``res_json`` to an empty list before parsing and
        finally rebinds it to the serialized JSON string that is written to
        ``urls.json`` in the current working directory.
    """
    global res_json
    res_json = []
    for index, name in enumerate(paths):
        if name.endswith("html"):
            parse_html(position=index, path=f"{train_path}/{name}")
            sleep(0.1)
    res_json = json.dumps(res_json, ensure_ascii=False)
    # ensure_ascii=False leaves non-ASCII text unescaped, so the file must be
    # written as UTF-8 explicitly rather than in the platform's locale
    # encoding; the reader opens it with encoding='utf-8'.
    with open('urls.json', 'w', encoding='utf-8') as f:
        f.write(res_json)
_______________________________________________________________________________
[Python] 纯文本查看 复制代码
def parse_html(position, path):
    """Extract the video link and the title from one saved HTML page.

    Args:
        position: Zero-based index of the file; only used in progress output.
        path: Path of the HTML file to parse.

    Raises:
        IndexError: If the page has no matching ``script`` text or no
            ``<title>`` text (the first XPath match is taken unconditionally).

    The extracted pair is passed to ``write_json_file``.
    """
    position = position + 1
    print(f'开始第{position}次解析:{path}')
    parser = etree.HTMLParser(encoding="utf-8")
    html_tree = etree.parse(path, parser=parser)
    print(f'开始解析下载链接')
    script_text = html_tree.xpath('// *[ @ class = "demo-class"] / script / text()')[0]
    # The encoded payload sits between the first '("' and the first '")'.
    a_start = script_text.find('("')
    a_end = script_text.find('")')
    aim_video_url = script_text[a_start + 2:a_end]
    # Decode the payload, then cut the link out of the decoded markup.
    decoded = unquote(aim_video_url)
    href_start = decoded.find('https')
    if href_start == -1:
        # No link present: slicing from -1 would yield a garbage character.
        aim_video_urls = ""
    else:
        # Search for the closing '>' only after the link start; when it is
        # missing, keep the rest of the text instead of dropping a character.
        href_end = decoded.find('>', href_start)
        if href_end == -1:
            aim_video_urls = decoded[href_start:]
        else:
            aim_video_urls = decoded[href_start:href_end]
    print(f"链接解析已完成:{aim_video_urls}")
    print(f'解析标题')
    # Title
    aim_title = html_tree.xpath('/html//title/text()')[0]
    # Normalize the title: strip spaces and newlines.
    aim_title = aim_title.replace(" ", "").replace("\n", "")
    useless_str_start = aim_title.find("Chinesehomemadevideo")
    # Only trim when the suffix marker exists; find() returns -1 otherwise
    # and slicing with it would silently cut the title's last character.
    if useless_str_start != -1:
        aim_title = aim_title[:useless_str_start]
    print(f"标题解析已完成:{aim_title}\n")
    write_json_file(title=aim_title, link=aim_video_urls)
存入全局的json变量,最后会写入本地json文件
# Store the title and download link as a key/value record for the download
# script to read.
def write_json_file(title, link):
    """Append one ``{"title": ..., "link": ...}`` record to the global ``res_json``."""
    res_json.append({"title": f"{title}", "link": f"{link}"})
_______________________________________________________________________________
读取本地json配置文件
def get_video(index):
    """Download the entry at *index*, or exit once every entry is done.

    Args:
        index: Zero-based position in the global ``json_res`` list.

    Raises:
        SystemExit: With code 200 when *index* is past the last entry.

    Note:
        ``save_video`` calls back into this function for the next index, so
        the call stack grows with every download.
    """
    if index >= len(json_res):
        print(f'下载完成,共下载了{index}次')
        # ``quit`` is an interactive helper injected by the ``site`` module
        # and may be missing (e.g. ``python -S``); raise the same SystemExit
        # directly instead.
        raise SystemExit(200)
    print(f'开始第{index + 1}次下载')
    save_video(index)
def main():
    """Load ``urls.json`` into the global ``json_res`` and start at index 0."""
    global json_res
    # Read the JSON file.
    with open('urls.json', encoding='utf-8') as res:
        print('读取json配置中')
        raw_text = res.read()
        json_res = json.loads(raw_text)
    get_video(index=0)
循环下载,组合文件名将视频存入指定目录
def save_video(index):
    """Download entry *index* to ``./output/<title>.mp4``, rest, then continue.

    Args:
        index: Zero-based position in the global ``json_res`` list; the entry
            is read through its ``"link"`` and ``"title"`` keys.

    After saving the file the function sleeps for a random duration and then
    calls ``get_video`` with the next index.
    """
    # Local imports keep this block self-contained.
    import os
    import time

    entry = json_res[index]
    url = entry.get("link")
    title = entry.get("title")
    video_content = requests.get(url).content
    # Make sure the target directory exists; open() does not create it.
    os.makedirs('./output', exist_ok=True)
    with open('./output/' + title + '.mp4', 'wb') as f:
        f.write(video_content)
    print(f"第{index + 1}次下载完成\n")
    ran = random.randint(0, 10)+random.randint(0, 10)/10+random.randint(0, 10)/100
    # Random rest: actually pause for the announced duration (the pause
    # itself was missing before, only the message was printed).
    print(f'随机休息{ran}秒')
    time.sleep(ran)
    get_video(index + 1)
_______________________________________________________________________________
本人技术不是很好,写的代码仅供参考,如代码有什么纰漏大佬可以指点一下
代码仅供学习参考,如果你们需要视频下载软件,作者建议你们使用IDM下载器
构思来源于网络,如有侵权请联系我删帖
走过路过的朋友不要忘了给作者点一下下面免费的评分,你的支持是我的动力!