def get_all_url(url, type):
    """Collect the episode URLs of a series page, or pass a single URL through.

    Args:
        url: Address of the page to inspect.
        type: Content type of the page. The values 'anime', 'tv' and
            'documentary' are handled as multi-episode pages. (The name
            shadows the builtin; it is kept so keyword callers still work.)

    Returns:
        For the three multi-episode types, a list with every ``share_url``
        value found in the page source. For any other type, ``url`` itself,
        unchanged -- note this is a plain string, not a list.

    Raises:
        requests.RequestException: if the page cannot be fetched, including
            when the server does not answer within the timeout.
    """
    if type not in ('anime', 'tv', 'documentary'):
        return url

    # A timeout keeps a stalled server from blocking the caller forever.
    response = requests.get(url, headers=headers1, timeout=30)

    # Cut everything from "upInfo" up to the closing </html> before searching.
    # NOTE(review): presumably this drops share_url entries that do not belong
    # to the episode list -- confirm against a real page.
    trimmed = re.sub(r'upInfo.*</html>', '', response.text)
    return re.findall(r'"share_url":"(http.*?)"', trimmed)
# Minimal streamed download: the body is written to disk chunk by chunk.
chunk_size = 1024  # size of each downloaded chunk
video = requests.get(url, headers=headers, stream=True)
content_size = int(video.headers['content-length'])  # total file size
with open('test.mp4', 'wb') as out_file:
    # Walk the response body one chunk at a time and append it to the file.
    for chunk in video.iter_content(chunk_size=chunk_size):
        out_file.write(chunk)
用法比较简单。
为了实现下载进度的可视化,可以结合以上代码做出一个进度条。
[Python] 纯文本查看复制代码
# Streamed download that redraws a one-line text progress bar as data arrives.
video = requests.get(url, headers=headers, stream=True)
chunk_size = 1024  # size of each downloaded chunk
content_size = int(video.headers['content-length'])  # total file size
data_count = 0  # amount of data downloaded so far
with open('test.mp4', 'wb') as f:
    for data in video.iter_content(chunk_size=chunk_size):  # download chunk by chunk
        f.write(data)
        data_count += len(data)
        progress = 100 * data_count / content_size  # percentage downloaded so far
        # '\r' is a carriage return (not a tab): it moves the cursor back to
        # the start of the line so each update overwrites the previous bar.
        # end=' ' suppresses the newline, keeping the bar on a single line.
        # The unfilled part is padded with spaces so the closing bracket
        # stays in a fixed column.
        print('\r 正在下载:[%s%s] %d%%' % (int(progress) * '█', ' ' * (100 - int(progress)), progress), end=' ')
至此,可以定义函数down_video(urls)
[Python] 纯文本查看复制代码
def _download_stream(url, dest_path, label):
    """Stream ``url`` into ``dest_path`` while drawing a one-line progress bar.

    Args:
        url: Address of the media stream.
        dest_path: File the response body is written to.
        label: Text printed in front of the progress bar.

    Raises:
        RuntimeError: if the server answers with a status other than 206.
        Exception: whatever the request, the header lookup or the file
            write raises is passed on to the caller.
    """
    # The with-block closes the streamed response even when the download
    # stops early; the timeout keeps a stalled server from hanging the call.
    with requests.get(url, headers=headers2, stream=True, timeout=30) as response:
        if response.status_code != 206:
            raise RuntimeError('unexpected HTTP status %d' % response.status_code)
        total = int(response.headers['content-length'])
        received = 0
        shown = -1  # last whole percentage printed
        with open(dest_path, 'wb') as out_file:
            for chunk in response.iter_content(chunk_size=1024):
                out_file.write(chunk)
                received += len(chunk)
                progress = received * 100 / total
                # Redraw only when the whole-number percentage changes,
                # instead of printing once per 1 KiB chunk.
                if int(progress) != shown:
                    shown = int(progress)
                    print('\r %s[%s%s] %d%%' % (label, shown * '█', ' ' * (100 - shown), progress), end=' ')


def down_video(urls):
    """Download the video and audio streams of one item and merge them.

    Args:
        urls: Mapping with the keys 'video' and 'audio' (stream addresses),
            'title' (base name of the temporary files) and
            'title_of_series' (directory created for the result).

    Returns:
        True once both streams were downloaded and ``merge`` was called;
        False if either download failed (the error is printed).

    The temporary ``./download`` directory is removed on every exit path.
    """
    # Temporary folder that holds the separate audio and video files.
    os.makedirs('./download', exist_ok=True)
    os.makedirs(urls['title_of_series'], exist_ok=True)
    try:
        try:
            base_path = './download/' + urls['title']
            _download_stream(urls['video'], base_path + '.mp4', '正在下载视频:')
            _download_stream(urls['audio'], base_path + '.mp3', '正在下载音频')
        except Exception as err:
            # Exception (not a bare except) lets Ctrl-C and SystemExit
            # propagate instead of being reported as a download error.
            print('Error!', err)
            return False
        # merge is defined elsewhere in this file.
        # NOTE(review): presumably it combines the two temporary files into
        # the series directory -- confirm.
        merge(urls['title'], urls['title_of_series'])
        return True
    finally:
        shutil.rmtree('./download')
Traceback (most recent call last):
File "C:\Users\wjdxs\Desktop\爬取哔哩哔哩视频.py", line 128, in <module>
type = type_of_video(url)
File "C:\Users\wjdxs\Desktop\爬取哔哩哔哩视频.py", line 10, in type_of_video
type = BeautifulSoup(response.text, 'lxml').find(attrs={'property': 'og:type'})['content']
File "D:\编程\Python\Python\Lib\site-packages\bs4\__init__.py", line 248, in __init__
raise FeatureNotFound(
bs4.FeatureNotFound: Couldn't find a tree builder with the features you requested: lxml. Do you need to install a parser library?