本帖最后由 yek2furw 于 2021-8-23 10:34 编辑
发现一个大佬分享的oneindex,特别想要他的资源所以写了这个代码,打开即用
[Python] 纯文本查看 复制代码 import requests, re, urllib
class GetFileUrl(object):
    """Fetch a directory-listing page and extract the entries it links to.

    The page at ``url`` is requested when the object is constructed.
    ``re_data`` then parses it into folder and file entries, and
    ``get_file_url`` turns the file entries into download links.
    """

    # One listing entry: captures (href, size, material-icon name).
    _ITEM_RE = re.compile(
        r'</a><a href="(.+?)" class="item" size="(\d+?)"><i class="material-icons">(\w+?)</i>',
        re.M,
    )

    def __init__(self, url):
        """Open ``url`` with a mobile User-Agent.

        Args:
            url: Address of the listing page. File links are built by
                appending the quoted entry name to this string.

        Raises:
            urllib.error.HTTPError: if the server answers with an error status.
        """
        self.url = url
        self.folder_list = []
        self.description_list = []
        # Decoded page text, filled in lazily by re_data().
        self._page = None
        headers = {
            'User-Agent': 'Mozilla/5.0 (iPhone; CPU iPhone OS 8_0 like Mac OS X) AppleWebKit/600.1.4 (KHTML, like Gecko) Mobile/12A365 MicroMessenger/5.4.1 NetType/WIFI'
        }
        # A plain GET: no request body is attached, since a payload on GET has
        # no defined meaning and some servers reject it.
        req = urllib.request.Request(url=url, headers=headers, method='GET')
        self.response = urllib.request.urlopen(req)

    def re_data(self):
        """Parse the listing page into folders and files.

        The response body is read once and cached, then the response is
        closed, so calling this method again returns the same result.

        Returns:
            A ``(folder_list, description_list)`` tuple. Each list holds
            ``(href, size)`` pairs of strings; entries whose icon is
            ``folder`` go to the first list, everything else to the second.
        """
        if getattr(self, '_page', None) is None:
            with self.response as resp:
                self._page = resp.read().decode('utf8')

        self.folder_list = []
        self.description_list = []
        matches = self._ITEM_RE.findall(self._page)
        if not matches:
            print("没有匹配到链接..")
        for href, size, icon in matches:
            if icon == "folder":
                self.folder_list.append((href, size))
            else:
                self.description_list.append((href, size))
        return self.folder_list, self.description_list

    def get_file_url(self):
        """Build download links for the file entries found by ``re_data``.

        Returns:
            A list of ``((href, size), link)`` tuples, where ``link`` is
            ``self.url`` followed by the percent-quoted ``href``.
        """
        return [
            (entry, self.url + urllib.parse.quote(entry[0]))
            for entry in self.description_list
        ]
def download_file(url, file_pname, file_size, chunk_size=1024 * 4):
    """Stream ``url`` into ``file_pname`` while printing one-line progress.

    Args:
        url: Download link.
        file_pname: Path the file is saved to.
        file_size: Expected total size in bytes; used only for the
            progress display.
        chunk_size: Number of bytes requested per streamed chunk.
    """
    download_size = 0
    # Using the response as a context manager releases the connection even
    # when writing fails midway.
    with requests.get(url, stream=True) as response_data_file, \
            open(file_pname, 'wb') as f:
        for chunk in response_data_file.iter_content(chunk_size=chunk_size):
            if not chunk:
                continue
            f.write(chunk)
            # Count the bytes actually received: a chunk (the last one in
            # particular) can be shorter than chunk_size.
            download_size += len(chunk)
            print('\r%s 已下载%dM,共%dM,剩余%dM' % (file_pname, download_size/1024/1024, file_size/1024/1024, (file_size - download_size)/1024/1024), end = "")
def main(url, max_retries=5, retry_delay=2):
    """Download every file linked from the listing page at ``url``.

    Only the given page is processed; folder entries are not followed.
    Files are saved under the names they have in the listing.

    Args:
        url: Address of the listing page. File links are formed as
            ``url`` + quoted file name.
        max_retries: How many times opening the page is attempted before
            giving up.
        retry_delay: Seconds to wait between two attempts.

    Raises:
        urllib.error.HTTPError: if the page still cannot be opened after
            ``max_retries`` attempts.
    """
    import time  # imported locally so this function is self-contained

    attempt = 0
    while True:
        attempt += 1
        try:
            geturl = GetFileUrl(url)
            break
        except urllib.error.HTTPError:
            # Bounded, paced retries: a permanent error such as a 404 must
            # not turn into an endless busy loop against the server.
            if attempt >= max_retries:
                raise
            print("链接没有打开,正在重试...")
            time.sleep(retry_delay)

    _folders, files = geturl.re_data()
    for file_pname, file_size in files:
        file_url = url + urllib.request.quote(file_pname)
        download_file(file_url, file_pname, int(file_size))
        print("%s 下载完成"%file_pname)
if __name__ == '__main__':
    # Script entry point: prompt for the listing page address and download
    # everything that page links to.
    main(input("请输入网址:"))
代码不完全,只能爬取当前页面,因为写了一半联系到了大佬,他把mover.io账号给我了,直接搬运不用走vps流量
原来搬运思路是先在vps挂载onedrive,然后直接用这个脚本下载到onedrive目录
想继续开发的大神可以继续开发,我只是提供一个思路
想练手没有onedrive的朋友们可以留下 昵称+前缀,我给你发5T的E5盘 |