import
os
import
random
import
json
import
time
import
requests
from
lxml
import
etree
def get_user_agent():
    """Return the pool of browser User-Agent strings.

    Returns:
        list[str]: A new list on every call, so callers may mutate it
        freely. A few entries appear more than once in the pool.
    """
    user_agents = [
        "Mozilla/4.0 (compatible; MSIE 6.0; Windows NT 5.1; SV1; AcooBrowser; .NET CLR 1.1.4322; .NET CLR 2.0.50727)",
        "Mozilla/4.0 (compatible; MSIE 7.0; Windows NT 6.0; Acoo Browser; SLCC1; .NET CLR 2.0.50727; Media Center PC 5.0; .NET CLR 3.0.04506)",
        "Mozilla/4.0 (compatible; MSIE 7.0; AOL 9.5; AOLBuild 4337.35; Windows NT 5.1; .NET CLR 1.1.4322; .NET CLR 2.0.50727)",
        "Mozilla/5.0 (Windows; U; MSIE 9.0; Windows NT 9.0; en-US)",
        "Mozilla/5.0 (compatible; MSIE 9.0; Windows NT 6.1; Win64; x64; Trident/5.0; .NET CLR 3.5.30729; .NET CLR 3.0.30729; .NET CLR 2.0.50727; Media Center PC 6.0)",
        "Mozilla/5.0 (compatible; MSIE 8.0; Windows NT 6.0; Trident/4.0; WOW64; Trident/4.0; SLCC2; .NET CLR 2.0.50727; .NET CLR 3.5.30729; .NET CLR 3.0.30729; .NET CLR 1.0.3705; .NET CLR 1.1.4322)",
        "Mozilla/4.0 (compatible; MSIE 7.0b; Windows NT 5.2; .NET CLR 1.1.4322; .NET CLR 2.0.50727; InfoPath.2; .NET CLR 3.0.04506.30)",
        "Mozilla/5.0 (Windows; U; Windows NT 5.1; zh-CN) AppleWebKit/523.15 (KHTML, like Gecko, Safari/419.3) Arora/0.3 (Change: 287 c9dfb30)",
        "Mozilla/5.0 (X11; U; Linux; en-US) AppleWebKit/527+ (KHTML, like Gecko, Safari/419.3) Arora/0.6",
        "Mozilla/5.0 (Windows; U; Windows NT 5.1; en-US; rv:1.8.1.2pre) Gecko/20070215 K-Ninja/2.1.1",
        "Mozilla/5.0 (Windows; U; Windows NT 5.1; zh-CN; rv:1.9) Gecko/20080705 Firefox/3.0 Kapiko/3.0",
        "Mozilla/5.0 (X11; Linux i686; U;) Gecko/20070322 Kazehakase/0.4.5",
        "Mozilla/5.0 (X11; U; Linux i686; en-US; rv:1.9.0.8) Gecko Fedora/1.9.0.8-1.fc10 Kazehakase/0.5.6",
        "Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/535.11 (KHTML, like Gecko) Chrome/17.0.963.56 Safari/535.11",
        "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_7_3) AppleWebKit/535.20 (KHTML, like Gecko) Chrome/19.0.1036.7 Safari/535.20",
        "Opera/9.80 (Macintosh; Intel Mac OS X 10.6.8; U; fr) Presto/2.9.168 Version/11.52",
        "Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/536.11 (KHTML, like Gecko) Chrome/20.0.1132.11 TaoBrowser/2.0 Safari/536.11",
        "Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/537.1 (KHTML, like Gecko) Chrome/21.0.1180.71 Safari/537.1 LBBROWSER",
        "Mozilla/5.0 (compatible; MSIE 9.0; Windows NT 6.1; WOW64; Trident/5.0; SLCC2; .NET CLR 2.0.50727; .NET CLR 3.5.30729; .NET CLR 3.0.30729; Media Center PC 6.0; .NET4.0C; .NET4.0E; LBBROWSER)",
        "Mozilla/4.0 (compatible; MSIE 6.0; Windows NT 5.1; SV1; QQDownload 732; .NET4.0C; .NET4.0E; LBBROWSER)",
        "Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/535.11 (KHTML, like Gecko) Chrome/17.0.963.84 Safari/535.11 LBBROWSER",
        "Mozilla/4.0 (compatible; MSIE 7.0; Windows NT 6.1; WOW64; Trident/5.0; SLCC2; .NET CLR 2.0.50727; .NET CLR 3.5.30729; .NET CLR 3.0.30729; Media Center PC 6.0; .NET4.0C; .NET4.0E)",
        "Mozilla/5.0 (compatible; MSIE 9.0; Windows NT 6.1; WOW64; Trident/5.0; SLCC2; .NET CLR 2.0.50727; .NET CLR 3.5.30729; .NET CLR 3.0.30729; Media Center PC 6.0; .NET4.0C; .NET4.0E; QQBrowser/7.0.3698.400)",
        "Mozilla/4.0 (compatible; MSIE 6.0; Windows NT 5.1; SV1; QQDownload 732; .NET4.0C; .NET4.0E)",
        "Mozilla/4.0 (compatible; MSIE 7.0; Windows NT 5.1; Trident/4.0; SV1; QQDownload 732; .NET4.0C; .NET4.0E; 360SE)",
        "Mozilla/4.0 (compatible; MSIE 6.0; Windows NT 5.1; SV1; QQDownload 732; .NET4.0C; .NET4.0E)",
        "Mozilla/4.0 (compatible; MSIE 7.0; Windows NT 6.1; WOW64; Trident/5.0; SLCC2; .NET CLR 2.0.50727; .NET CLR 3.5.30729; .NET CLR 3.0.30729; Media Center PC 6.0; .NET4.0C; .NET4.0E)",
        "Mozilla/5.0 (Windows NT 5.1) AppleWebKit/537.1 (KHTML, like Gecko) Chrome/21.0.1180.89 Safari/537.1",
        "Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/537.1 (KHTML, like Gecko) Chrome/21.0.1180.89 Safari/537.1",
        "Mozilla/5.0 (iPad; U; CPU OS 4_2_1 like Mac OS X; zh-cn) AppleWebKit/533.17.9 (KHTML, like Gecko) Version/5.0.2 Mobile/8C148 Safari/6533.18.5",
        "Mozilla/5.0 (Windows NT 6.1; Win64; x64; rv:2.0b13pre) Gecko/20110307 Firefox/4.0b13pre",
        "Mozilla/5.0 (X11; Ubuntu; Linux x86_64; rv:16.0) Gecko/20100101 Firefox/16.0",
        "Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/537.11 (KHTML, like Gecko) Chrome/23.0.1271.64 Safari/537.11",
        "Mozilla/5.0 (X11; U; Linux x86_64; zh-CN; rv:1.9.2.10) Gecko/20100922 Ubuntu/10.10 (maverick) Firefox/3.6.10",
        "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/58.0.3029.110 Safari/537.36",
    ]
    return user_agents
def get_proxy():
    """Return the pool of HTTP proxy endpoints.

    Returns:
        list[str]: A new list on every call; each entry is a proxy URL of
        the form ``http://<ip>:<port>``.
    """
    proxy_pool = [
        'http://182.140.244.163:8118',
        'http://113.124.86.180:9999',
        'http://117.64.237.42:9999',
        'http://182.34.102.48:9999',
        'http://183.236.123.242:8060',
        'http://27.192.203.80:9000',
        'http://114.231.8.242:8888',
        'http://36.134.91.82:8888',
        'http://222.132.57.105:9000',
        'http://61.216.156.222:60808',
        'http://182.34.20.110:9999',
        'http://60.205.132.71:80',
    ]
    return proxy_pool
# Request headers shared by every HTTP call in this module. One User-Agent is
# drawn at random when the module is loaded and then reused for all requests.
headers = {'user-agent': random.choice(get_user_agent())}

# Proxy mapping handed to requests through ``proxies=``. A single proxy is
# drawn at random at load time.
# NOTE(review): only the 'http' scheme is mapped, while the URLs fetched in
# this file are https://. requests picks the proxy per URL scheme, so this
# entry is probably not applied to them -- confirm whether an 'https' entry
# was intended.
proxy = {'http': random.choice(get_proxy())}
def get_index_source(url, timeout=30):
    """Download the page at *url* and save its text to ``index.html``.

    The request is sent with the module-level ``headers`` and ``proxy``
    settings. The response body is decoded as UTF-8 and written to
    ``index.html`` in the current working directory, overwriting any
    existing file.

    Args:
        url: Address of the page to download.
        timeout: Seconds to wait for the server before giving up, passed
            straight to ``requests``.

    Returns:
        str: The name of the file that was written (``'index.html'``).

    Raises:
        requests.RequestException: On connection failure, timeout, or a
            non-2xx HTTP status.
    """
    file_name = 'index.html'

    # The context manager closes the session's connection pool when done.
    with requests.session() as sess:
        response = sess.get(url, headers=headers, proxies=proxy, timeout=timeout)

    # Fail loudly instead of saving an error page that later parses to nothing.
    response.raise_for_status()
    response.encoding = 'utf-8'

    with open(file_name, mode='w', encoding='utf-8') as f:
        f.write(response.text)
    return file_name
def get_urls(source_file):
    """Extract the absolute episode-page URLs from a saved index page.

    Args:
        source_file: Path of a UTF-8 encoded HTML file to parse.

    Returns:
        list[str]: Every ``href`` matched under the ``#playlist`` element,
        prefixed with ``https://www.sxcse.com`` and returned in the reverse
        of document order. The list is empty when nothing matches.
    """
    # Read inside a context manager so the file handle is always closed.
    with open(source_file, 'r', encoding='utf-8') as f:
        tree = etree.HTML(f.read())

    hrefs = tree.xpath(
        '//*[@id="playlist"]/div[2]/div[2]/div/div/ul/li/a/@href'
    )

    # The result is deliberately reversed relative to the page order
    # (presumably the page lists the newest episode first -- confirm).
    return ['https://www.sxcse.com' + href for href in reversed(hrefs)]
def _extract_play_key(page_html):
    """Return the play key embedded in an episode page's inline script."""
    tree = etree.HTML(page_html)
    script_text = tree.xpath(
        '/html/body/div[2]/div[1]/div/div/div[1]/div/script/text()'
    )[0]
    # Assumes the script has the shape ``<name> = {...json...};`` -- take the
    # text between the first ' = ' and the first ';', and drop backslashes
    # before JSON decoding.
    payload = str(script_text).split(' = ')[1].replace('\\', '').split(';')[0]
    player_data = json.loads(payload)
    # The 'url' value is split on the URL-encoded '/' ("%2F"); the key is the
    # fifth segment.
    return player_data.get('url').split('%2F')[4]


def _save_response_body(sess, url, dest_path, timeout):
    """Fetch *url* with *sess* and write the raw response bytes to *dest_path*."""
    resp = sess.get(url, headers=headers, timeout=timeout)
    # Do not save an HTTP error page as if it were the requested file.
    resp.raise_for_status()
    with open(dest_path, 'wb') as f:
        f.write(resp.content)


def download_m3u8_file(url_list, m3u8_path='m3u8_files',
                       enckey_path='enckey_files', timeout=30):
    """Download the m3u8 playlist and encryption key for each episode page.

    For every URL in *url_list* (numbered from 1 in iteration order) the
    episode page is fetched, the play key is extracted from its inline
    script, and ``index.m3u8`` and ``enc.key`` are downloaded from
    ``https://1080p.jszyplay.com/play/<key>/``. They are saved as
    ``第<n>集.m3u8`` under *m3u8_path* and ``第<n>集.key`` under *enckey_path*.
    The function sleeps a random 5-10 seconds after each episode.

    Args:
        url_list: Iterable of episode page URLs.
        m3u8_path: Directory for playlist files; created if missing.
        enckey_path: Directory for key files; created if missing.
        timeout: Seconds to wait on each HTTP request.

    Raises:
        requests.RequestException: On connection failure, timeout, or a
            non-2xx HTTP status for any of the requests.
        IndexError: If the page does not contain the expected script, or
            the script/URL does not have the expected shape.
        json.JSONDecodeError: If the extracted script payload is not JSON.
    """
    # exist_ok avoids the check-then-create race and also creates parents.
    os.makedirs(m3u8_path, exist_ok=True)
    os.makedirs(enckey_path, exist_ok=True)

    for episode, url in enumerate(url_list, start=1):
        print(f'正在下载第{episode}集的m3u8文件和enckey文件...')

        # One session per episode, closed as soon as its files are saved.
        with requests.session() as sess:
            response = sess.get(url, headers=headers, proxies=proxy,
                                timeout=timeout)
            response.raise_for_status()
            response.encoding = 'UTF-8'

            key = _extract_play_key(response.text)
            m3u8_url = 'https://1080p.jszyplay.com/play/' + key + '/index.m3u8'
            enckey_url = 'https://1080p.jszyplay.com/play/' + key + '/enc.key'

            # The media-host requests are sent without the proxy mapping.
            m3u8_file_path = os.path.join(m3u8_path, f'第{episode}集.m3u8')
            _save_response_body(sess, m3u8_url, m3u8_file_path, timeout)
            print(f'第{episode}集的m3u8文件下载完成!')

            enckey_file_path = os.path.join(enckey_path, f'第{episode}集.key')
            _save_response_body(sess, enckey_url, enckey_file_path, timeout)
            print(f'第{episode}集的enckey文件下载完成!')

        # Random pause between episodes to avoid hammering the server.
        time.sleep(random.randint(5, 10))
if __name__ == '__main__':
    # Script entry point: save the index page, collect the episode URLs from
    # it, then download each episode's playlist and key.
    url = 'https://www.sxcse.com/mov/sdd2023.html'
    # Use the path reported by get_index_source instead of repeating the
    # file name, so the two cannot drift apart.
    index_file = get_index_source(url)
    urls = get_urls(index_file)
    download_m3u8_file(urls)