import csv
import re
import sys

import requests
# Listing pages to scrape: list-1-0.html through list-1-10.html.
urls = [f'http://www.0818tuan.com/list-1-{i}.html' for i in range(0, 11)]

# Browser-like User-Agent sent with every request.
headers = {
    'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/122.0.0.0 Safari/537.36',
}

# Seconds to wait for a page before giving up on it; without a timeout a
# single stalled connection would block the script indefinitely.
REQUEST_TIMEOUT = 10

# Captures (numeric id of 7+ digits, title) from anchors of the form
#   ...d1234567.html" target="_blank" title="<title>" ...
# or .../1234567.html" ...  The title must start with a CJK character.
# The dot before "html" is escaped, and the title stops at the first closing
# quote ([^"]*) so several links on one line are not merged into one match.
LINK_PATTERN = re.compile(
    r'[d\/](\d{7,})\.html" target="_blank" title="([\u4e00-\u9fa5][^"]*)" '
)


def fetch_page(url):
    """Return the body text of *url*, or None if it could not be fetched.

    A network error or a non-200 status is reported on stderr and the page is
    skipped, so one bad page does not abort the whole run.
    """
    try:
        response = requests.get(url, headers=headers, timeout=REQUEST_TIMEOUT)
    except requests.RequestException as exc:
        print(f"skipping {url}: {exc}", file=sys.stderr)
        return None
    if response.status_code != 200:
        print(f"skipping {url}: HTTP {response.status_code}", file=sys.stderr)
        return None
    # NOTE(review): response.text relies on the encoding requests infers from
    # the response; confirm the site declares its charset correctly.
    return response.text


def extract_matches(html):
    """Return a list of (match_id, title) tuples found in *html*."""
    return LINK_PATTERN.findall(html)


def build_url_map(matches):
    """Map each detail-page URL to its title.

    When the same id appears more than once, the last title seen wins.
    """
    return {
        f"http://www.0818tuan.com/xbhd/{match_id}.html": match_title
        for match_id, match_title in matches
    }


def write_csv(url_titles, path="爬取结果.csv"):
    """Write the url -> title mapping to *path* as a two-column CSV.

    The file is encoded as GBK; characters GBK cannot represent are dropped
    (errors='ignore') instead of raising.
    """
    with open(path, 'w', newline='', encoding="GBK", errors='ignore') as f:
        writer = csv.writer(f)
        writer.writerow(['url链接', '标题'])
        writer.writerows(url_titles.items())


def main():
    """Scrape every listing page and save the collected links to the CSV."""
    matches = []
    for url in urls:
        html = fetch_page(url)
        if html is not None:
            matches.extend(extract_matches(html))
    write_csv(build_url_map(matches))


if __name__ == "__main__":
    main()