from
playwright.sync_api
import
sync_playwright
def _inner_text_or_empty(element):
    """Return ``element.inner_text()``, or ``''`` when *element* is ``None``."""
    return element.inner_text() if element is not None else ''


def get_data(url):
    """Print the movie entries found at *url* and return how many were printed.

    Navigates the module-level ``page`` object to *url*, selects every
    ``.grid_view .item`` node and, for each one that has a title node, prints
    one line with its title, rating, rating count and quote.  Items without
    a title node print ``"Not found"`` and are not counted.

    NOTE(review): ``page`` is not a parameter; it is read from module scope
    and is presumably the Playwright page created by the script section of
    this file -- confirm it is assigned before this function is called.

    Args:
        url: Address passed to ``page.goto``.

    Returns:
        The number of items for which a line of movie data was printed.
    """
    page.goto(url)
    count = 0
    for movie in page.query_selector_all('.grid_view .item'):
        title_element = movie.query_selector('.hd a span.title')
        if title_element is None:
            print("Not found")
            continue

        # Every field is computed afresh for each movie.  Previously the
        # values lived outside the loop, so an item without a quote silently
        # reused the quote of the item before it.
        title = title_element.inner_text()
        # query_selector yields None when nothing matches; fall back to ''
        # instead of raising AttributeError on a missing optional node.
        rating = _inner_text_or_empty(movie.query_selector('.star .rating_num'))
        num = _inner_text_or_empty(
            movie.query_selector('.star span:nth-child(4)')
        )
        quote = _inner_text_or_empty(movie.query_selector('.quote span.inq'))

        print(f"名称: {title}, 评分: {rating}, 评价人数: {num}, 引言:{quote}")
        count += 1
    return count
# Script section: launch Chromium, walk ten listing pages (start offsets
# 0, 25, ..., 225) and print the total number of records get_data() reported.
with sync_playwright() as p:
    browser = p.chromium.launch()
    try:
        # get_data() reads this module-level name, so it must stay `page`.
        page = browser.new_page()
        base_url = 'https://movie.douban.com/top250'
        # Named `total` rather than `sum` so the builtin is not shadowed.
        total = 0
        for i in range(10):
            url = f'{base_url}?start={i * 25}&filter='
            total += get_data(url)
        print("总记录数:", total)
    finally:
        # Close the browser explicitly, even if a page load or lookup raised.
        browser.close()