import
time
import
threading
from
bs4
import
BeautifulSoup
from
selenium
import
webdriver
from
selenium.webdriver.chrome.service
import
Service
# --- Module-level configuration and shared state -------------------------

# Address of the Douyin live room whose comments are scraped.
live_dy_url: str = 'https://live.douyin.com/670238762772'

# Selenium Chrome driver shared between the initializer and the polling
# thread; stays None until init_crawling_browser_dy() assigns it.
crawling_browser_dy = None

# NOTE(review): not referenced by the functions visible in this file;
# presumably meant to accumulate scraped comments - confirm before removing.
comment_list: list = []

# Number of comment nodes seen on the previous poll; used to detect growth.
previous_comment_count: int = 0
def init_crawling_browser_dy(url):
    """Launch Chrome, open the live room at *url*, and start comment polling.

    The created driver is stored in the module-level ``crawling_browser_dy``
    so that ``poll_comments`` (started here on a background thread) can read
    the page source from it.

    Args:
        url: Address of the Douyin live room to open.
    """
    global crawling_browser_dy

    service = Service(r'D:\Python\Python3.10.7\chromedriver.exe')

    options = webdriver.ChromeOptions()
    # 'detach' keeps the browser window open after the driver process ends.
    options.add_experimental_option('detach', True)
    options.add_argument('--ignore-certificate-errors')
    # Experimental options are stored one value per option name, so calling
    # add_experimental_option('excludeSwitches', ...) twice makes the second
    # call replace the first. Pass both switches in a single list so that
    # 'enable-automation' is not silently dropped.
    options.add_experimental_option(
        'excludeSwitches', ['enable-automation', 'enable-logging']
    )

    crawling_browser_dy = webdriver.Chrome(options=options, service=service)

    # Visit the site root first, then the room itself after a short pause.
    # NOTE(review): presumably this lets the landing page initialize
    # cookies/session state before the room loads - confirm.
    crawling_browser_dy.get('https://live.douyin.com/')
    crawling_browser_dy.maximize_window()
    time.sleep(2)
    crawling_browser_dy.get(url)

    # Non-daemon thread: the polling loop keeps the process alive after the
    # caller returns.
    threading.Thread(target=poll_comments).start()
def _print_comment(comment_item):
    """Print the username and text of one comment node, if it has text."""
    username_span = comment_item.find('span', class_='u2QdU6ht')
    username = username_span.text if username_span else '未知用户'

    comment_span = comment_item.find(
        'span', class_='webcast-chatroom___content-with-emoji-text'
    )
    # Nodes without a text span (nothing to show) are skipped silently.
    if comment_span:
        print(f"用户:{username}----- 评论:{comment_span.text}")


def poll_comments():
    """Poll the live-room page once per second and print new comments.

    Runs forever. On every iteration the current page source of the shared
    ``crawling_browser_dy`` driver is parsed, the comment nodes are located,
    and every node beyond the count seen on the previous poll is printed.
    ``previous_comment_count`` is then updated to the current count.

    Any exception raised while reading or parsing the page is reported and
    the loop continues with the next poll.
    """
    global previous_comment_count

    while True:
        try:
            soup_dy = BeautifulSoup(crawling_browser_dy.page_source, 'html.parser')
            # NOTE(review): 'TNg5meqw' / 'u2QdU6ht' look like generated CSS
            # class names and may change when the site is redeployed.
            comment_items = soup_dy.find_all('div', class_='TNg5meqw')

            # Print only the nodes that appeared since the last poll. Always
            # taking the last five would repeat already-printed comments
            # when fewer than five arrived and lose comments when more did.
            # The slice is empty when the count did not grow.
            for comment_item in comment_items[previous_comment_count:]:
                _print_comment(comment_item)

            # Refresh the baseline on every poll so that a shrinking list
            # (e.g. after a page reload) does not block future detection.
            previous_comment_count = len(comment_items)
        except Exception as e:
            # Thread-level boundary: report and keep polling.
            print(f'抖音自动化测试浏览器:发生异常 - {e}')

        # Sleep outside the try block so a persistent failure (such as a
        # closed browser) cannot turn the loop into a busy loop.
        time.sleep(1)
# Script entry point: runs at import/execution time. Launches the browser for
# the configured live room and starts the background comment-polling thread.
init_crawling_browser_dy(live_dy_url)