由于工作需求,需要生成大量的文章内容。目前遇到的一个问题是:这个 360AI 每次生成 15 篇以后就会提示"访问频繁",请问有什么办法能够解决吗?
from selenium import webdriver
from selenium.webdriver.common.by import By
from selenium.webdriver.chrome.service import Service
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.support import expected_conditions as EC
from webdriver_manager.chrome import ChromeDriverManager
from docx import Document
import pandas as pd
import os
def search_and_save(keyword, driver, save_path):
    """Search sou.com for *keyword* and save the generated answer as a .docx file.

    Parameters:
        keyword:   search term; also used (sanitized) as the output file name.
        driver:    an already-initialized selenium WebDriver.
        save_path: directory where the .docx file is written.
    """
    from urllib.parse import quote
    import re

    # Percent-encode the keyword so CJK characters, spaces and '&'/'?'-style
    # characters form a valid query string instead of a malformed URL.
    url = f"https://www.sou.com/?q={quote(keyword)}"
    driver.get(url)
    # Wait for this footer <div> to appear — it only shows up once the
    # answer generation has finished rendering.
    WebDriverWait(driver, 60).until(
        EC.presence_of_element_located((By.CSS_SELECTOR, "div.mt-16px.text-black.text-opacity-30.text-14px.leading-20px"))
    )
    # Extract the generated article text.
    content_div = driver.find_element(By.CSS_SELECTOR, "div.markdown-container")
    content = content_div.text
    # Build the .docx document: keyword as heading, answer as body.
    document = Document()
    document.add_heading(keyword, level=1)
    document.add_paragraph(content)
    # Strip characters that are illegal in Windows file names (\ / : * ? " < > |)
    # so document.save() cannot fail on keywords containing them.
    safe_name = re.sub(r'[\\/:*?"<>|]', '_', keyword)
    docx_filename = os.path.join(save_path, f"{safe_name}.docx")
    document.save(docx_filename)
    print(f"文件已保存:{docx_filename}")
def main():
    """Read keywords from an Excel sheet and save one .docx answer per keyword.

    Sleeps a randomized interval between searches so the site's rate limit
    (the "访问频繁" prompt after ~15 generations) is far less likely to trip.
    """
    import time
    import random

    # Initialize the selenium webdriver with a regular desktop User-Agent
    # so the automated browser looks like an ordinary Chrome session.
    service = Service(ChromeDriverManager().install())
    options = webdriver.ChromeOptions()
    user_agent = "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/123.0.0.0 Safari/537.36"
    options.add_argument(f'user-agent={user_agent}')
    driver = webdriver.Chrome(service=service, options=options)

    # Directory that receives the generated .docx files; create it if missing.
    save_path = "E:\\360AI"
    os.makedirs(save_path, exist_ok=True)

    # Keywords live in the first column of the spreadsheet, one per row.
    excel_path = "C:\\Users\\zmn\\Desktop\\360ai.xlsx"
    df = pd.read_excel(excel_path)
    keywords = df.iloc[:, 0].dropna().tolist()

    try:
        # Visit the site once so cookies can be attached to its domain.
        driver.get("https://www.sou.com/")
        # Paste your logged-in cookie string here ("k1=v1; k2=v2; ...").
        cookie_str = ""
        # Skip fragments without '=': an empty cookie_str splits to ['']
        # and the bare unpack in the original raised ValueError on startup.
        for cookie_kv in cookie_str.split('; '):
            if '=' not in cookie_kv:
                continue
            k, v = cookie_kv.split('=', 1)
            driver.add_cookie({'name': k, 'value': v, 'domain': 'sou.com'})

        # Search every keyword, pausing a random 20–40 s between requests
        # to stay under the "访问频繁" rate limit.
        for keyword in keywords:
            search_and_save(keyword, driver, save_path)
            time.sleep(random.uniform(20, 40))
    finally:
        # Always release the browser, even if a search raised.
        driver.quit()


if __name__ == "__main__":
    main()