Scraping CNN news with Python and summarizing with GPT: news freedom at last!!
import os
import datetime
import requests
from bs4 import BeautifulSoup
from urllib.parse import urljoin
import openai
import time
# Set your OpenAI API key here
openai.api_key = ''
# Get the current date
current_date = datetime.date.today()
# Build the folder path (the Chinese path "C:/桌面/每日新闻" means "C:/Desktop/Daily News")
folder_path = os.path.join("C:/桌面/每日新闻", str(current_date))
# Create the folder
os.makedirs(folder_path, exist_ok=True)
# Build the output file path ("CNN新闻.txt" means "CNN News.txt")
file_path = os.path.join(folder_path, "CNN新闻.txt")
# Fetch the CNN homepage and locate the ribbon of headline links
url = "https://edition.cnn.com/"
response = requests.get(url)
html_content = response.content
soup = BeautifulSoup(html_content, "html.parser")
container = soup.find(class_="container__field-links container_ribbon__field-links")
if container:
    links = container.find_all("a")
    # Open the output file for writing
    with open(file_path, "w", encoding="utf-8") as file:
        # Visit each link in turn
        for link in links:
            href = link.get("href")
            full_link = urljoin(url, href)
            try:
                response = requests.get(full_link)
                response.raise_for_status()  # Raise on HTTP error status codes
                html = BeautifulSoup(response.content, "html.parser")
                articles = html.find_all(class_="article__content")
                if not articles:  # find_all returns an empty list, never None
                    continue
                # Join the text of all matched article blocks into one string
                content = ' '.join(article.get_text(" ", strip=True) for article in articles)
                user_input = f"Summarize the following article:\n{content}\nSummary:"
                # Throttle requests to stay under the rate limit
                time_between_requests = 60 / 3  # 3 RPM
                time.sleep(time_between_requests)
                summary_response = openai.ChatCompletion.create(
                    model="gpt-3.5-turbo",
                    messages=[
                        {"role": "system", "content": "You are a helpful assistant."},
                        {"role": "user", "content": user_input}
                    ],
                    temperature=1,
                    max_tokens=256,
                )
                summary = summary_response.choices[0].message['content'].strip()
                # Write the summary to the file
                file.write(summary + "\n\n")
                # Print the summary
                print(summary)
                print('---------------------------------------------------------------------------------')
            except requests.RequestException as e:
                print(f"Request failed: {str(e)}")
print("File written!")
openai.error.RateLimitError: You exceeded your current quota, please check your plan and billing details.
Can this be used on a free account, or do you have to upgrade to a paid one?
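On the error quoted above: "exceeded your current quota" means the account's billing quota is used up, which no amount of retrying fixes; check the plan and billing page. For the transient requests-per-minute limits a free account does hit, a common pattern (a minimal sketch for the legacy 0.x SDK this script uses; the retry count and delays are illustrative choices, not from the post) is to catch openai.error.RateLimitError and back off exponentially:

import time
import openai

def summarize_with_retry(user_input, max_retries=5):
    # Retry ChatCompletion with exponential backoff on rate-limit errors
    delay = 20  # initial wait in seconds (illustrative value)
    for attempt in range(max_retries):
        try:
            resp = openai.ChatCompletion.create(
                model="gpt-3.5-turbo",
                messages=[
                    {"role": "system", "content": "You are a helpful assistant."},
                    {"role": "user", "content": user_input},
                ],
                temperature=1,
                max_tokens=256,
            )
            return resp.choices[0].message['content'].strip()
        except openai.error.RateLimitError:
            # Hit the per-minute cap: wait, then double the delay
            time.sleep(delay)
            delay *= 2
    raise RuntimeError(f"Still rate-limited after {max_retries} retries")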
Happy to discuss and trade notes, but I'll be keeping OP's API key for myself.

This needs a "magic" network environment (a VPN/proxy), right? Normally you can't reach GPT directly.

雾都孤尔 posted on 2023-8-27 19:04:
This needs a "magic" network environment (a VPN/proxy), right? Normally you can't reach GPT directly.
Right, you'll need to set that environment up yourself.

Taking the code, thanks OP!
Study time, woohoo!
How do we set up that environment?
Good material, bookmarked.
Does this need a special environment too?
Are there any posts on reverse-engineering crawlers, with encryption and decryption?
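On the environment question: if you run a local proxy, one way to wire it in (a sketch; the address 127.0.0.1:7890 is a placeholder for whatever proxy you actually run, not something from the post) is to point both the legacy openai SDK and requests at it:

import openai
import requests

# Placeholder: substitute your own proxy's host and port
PROXY = "http://127.0.0.1:7890"

# The legacy openai 0.x SDK honors a module-level proxy setting
openai.proxy = PROXY

# requests takes a per-call proxies mapping for the scraping half
response = requests.get(
    "https://edition.cnn.com/",
    proxies={"http": PROXY, "https": PROXY},
    timeout=30,
)
print(response.status_code)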