# [Python] plain-text view / copy code  (forum code-viewer header; not part of the program)
import re
import threading
import time
import driver
import pymysql
import requests
from selenium import webdriver
import time
import random
import xlwt
from selenium.webdriver.common.by import By
import smtplib
from email.mime.text import MIMEText
from email.header import Header
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.support import expected_conditions as EC
from urllib3.util import wait
import datetime
from datetime import datetime, timedelta
from apscheduler.schedulers.blocking import BlockingScheduler
import pymssql
import csv
import os
def get_ts(keyword, text):
    """Return the first number that follows ``keyword`` in ``text``.

    Any run of CJK characters (U+4E00-U+9FA5) between the keyword and the
    number is skipped. The number is returned exactly as matched (a string
    of digits and dots); it is not converted.

    Args:
        keyword: Literal marker to look for. It is escaped before being
            placed in the pattern, so characters such as ``$`` or ``(``
            are matched verbatim instead of being read as regex syntax.
        text: Text to search.

    Returns:
        The matched digit/dot string, or ``"1"`` when nothing matches.
    """
    pattern = r'{}[\u4e00-\u9fa5]*([\d\.]+)'.format(re.escape(keyword))
    match = re.search(pattern, text)
    # "1" is this helper's fallback when the keyword/number pair is absent.
    return match.group(1) if match else "1"
def get_gs(keyword, text):
    """Return the run of digits that immediately precedes ``keyword``.

    Args:
        keyword: Literal suffix to look for. It is escaped before being
            placed in the pattern, so regex metacharacters in it are
            matched verbatim.
        text: Text to search.

    Returns:
        The digits found directly before the keyword, as a string, or
        ``"0"`` when there is no such occurrence.
    """
    pattern = fr"(\d+){re.escape(keyword)}"
    match = re.search(pattern, text)
    # "0" is this helper's fallback when no "<digits><keyword>" is present.
    return match.group(1) if match else "0"
def get_yp(keyword, text):
    """Return the run of digits that immediately precedes ``keyword``.

    Args:
        keyword: Literal suffix to look for. It is escaped before being
            placed in the pattern, so regex metacharacters in it are
            matched verbatim.
        text: Text to search.

    Returns:
        The digits found directly before the keyword, as a string, or
        ``"0"`` when there is no such occurrence.
    """
    pattern = fr"(\d+){re.escape(keyword)}"
    match = re.search(pattern, text)
    # "0" is this helper's fallback when no "<digits><keyword>" is present.
    return match.group(1) if match else "0"
def get_tnr(keyword, text):
    """Return the first number that follows ``keyword`` in ``text``.

    Any run of CJK characters (U+4E00-U+9FA5) between the keyword and the
    number is skipped. The number is returned exactly as matched (a string
    of digits and dots); it is not converted.

    Args:
        keyword: Literal marker to look for. It is escaped before being
            placed in the pattern, so regex metacharacters in it are
            matched verbatim.
        text: Text to search.

    Returns:
        The matched digit/dot string, or ``"0"`` when nothing matches.
    """
    pattern = r'{}[\u4e00-\u9fa5]*([\d\.]+)'.format(re.escape(keyword))
    match = re.search(pattern, text)
    # "0" is this helper's fallback when the keyword/number pair is absent.
    return match.group(1) if match else "0"
# def my_job():
# # 执行JavaScript来刷新页面
# driver.execute_script("window.location.reload();")
# time.sleep(random.randint(2, 3))
# driver.find_element(by=By.XPATH, value='//*[@id="layout"]/div[2]/div[2]/div[3]/div[1]/div/span[2]/span').click()
# time.sleep(random.randint(3, 4))
# driver.find_element(by=By.XPATH, value='//*[@id="layout"]/div[2]/div[2]/div[3]/div[1]/div/span[2]/span').click()
# get_data()
# work_book.save("盼之.xls")
def extract_datetime(text):
    """Return the first ``YYYY-MM-DD HH:MM:SS`` timestamp found in ``text``.

    Only the shape is checked (digit counts and separators); the value is
    not validated as a real calendar date or time.

    Args:
        text: Text to search.

    Returns:
        The matched timestamp substring, or ``None`` when none is present.
    """
    pattern = r'\d{4}-\d{2}-\d{2} \d{2}:\d{2}:\d{2}'
    match = re.search(pattern, text)
    return match.group() if match else None
# Open a MySQL connection with the script's fixed settings.
def con():
    """Create and return a new PyMySQL connection.

    No host is passed, so PyMySQL's default host is used. The connection
    targets port 3306, the ``mysql`` schema and the ``utf8`` charset.

    Returns:
        The open connection. The caller owns it and should close it.
    """
    # NOTE(review): credentials are hard-coded in source; consider moving
    # them to configuration or environment variables.
    db = pymysql.connect(
        port=3306,
        user='root',
        password='cyj19950610',
        db='mysql',
        charset='utf8'
    )
    # Hand the connection back; without this it was created and then lost.
    return db
# Shared MySQL handle and INSERT statement used when storing scraped rows.
class save:
    """Namespace holding one MySQL connection, its cursor and the INSERT SQL.

    Everything here is a class attribute evaluated once, when the class
    statement runs, so the connection is opened as soon as this module is
    executed. ``get_data`` uses ``save.cursor``, ``save.sql`` and
    ``save.db`` directly; the class is never instantiated in this file.
    """

    # NOTE(review): credentials are hard-coded in source; consider moving
    # them to configuration or environment variables.
    db = pymysql.connect(
        port=3306,
        user='root',
        password='cyj19950610',
        db='mysql',
        charset='utf8'
    )
    cursor = db.cursor()
    # INSERT IGNORE turns duplicate-key errors into warnings, so re-scraped
    # rows are skipped rather than failing (presumably `bh` carries a unique
    # key on db_px_wwqy -- verify against the table definition).
    sql = 'insert ignore into db_px_wwqy (bh, nss, amount, rate, info_time) values(%s, %s, %s, %s, %s)'
def get_data():
    """Scrape the listing rows on the current page and insert them into MySQL.

    Reads the module-level ``driver`` (a Selenium WebDriver) and writes
    through ``save.cursor`` / ``save.db``. For every row under the
    ``game_list`` container it extracts:

    * ``nss``    - the row's description text,
    * ``amount`` - the number following "¥" in the row's price element,
    * ``bh``     - the first ten characters of the description, used as id,
    * ``rate``   - 4 % of the number following "无畏点数", rounded,
    * ``info_time`` - the local time at which the row was scraped.

    Returns:
        None. Each row is printed and inserted; the insert is committed
        per row so earlier rows persist if a later one fails.
    """
    divs = driver.find_elements(by=By.XPATH, value='//div[@class="game_list"]/div[2]/div')
    print(divs)
    for div in divs:
        nss = div.find_element(by=By.XPATH,
                               value='.//div/div[2]/a/div/div[2]').text
        # The leading "." keeps the lookup relative to this row. Without it
        # the XPath is evaluated from the document root and every row would
        # receive the first listing's price.
        price_text = div.find_element(by=By.XPATH,
                                      value='.//div/div[3]/div').text
        amount = get_ts("¥", price_text)
        info_time = datetime.now()
        bh = nss[:10]
        ds = get_ts("无畏点数", nss)
        # float() rather than int(): get_ts may return a decimal string such
        # as "12.5", which int() rejects with ValueError.
        rate = round(float(ds) * 0.04)
        print(f"文本: {nss}")
        print(f"金额: {amount}")
        print(f"折损: {rate}")
        # No separate existence check is made before inserting: the
        # statement in save.sql is an INSERT IGNORE.
        save.cursor.execute(save.sql,
                            (bh, nss, amount, rate, info_time))
        save.db.commit()
if __name__ == '__main__':
    # Script entry point: start Chrome, open the listing page, click the
    # same control twice, scrape the rows, then always shut the browser down.

    # (disabled) Excel export setup, kept for reference.
    # div_list = ["编号", "天赏石", "天霓染", "金额", "天赏均价", "发布时间"]
    # col = 0  # set row / column
    # row = 1
    # work_book = xlwt.Workbook(encoding='utf-8')  # create the workbook
    # work_sheet = work_book.add_sheet('tb_shopping')  # create a sheet
    # pattern = xlwt.Pattern()  # cell style
    # pattern.pattern = xlwt.Pattern.SOLID_PATTERN
    # pattern.pattern_fore_colour = 5
    # for i in range(6):  # write the header row
    #     work_sheet.col(i).width = 4444
    #     work_sheet.write(0, i, div_list[i])

    # XPath of the control that is clicked twice before scraping.
    toggle_xpath = '//*[@id="layout"]/div[2]/div[2]/div[3]/div[1]/div/span[2]/span'

    # A single options object: flags that used to be set on a second,
    # never-used ChromeOptions instance are now actually passed to Chrome.
    opts = webdriver.ChromeOptions()
    # The `headless` attribute is deprecated/removed in Selenium 4; the
    # command-line flag works across versions.
    opts.add_argument('--headless')
    opts.add_argument('--ignore-certificate-errors')  # ignore certificate errors
    opts.add_argument("--disable-3d-apis")
    opts.add_argument('log-level=3')
    opts.add_argument('blink-settings=imagesEnabled=false')

    # `driver` must stay a module-level name: get_data() reads it as a global.
    driver = webdriver.Chrome(options=opts)
    try:
        driver.get('https://www.pxb7.com/selectgame?game_id=148&game_alias=games&gameAlias=wwqy')
        driver.implicitly_wait(1)
        driver.maximize_window()
        # The element is looked up again for each click rather than reused.
        driver.find_element(by=By.XPATH, value=toggle_xpath).click()
        time.sleep(random.randint(2, 3))
        driver.find_element(by=By.XPATH, value=toggle_xpath).click()
        time.sleep(random.randint(2, 3))
        get_data()
        # (disabled) periodic re-scrape via APScheduler, kept for reference.
        # # create the scheduler
        # scheduler = BlockingScheduler()
        # # add the job: run at a fixed interval
        # scheduler.add_job(my_job, 'interval', seconds=10)
        # scheduler.start()
    finally:
        # Always release the browser and chromedriver processes.
        driver.quit()