吾爱破解 - 52pojie.cn

 找回密码
 注册[Register]

QQ登录

只需一步,快速开始

查看: 3315|回复: 28
收起左侧

[Python 原创] 分享一个去哪儿网查询机票的脚本

[复制链接]
bobo2017365 发表于 2024-1-1 03:26
本帖最后由 bobo2017365 于 2024-1-1 11:19 编辑

年末了,很多小伙伴会用得到。

注意不要大量、快速地查询,小心被封IP。


[Python] 纯文本查看 复制代码
# coding: utf-8
"""
"""
from selenium import webdriver
from selenium.webdriver.chrome.options import Options
from bs4 import BeautifulSoup
import re
from pymongo import MongoClient


class Spider(object):
    """Scrape one-way flight listings from flight.qunar.com into MongoDB.

    One instance handles a single (departure city, destination city, date)
    query. It owns a Chrome WebDriver and a MongoDB client; call ``close()``
    when finished to release both.
    """

    def __init__(self, src_from=None, dest=None, DealTime=None):
        """Open the MongoDB connection and start the browser.

        Args:
            src_from: Departure city name.
            dest: Destination city name.
            DealTime: Departure date string, e.g. ``"2024-01-10"``.
        """
        # MongoDB connection
        self.client = MongoClient(host="localhost", port=27017)
        # Database name
        self.db = self.client['spider']
        # Collection name
        self.collection = self.db['feiji_piao']
        self.options = Options()
        self.options.add_argument('--headless')
        # The options must be handed to the driver, otherwise --headless is
        # configured but never takes effect.
        self.driver = webdriver.Chrome(
            executable_path="/usr/local/bin/chromedriver",
            options=self.options,
        )
        self.driver.implicitly_wait(10)
        self.verificationErrors = []
        self.accept_next_alert = True
        # Kept on one logical line: a triple-quoted template would embed a
        # leading newline and indentation into the URL sent to the browser.
        self.url = (
            "https://flight.qunar.com/site/oneway_list.htm"
            "?searchDepartureAirport={}&searchArrivalAirport={}&searchDepartureTime={}"
        )
        self.src_from = src_from
        self.dest = dest
        self.DealTime = DealTime

    @staticmethod
    def _clean_text(text):
        """Return *text* with every whitespace character (incl. newlines) removed."""
        return re.sub(r"\s", "", text)

    def get_content_with_web(self):
        """Load the search page for the configured query and return its HTML.

        Returns:
            The rendered page source, or ``None`` when any of departure city,
            destination or date is missing, or when the page fails to load.
        """
        if not (self.src_from and self.dest and self.DealTime):
            print("需要同时指定 出发地、目的地、出发时间")
            return None
        # strip() guards against a template that carries surrounding whitespace.
        url = self.url.strip().format(self.src_from, self.dest, self.DealTime)
        try:
            self.driver.get(url)
            # The rendered page source is needed to see the flight entries.
            return self.driver.page_source
        except Exception as exc:
            # Best effort: report the failure and let the caller skip this query.
            print("打开页面失败: {}".format(exc))
            return None

    def get_content_with_read_file(self):
        """Return the contents of the local file ``1.html`` (testing helper)."""
        with open("1.html", "r", encoding="utf-8") as f:
            return f.read()

    def _parse_flight(self, flight):
        """Extract one flight entry (a ``div.b-airfly`` tag) into a dict.

        Raises:
            IndexError, AttributeError: when an expected element is missing.
        """
        clean = self._clean_text
        # Airline name
        company_name = clean(flight.select("div.air span")[0].text)
        # Flight number
        hangban_name = clean(flight.find("div", attrs={'class': "num"}).text)
        # Departure time
        start_time = clean(flight.select("div.sep-lf h2")[0].text)
        # Flight duration
        range_time = clean(flight.select("div.range")[0].text)
        # Arrival time
        stop_time = clean(flight.select("div.sep-rt h2")[0].text)

        # NOTE(review): per the original author, the digits shown on the page
        # do not match the raw text in the markup, so this price may not be
        # the real one -- the rendering scheme has not been worked out.
        price_nodes = flight.select('em.rel b[style="width:48px;left:-48px"]')
        if price_nodes:
            price = clean(price_nodes[0].text)
        else:
            # Fall back to the first <b> inside the price element.
            price = clean(flight.find("em", attrs={'class': "rel"}).find("b").text)
        # Discount information
        zhekou = clean(flight.find("div", attrs={"class": "vim"}).text)

        return {
            "CompanyName": company_name, "HangBan": hangban_name, "StartTime": start_time,
            "Range": range_time, "StopTime": stop_time, "Price": price, "Account": zhekou,
        }

    def get_single_page(self, content):
        """Parse the flight entries in *content* and store each in MongoDB.

        Args:
            content: HTML of a search-result page. ``None`` or an empty string
                (e.g. after a failed page load) is reported and ignored.
        """
        if not content:
            print("没有可解析的网页内容")
            return
        soup = BeautifulSoup(content, 'lxml')
        # Class selectors replace the original attribute selectors, which
        # soupsieve rejects as "Malformed attribute selector".
        flights = soup.select("div.mb-10 div.m-airfly-lst div.b-airfly")
        for flight in flights:
            try:
                data = self._parse_flight(flight)
            except (IndexError, AttributeError) as exc:
                # A single incomplete entry should not abort the whole page.
                print("解析航班信息失败: {}".format(exc))
                continue
            print(data)
            try:
                # insert_one replaces Collection.insert, removed in PyMongo 4.
                self.collection.insert_one(data)
            except Exception as exc:
                # Best effort: keep going with the remaining flights.
                print("写入MongoDB失败: {} {}".format(exc, data))

    def clear_cookie(self, delete_all=False):
        """Cookie handling example.

        Args:
            delete_all: When true, delete every cookie; otherwise delete only
                the cookie named ``QN277``.
        """
        if delete_all:
            self.driver.delete_all_cookies()
            return
        try:
            self.driver.delete_cookie("QN277")
        except Exception as exc:
            # Deleting the cookie is optional; report and carry on.
            print("删除cookie失败: {}".format(exc))

    def close(self):
        """Quit the browser and close the MongoDB connection."""
        try:
            self.driver.quit()
        finally:
            self.client.close()

    def main(self):
        """Fetch the search page for this query and store its flights."""
        content = self.get_content_with_web()
        self.get_single_page(content)


if __name__ == "__main__":
    import time

    # Departure cities to query.
    city_list = ["北京", "天津", "沈阳", "长春", "哈尔滨", "上海", "南京", "武汉", "广州", "重庆", "成都", "西安", "石家庄", "唐山", "太原", "包头", "大连",
                 "鞍山", "抚顺", "吉林", "齐齐哈尔", "徐州", "杭州", "福州", "南昌", "济南", "青岛", "淄博", "郑州", "长沙", "贵阳", "昆明", "兰州",
                 "乌鲁木齐", "邯郸", "保定", "张家口", "大同", "呼和浩特", "本溪", "丹东", "锦州", "阜新", "辽阳", "鸡西", "鹤岗", "大庆", "伊春", "佳木斯",
                 "牡丹江", "无锡", "常州", "苏州", "宁波", "合肥", "淮南", "淮北", "厦门", "枣庄", "烟台", "潍坊", "泰安", "临沂", "开封", "洛阳", "平顶山",
                 "安阳", "新乡", "焦作", "黄石", "襄樊", "荆州", "株洲", "湘潭", "衡阳", "深圳", "汕头", "湛江", "南宁", "柳州", "西宁"]
    # Destination
    dest = "北京"
    # Departure date
    deal_time = "2024-01-10"
    # Pause between queries: rapid bulk querying risks an IP ban.
    pause_seconds = 2

    for src_from in city_list:
        # A route from a city to itself is not a meaningful query.
        if src_from == dest:
            continue
        a = Spider(src_from=src_from, dest=dest, DealTime=deal_time)
        try:
            a.main()
        finally:
            # Each Spider starts its own Chrome process and Mongo client;
            # release them so they do not pile up across the city list.
            try:
                a.driver.quit()
            finally:
                a.client.close()
        time.sleep(pause_seconds)

免费评分

参与人数 6吾爱币 +12 热心值 +5 收起 理由
苏紫方璇 + 7 + 1 欢迎分析讨论交流,吾爱破解论坛有你更精彩!
南方棋生 + 1 + 1 我很赞同!
Bob5230 + 1 + 1 我很赞同!
MakiseSatsuki + 1 热心回复!
woyucheng + 1 + 1 谢谢@Thanks!
为之奈何? + 1 + 1 我很赞同!

查看全部评分

发帖前要善用论坛搜索功能,那里可能会有你要找的答案或者已经有人发布过相同内容了,请勿重复发帖。

Felix2857 发表于 2024-1-17 17:22
all_div_dingceng = soup.select("div['class=\"mb-10\"'] div['class=\"m-airfly-lst\"'] div['class=\"b-airfly\"']")

这段应该改为:

all_div_dingceng = soup.select("div.mb-10 div.m-airfly-lst div.b-airfly")
Felix2857 发表于 2024-1-17 16:33
大佬,运行报错,这段是怎么改呢:

Malformed attribute selector at position 3
  line 1:
div['class="mb-10"'] div['class="m-airfly-lst"'] div['class="b-airfly"']
   ^
Corgibro 发表于 2024-1-1 08:20
LuckyClover 发表于 2024-1-1 08:43
感谢楼主分享,应该能用得上
52soft 发表于 2024-1-1 09:05
强大的代码
Diamondzl 发表于 2024-1-1 09:27
谢谢,很有用
abc9452003 发表于 2024-1-1 09:38
正打算年前出去走一走。
wm517 发表于 2024-1-1 10:41
感谢分享 学习学习
FruitBaby 发表于 2024-1-1 10:44
请求url,解析response。
zwh8698 发表于 2024-1-1 10:45
Corgibro 发表于 2024-1-1 08:20
不错,过完年又该出去了,感觉能用上

风貌毕露啊
Wryyy6 发表于 2024-1-1 11:03
感谢楼主分享,很有用。
您需要登录后才可以回帖 登录 | 注册[Register]

本版积分规则

返回列表

RSS订阅|小黑屋|处罚记录|联系我们|吾爱破解 - LCG - LSG ( 京ICP备16042023号 | 京公网安备 11010502030087号 )

GMT+8, 2025-1-9 13:40

Powered by Discuz!

Copyright © 2001-2020, Tencent Cloud.

快速回复 返回顶部 返回列表