python萌新求助代码逻辑问题

Jack-yu 发表于 2020-12-20 10:25

本帖最后由 Jack-yu 于 2020-12-21 09:30 编辑

如图，我本意是通过input获取用户输入，再拼接url，不知道是哪里出了问题，
必须输入两次才能跑通，请大佬帮我看看哪里的代码逻辑出问题了：

https://static.52pojie.cn/static/image/hrline/1.gif

嗯~ o(*￣▽￣*)o，问题已经解决了，是我脑子抽了，忘记写main.py主程序入口了............

# -*- coding: utf-8 -*-
import json
import time
import scrapy
from scrapy.spiders import CrawlSpider
from ZhiLian.items import ZhilianItem

class ZhilianspiderSpider(scrapy.Spider):
start = time.time()
name = 'ZhiLianSpider'
allowed_domains = ['zhaopin.com']
# 构造城市名字典，利用字典的键值对形成映射关系
city_list = {'北京': '530', '上海': '538', '广州': '763', '深圳': '765', '长沙': '749', '杭州': '653', '成都': '801', '厦门': '682'}

city_name = input("请输入城市: ")
city_id = city_list
# 构造职位类型字典
job_list = {'销售/商务拓展': '19000000000000', '人事/行政/财务/法务': '14000000000000',
            '互联网/通信及硬件': '9000000000000', '运维/测试': '20000000000000', '视觉/交互/设计': '17000000000000',
            '运营/专业分析': '5000000000000', '产品/项目/高级管理': '3000000000000', '市场/品牌/公关': '16000000000000',
            '金融/保险': '12000000000000', '房地产/工程/建筑': '7000000000000', '物流/采购/供应链': '2000000000000',
            '生产制造/营运管理': '15000000000000', '农业/能源/环保': '21000000000000', '医疗/医美': '18000000000000',
            '教育/培训/科研': '11000000000000', '编辑/记者/翻译': '1000000000000', '影视传媒': '4000000000000',
            '商务服务/生活服务': '6000000000000', '管培生/非企业从业者': '8000000000000'
            }
job_name = input("请输入职位类型: ")
jod_id = job_list
# 拼接初始化Url
start_urls = [
   "https://fe-api.zhaopin.com/c/i/jobs/searched-jobs?pageNo=1&pageSize=90&cityId=" + city_id + "&workExperience=0305&jobType=" + jod_id + "&education=4&companyType=-1"]

cotype_list = ['国企: 1', '外商独资: 2', '代表处: 3', '合资: 4', '民营: 5', '股份制企业: 8', '上市公司: 9', '国家机关: 6', '事业单位: 10',
               '银行: 11',
               '医院: 12', '学校/下级学院: 13', '律师事务所: 14', '社会团体: 15', '港澳台公司: 16', '其它: 7']
cosize_list = ['20人以下: 1', '20-99人: 2', '100-299人: 3', '300-499人: 8', '500-999人: 4', '1000-9999人: 5', '10000人以上: 6']
custom_settings = {
   'LOG_LEVEL': 'INFO',
   'CONCURRENT_REQUESTS': 64
}

# 解析start_urls
def parse(self, response):
   # 对应json数据中的data
   datas = json.loads(response.text)
   try:
         totalcount = int(datas['data']['page']['total'])
   except Exception:
         totalcount = 0

   if totalcount == 0:
         # 没有数据
         pass
   elif totalcount <= 270:
         if totalcount <= 90:
            yield scrapy.Request(
               url=response.url,
               dont_filter=True,
               callback=self.parse_result
            )
         elif 90 < totalcount <= 180:
            for page in range(1, 3):
               yield scrapy.Request(
                     url=str(response.url).replace('pageNo=1', f'pageNo={page}'),
                     dont_filter=True,
                     callback=self.parse_result
               )
         else:
            for page in range(1, 4):
               yield scrapy.Request(
                     url=str(response.url).replace('pageNo=1', f'pageNo={page}'),
                     dont_filter=True,
                     callback=self.parse_result
               )
   else:
         for cotype in self.cotype_list:
            yield scrapy.Request(
               url=str(response.url).replace('companyType=-1', f'companyType={cotype.split(": ")}'),
               dont_filter=True,
               callback=self.parse_cotype
            )

# 按公司类型解析
def parse_cotype(self, response):
   datas = json.loads(response.text)
   try:
         totalcount = int(datas['data']['page']['total'])
   except Exception:
         totalcount = 0

   if totalcount == 0:
         pass
   elif totalcount <= 270:
         if totalcount <= 90:
            yield scrapy.Request(
               url=response.url,
               dont_filter=True,
               callback=self.parse_result
            )
         elif 90 < totalcount <= 180:
            for page in range(1, 3):
               yield scrapy.Request(
                     url=str(response.url).replace('pageNo=1', f'pageNo={page}'),
                     dont_filter=True,
                     callback=self.parse_result
               )
         else:
            for page in range(1, 4):
               yield scrapy.Request(
                     url=str(response.url).replace('pageNo=1', f'pageNo={page}'),
                     dont_filter=True,
                     callback=self.parse_result
               )
   else:
         for cosize in self.cosize_list:
            yield scrapy.Request(
               url=str(response.url).replace('companySize=-1', f'companySize={cosize.split(": ")}'),
               dont_filter=True,
               callback=self.parse_cosize
            )

# 按公司规模解析
def parse_cosize(self, response):
   datas = json.loads(response.text)
   try:
         totalcount = int(datas['data']['page']['total'])
   except Exception:
         totalcount = 0

   if totalcount == 0:
         pass
   elif totalcount <= 270:
         if totalcount <= 90:
            yield scrapy.Request(
               url=response.url,
               dont_filter=True,
               callback=self.parse_result
            )
         elif 90 < totalcount <= 180:
            for page in range(1, 3):
               yield scrapy.Request(
                     url=str(response.url).replace('pageNo=1', f'pageNo={page}'),
                     dont_filter=True,
                     callback=self.parse_result
               )
         else:
            for page in range(1, 4):
               yield scrapy.Request(
                     url=str(response.url).replace('pageNo=1', f'pageNo={page}'),
                     dont_filter=True,
                     callback=self.parse_result
               )
   else:
         for page in range(1, 4):
            yield scrapy.Request(
               url=str(response.url).replace('pageNo=1', f'pageNo={page}'),
               dont_filter=True,
               callback=self.parse_result
            )

# 对最终的结果进行解析
def parse_result(self, response):
   item = ZhilianItem()
   datas = json.loads(response.text)
   try:
         data_list = datas['data']['list']
   except Exception:
         data_list = []

   if len(data_list) > 0:
         for data in data_list:
            item = {}
            # 职位名称
            item['poname'] = data['name']
            # 公司名称
            item['coname'] = data['company']
            # 工作城市
            item['city'] = data['workCity']
            # 薪资范围
            item['providesalary'] = data['salary']
            # 学历要求
            item['degree'] = data['education']
            # 公司类型
            item['coattr'] = data['property']
            # 公司规模
            item['cosize'] = data['companySize']
            # 职位类别
            item['rank'] = data['jobType']
            # 工作经验
            item['worktime'] = data['workingExp']
            # 福利待遇
            # 提取json数据中的value值，先转换为列表，再转换为字符串返回
            json_data = data['welfareLabel']
            json_list = []
            for i in json_data:
               json_list.append(i['value'])
            temp_data =
            welfare_str = ','.join(temp_data)
            item['welfare'] = welfare_str
            # print(item)
            yield item

end = time.time()
print("本次爬取花费时间为：" + str(end - start))

深渊莫冥 发表于 2020-12-20 21:55

兄弟，可以把程序打个包发一下吗？非常感谢

深渊莫冥 发表于 2020-12-20 22:05

下载:https://wwi.lanzoui.com/i9JPWjjtu9g 密码:brg9
这个是爬取其他招聘网站的Python代码

页: [1]

吾爱破解 - 52pojie.cn's Archiver

python萌新求助代码逻辑问题