# get_response(): request a URL and return the tuple (response object, success flag).
# request_times is the number of attempts made before giving up.
def get_response(url, proxy_ip_server, local_ip, request_times, sleep_seconds, used_ips_interval):
    """Request *url* directly or through a proxy, retrying on failure.

    An address is obtained from ``get_proxy_ip(proxy_ip_server, local_ip)``.
    When it equals *local_ip* the request is sent without a proxy
    (presumably get_proxy_ip() falls back to the local IP when no proxy is
    available -- confirm against that function). Otherwise the address is
    first checked with ``check_proxy_ip()``; a rejected address is discarded
    and a new one is requested.

    Args:
        url: Address to fetch with an HTTP GET.
        proxy_ip_server: Passed through to get_proxy_ip().
        local_ip: IP of this machine; also passed to the proxy helpers.
        request_times: Maximum number of attempts for the chosen address.
        sleep_seconds: Seconds to sleep before every attempt.
        used_ips_interval: Passed through to check_proxy_ip().

    Returns:
        ``(response, True)`` as soon as an attempt answers with HTTP 200,
        otherwise ``("", False)`` once all attempts have failed (a non-200
        status counts as a failed attempt).
    """
    # Candidate request headers; one is chosen at random for every attempt.
    ua_list = [
        {'User-Agent': 'Mozilla/5.0 (Windows NT 6.1; rv:22.0) Gecko/20130405 Firefox/22.0'},
        {'User-Agent': 'Mozilla/5.0 (Windows NT 6.1; rv:14.0) Gecko/20100101 Firefox/18.0.1'},
        {'User-Agent': 'Mozilla/5.0 (Windows NT 6.1; WOW64; rv:21.0) Gecko/20130331 Firefox/21.0'},
        {"user-agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64; rv:84.0) Gecko/20100101 Firefox/84.0"}
    ]
    # Separator line framing the log output of this call.
    border = "\xa0\xa0" + "*" * 100
    print(border)
    print("\xa0\xa0URL:{}".format(url))
    while True:
        proxy_ip = get_proxy_ip(proxy_ip_server, local_ip)
        if proxy_ip == local_ip:
            # No proxy in play: request straight from this machine.
            return _request_with_retries(
                url, ua_list, border, "本机IP{}".format(proxy_ip),
                request_times, sleep_seconds,
            )
        if not check_proxy_ip(proxy_ip, local_ip, used_ips_interval):
            # Proxy rejected by the check: ask for another address.
            continue
        # The proxy address is expected in "ip:port" form.
        address = proxy_ip.split(":")
        if len(address) < 2:
            # Without a port no proxy URL can be built; report a failure.
            print("请求失败!")
            print(border)
            return "", False
        ip, port = address[0], address[1]
        proxies = {
            "https": "https://{0:}:{1:}".format(ip, port),
            "http": "http://{0:}:{1:}".format(ip, port),
        }
        # NOTE: verify=False disables TLS certificate verification for
        # proxied requests, exactly as the original call did.
        return _request_with_retries(
            url, ua_list, border, "代理IP{}".format(ip),
            request_times, sleep_seconds, proxies=proxies, verify=False,
        )


def _request_with_retries(url, ua_list, border, label, request_times, sleep_seconds, **request_kwargs):
    """Send up to *request_times* GETs; return (response, True) or ("", False)."""
    for n in range(1, request_times + 1):
        time.sleep(sleep_seconds)
        print("\xa0\xa0{}第{}/{}次发起请求".format(label, n, request_times), end=";")
        try:
            r = requests.get(url=url, headers=random.choice(ua_list), timeout=5, **request_kwargs)
        except Exception:
            # Best effort: any error while requesting is just a failed attempt.
            r = None
        if r is not None and r.status_code == 200:
            print("请求成功!\n" + border)
            return r, True
        print("请求失败!")
    # Every attempt failed (or request_times was not positive).
    print(border)
    return "", False
# get_cities(): collect 58.com province names, city names and city URLs.
# The argument is the URL of the city-list page.
def get_cities(url):
    """Load *url* in Chrome and parse the province/city listing it contains.

    Args:
        url: Address of the page to open with the Selenium Chrome driver.

    Returns:
        dict mapping province name -> {city name: [city URL]}. Every href
        found on the page is prefixed with "https:" (the hrefs are
        presumably protocol-relative -- confirm against the page). A
        hard-coded "直辖市" entry holding the four municipalities is added,
        and the "其他" and "海外" entries are removed when present.
    """
    driver = webdriver.Chrome()
    try:
        driver.get(url)
        html_source = driver.page_source
    finally:
        # Always release the browser, even when loading the page fails.
        driver.quit()

    dic_cities = dict()
    letters = etree.HTML(html_source).xpath("//div[@class='content-letter']")
    for letter in letters:
        for content_province in letter.xpath("div[@class='content-province']"):
            titles = content_province.xpath("div[@class='content-province-title']/text()")
            if not titles:
                # A province block without a title cannot be keyed; skip it.
                continue
            province_name = titles[0]
            province_cities = dict()
            dic_cities[province_name] = province_cities
            for content_city in content_province.xpath("div[@class='content-cities']"):
                city_names = content_city.xpath("a/text()")
                city_urls = content_city.xpath("a/@href")
                for city_name, city_url in zip(city_names, city_urls):
                    # Each city maps to a one-element list holding its URL.
                    province_cities[city_name] = ["https:" + city_url]

    # Add the municipalities.
    dic_cities["直辖市"] = {
        "北京": ["https://bj.58.com/"], "天津": ["https://tj.58.com/"],
        "上海": ["https://sh.58.com/"], "重庆": ["https://cq.58.com/"]
    }
    # Drop the overseas entries; tolerate pages that do not list them.
    dic_cities.pop("其他", None)
    dic_cities.pop("海外", None)
    return dic_cities
# get_inputs(): read the user's choice and return the tuple
# (province/region/municipality name, city name, city URL).
# The argument is the value returned by get_cities().
def get_inputs(dic):
    """Interactively pick a province and then one of its cities.

    A numbered reference table of the keys of *dic* is printed and the user
    is asked for an index until a valid one is entered; the same is then
    done for the cities of the chosen province.

    Args:
        dic: Mapping province name -> {city name: [city URL, ...]}.

    Returns:
        ``(province, city, url)`` where *url* is the first element of
        ``dic[province][city]``.

    Raises:
        ValueError: If *dic* or the chosen province has no entries, since
            no index could ever be accepted.
    """
    provinces = list(dic)
    print("\xa0\xa0[省/自治区/直辖市名称参照表:", end="\n\n")
    _print_numbered(provinces)
    province = provinces[_ask_index("\xa0\xa0请输入[省/自治区/直辖市序号:", len(provinces))]

    cities = list(dic[province])
    print("\n\xa0\xa0{}所属城市名称参照表:".format(province), end="\n\n")
    _print_numbered(cities)
    city = cities[_ask_index("\xa0\xa0请输入{}所属城市序号:".format(province), len(cities))]
    return province, city, dic[province][city][0]


def _print_numbered(names):
    """Print *names* as "index name" pairs, ten per line."""
    print("\xa0\xa0", end="")
    for i, name in enumerate(names):
        print(i, name, end="、")
        if (i + 1) % 10 == 0:
            # Start a new indented line after every tenth entry.
            print("\n\xa0\xa0", end="")


def _ask_index(prompt, count):
    """Prompt until the user types an index in ``range(count)``; return it."""
    if count <= 0:
        raise ValueError("reference table is empty; nothing can be selected")
    # Only the canonical decimal spelling of each index is accepted.
    valid = {str(i) for i in range(count)}
    while True:
        print("\n")
        answer = input(prompt)
        if answer in valid:
            return int(answer)
        print("\xa0\xa0输入序号不在参照表范围,请重新输入!")
# ************ Write the collected whole-rent listings to a file ************
import csv  # imported here so this step is self-contained

print("4、将58同城{}{}整租房信息写入文件:\n".format(province_name, city_name))
# newline="" lets the csv module control line endings, as its documentation requires.
with open("58同城{}{}整租房信息.csv".format(province_name, city_name), "w", encoding="utf-8", newline="") as f:
    # csv.writer quotes fields containing commas, quotes or newlines, which
    # a plain ",".join() would silently turn into extra or broken columns.
    csv.writer(f).writerows(house_rent_info)
print("\xa0\xa058同城{}{}整租房信息写入完毕!\n".format(province_name, city_name))