# [Python] plain-text view / copy-code (forum paste artifact, kept as a comment)
import datetime
import json
import re
import threading
import time
from queue import Empty, Queue

import requests
from lxml import etree
yesterday = datetime.date.today()+datetime.timedelta(-2)
need = ['馆', '展厅', '厅', '装修', '设计', '施工', '景观', '展', '装饰', '基地', '党建', '文化', '空间', '线上', '数字', '策划', '提升', '美丽乡村']
no_need_1 = '中标'
no_need_2 = '结果'
class Procuder(threading.Thread):
def __init__(self, url_queue,pc_queue,*args,**kwargs):
super(Procuder,self).__init__(*args,**kwargs)
self.url_queue = url_queue
self.pc_queue = pc_queue
def run(self):
while True:
if self.url_queue.empty():
break
url = self.url_queue.get()
self.jie(url)
def jie(self,url):
if url.rsplit('|',1)[0] == str(1):
url1 = url.rsplit('|',1)[1]
self.sdggzyjyzx(url1)
if url.rsplit('|',1)[0] == str(2):
url1 = url.rsplit('|',1)[1]
self.sdggzyjyzx(url1)
if url.rsplit('|',1)[0] == str(3):
url1 = url.rsplit('|',1)[1]
self.qdggzyjyw(url1)
if url.rsplit('|',1)[0] == str(4):
url1 = url.rsplit('|',1)[1]
self.qdggzyjyw(url1)
if url.rsplit('|',1)[0] == str(5):
url1 = url.rsplit('|',1)[1]
self.zbggzyjyw(url1)
if url.rsplit('|',1)[0]== str(6):
url1 = url.rsplit('|',1)[1]
self.zbggzyjyw(url1)
if url.rsplit('|',1)[0] == str(7):
url1 = url.rsplit('|',1)[1]
self.zzggzyjyw(url1)
if url.rsplit('|',1)[0] == str(8):
url1 = url.rsplit('|',1)[1]
self.dyggzyjyw(url1)
if url.rsplit('|',1)[0] == str(9):
url1 = url.rsplit('|',1)[1]
self.dyggzyjyw(url1)
if url.rsplit('|',1)[0] == str(10):
url1 = url.rsplit('|',1)[1]
self.wfggzyjyw(url1)
if url.rsplit('|',1)[0] == str(11):
url1 = url.rsplit('|',1)[1]
self.wfggzyjyw(url1)
if url.rsplit('|',1)[0]== str(12):
url1 = url.rsplit('|',1)[1]
self.wfggzyjyw(url1)
if url.rsplit('|',1)[0] == str(13):
url1 = url.rsplit('|',1)[1]
self.jnggzyjyw(url1)
if url.rsplit('|',1)[0] == str(14):
url1 = url.rsplit('|',1)[1]
self.taggzyjyw(url1)
if url.rsplit('|',1)[0] == str(15):
url1 = url.rsplit('|',1)[1]
self.taggzyjyw(url1)
if url.rsplit('|',1)[0] == str(16):
url1 = url.rsplit('|',1)[1]
self.whggzyjyw(url1)
if url.rsplit('|',1)[0] == str(17):
url1 = url.rsplit('|',1)[1]
self.whggzyjyw(url1)
if url.rsplit('|',1)[0] == str(18):
url1 = url.rsplit('|',1)[1]
self.rzggzyjyw(url1)
if url.rsplit('|',1)[0] == str(19):
url1 = url.rsplit('|',1)[1]
self.rzggzyjyw(url1)
if url.rsplit('|',1)[0] == str(20):
url1 = url.rsplit('|',1)[1]
self.rzggzyjyw(url1)
if url.rsplit('|',1)[0] == str(21):
url1 = url.rsplit('|',1)[1]
self.lyggzyjyw(url1)
if url.rsplit('|',1)[0] == str(22):
url1 = url.rsplit('|',1)[1]
self.lyggzyjyw(url1)
if url.rsplit('|',1)[0] == str(23):
url1 = url.rsplit('|',1)[1]
self.lyggzyjyw(url1)
if url.rsplit('|',1)[0] == str(24):
url1 = url.rsplit('|',1)[1]
self.lyggzyjyw(url1)
if url.rsplit('|',1)[0] == str(25):
url1 = url.rsplit('|',1)[1]
self.lcggzyjyw(url1)
if url.rsplit('|',1)[0] == str(26):
url1 = url.rsplit('|',1)[1]
self.lcggzyjyw(url1)
if url.rsplit('|',1)[0] == str(27):
url1 = url.rsplit('|',1)[1]
self.bzggzyjyw(url1)
if url.rsplit('|',1)[0] == str(28):
url1 = url.rsplit('|',1)[1]
self.bzggzyjyw(url1)
if url.rsplit('|',1)[0] == str(29):
url = self.url_queue.get()
self.bzggzyjyw(url1)
if url.rsplit('|',1)[0] == str(30):
url1 = url.rsplit('|',1)[1]
self.hzggzyjyw(url1)
if url.rsplit('|',1)[0] == str(31):
url1 = url.rsplit('|',1)[1]
self.hzggzyjyw(url1)
if url.rsplit('|',1)[0] == str(32):
url1 = url.rsplit('|',1)[1]
self.hzggzyjyw(url1)
def hzggzyjyw(self,url1):
t = []
u = []
q = []
headers = {
'User-Agent': 'Mozilla/5.0 (Windows NT 6.1; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/83.0.4103.116 Safari/537.36'
}
time.sleep(1)
r = requests.get(url1, headers=headers).text
html = etree.HTML(r)
title = html.xpath('/html/body/div[2]/div[2]/div[2]/div/div[2]/ul/li/a/text()')
url_1 = html.xpath('/html/body/div[2]/div[2]/div[2]/div/div[2]/ul/li/a/@href')
time_rq = html.xpath('/html/body/div[2]/div[2]/div[2]/div/div[2]/ul/li/span/text()')
for title_1 in title:
title_2 = '|' + title_1 + '|'
t.append(title_2)
for url_2 in url_1:
url_3 = 'http://www.hzsggzyjyzx.gov.cn' + url_2
u.append(url_3)
list_word = zip(time_rq, t, u)
for list_word_1 in list_word:
list_word_2 = ''.join(list_word_1)
q.append(list_word_2)
self.pc_queue.put(q)
def bzggzyjyw(self,url1):
t = []
u = []
q = []
headers = {
'User-Agent': 'Mozilla/5.0 (Windows NT 6.1; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/83.0.4103.116 Safari/537.36'
}
time.sleep(1)
r = requests.get(url1, headers=headers).text
html = etree.HTML(r)
title = html.xpath('//*[@id="right"]/table/tr[1]/td/table/tr/td[2]/a/text()')
url_1 = html.xpath('//*[@id="right"]/table/tr[1]/td/table/tr/td[2]/a/@href')
time_rq = html.xpath('//*[@id="right"]/table/tr[1]/td/table/tr/td[3]/text()')
for title_1 in title:
title_2 = '|' + title_1 + '|'
t.append(title_2)
for url_2 in url_1:
url_3 = 'http://ggzyjy.binzhou.gov.cn' + url_2
u.append(url_3)
list_word = zip(time_rq, t, u)
for list_word_1 in list_word:
list_word_2 = ''.join(list_word_1)
q.append(list_word_2)
self.pc_queue.put(q)
def lcggzyjyw(self,url1):
t = []
u = []
q = []
headers = {
'User-Agent': 'Mozilla/5.0 (Windows NT 6.1; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/83.0.4103.116 Safari/537.36'
}
time.sleep(1)
r = requests.get(url1, headers=headers).text
html = etree.HTML(r)
title = html.xpath('/html/body/div[3]/div[2]/div/div[2]/ul/li/a/text()')
url_1 = html.xpath('/html/body/div[3]/div[2]/div/div[2]/ul/li/a/@href')
time_rq = html.xpath('/html/body/div[3]/div[2]/div/div[2]/ul/li/span/text()')
for title_1 in title:
title_2 = '|' + title_1 + '|'
t.append(title_2)
for url_2 in url_1:
url_3 = 'http://www.lcsggzyjy.cn' + url_2
u.append(url_3)
list_word = zip(time_rq, t, u)
for list_word_1 in list_word:
list_word_2 = ''.join(list_word_1)
q.append(list_word_2)
self.pc_queue.put(q)
def lyggzyjyw(self,url1):
t = []
u = []
q = []
headers = {
'User-Agent': 'Mozilla/5.0 (Windows NT 6.1; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/83.0.4103.116 Safari/537.36'
}
time.sleep(1)
r = requests.get(url1, headers=headers).text
html = etree.HTML(r)
title = html.xpath('/html/body/div[3]/div[2]/div/div[2]/ul/li/a/text()')
url_1 = html.xpath('/html/body/div[3]/div[2]/div/div[2]/ul/li/a/@href')
time_rq = html.xpath('/html/body/div[3]/div[2]/div/div[2]/ul/li/span/text()')
for title_1 in title:
title_2 = '|' + title_1 + '|'
t.append(title_2)
for url_2 in url_1:
url_3 = 'http://ggzyjy.linyi.gov.cn' + url_2
u.append(url_3)
list_word = zip(time_rq, t, u)
for list_word_1 in list_word:
list_word_2 = ''.join(list_word_1)
q.append(list_word_2)
self.pc_queue.put(q)
def rzggzyjyw(self,url1):
t = []
u = []
q = []
e = []
m = []
headers = {
'User-Agent': 'Mozilla/5.0 (Windows NT 6.1; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/83.0.4103.116 Safari/537.36'
}
time.sleep(1)
r = requests.get(url1, headers=headers).text
html = etree.HTML(r)
title = html.xpath('//*[@id="DataList1"]/tr/td/li/a/div[1]/text()')
url_1 = html.xpath('//*[@id="DataList1"]/tr/td/li/a/@href')
for url_2 in url_1:
url_3 = url_2[2:]
e.append(url_3)
time_rq = html.xpath('//*[@id="DataList1"]/tr/td/li/a/div[2]/text()')
for title_1 in title:
title_2 = '|' + title_1.strip() + '|'
t.append(title_2)
for e_1 in e:
e_2 = 'http://ggzyjy.rizhao.gov.cn/rzwz' + e_1
u.append(e_2)
for time_rq_1 in time_rq:
time_rq_2 = time_rq_1.strip()
time_rq_3 = time_rq_2.replace('.', '-')
m.append(time_rq_3)
list_word = zip(m, t, u)
for list_word_1 in list_word:
list_word_2 = ''.join(list_word_1)
q.append(list_word_2)
self.pc_queue.put(q)
def whggzyjyw(self,url1):
t = []
u = []
q = []
e = []
m = []
headers = {
'User-Agent': 'Mozilla/5.0 (Windows NT 6.1; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/83.0.4103.116 Safari/537.36'
}
time.sleep(1)
r = requests.get(url1, headers=headers).text
html = etree.HTML(r)
for i in range(1, 11):
title = html.xpath('/html/body/div[4]/div[3]/div/ul/li[' + str(i) + ']/div/a/text()')
title_1 = ''.join(title).strip()
e.append(title_1)
time_rq = html.xpath('/html/body/div[4]/div[3]/div/ul/li/div/div/text()')
for time_rq_1 in time_rq:
time_rq_2 = time_rq_1.strip()
m.append(time_rq_2)
url_1 = html.xpath('/html/body/div[4]/div[3]/div/ul/li[1]/div/a/@href')
for url_2 in url_1:
url_3 = 'http://www.whggzyjy.cn' + url_2
u.append(url_3)
for e_1 in e:
e_2 = '|' + e_1 + '|'
t.append(e_2)
list_word = zip(m, t, u)
for list_word_1 in list_word:
list_word_2 = ''.join(list_word_1)
q.append(list_word_2)
self.pc_queue.put(q)
def taggzyjyw(self,url1):
t = []
u = []
q = []
m = []
headers = {
'User-Agent': 'Mozilla/5.0 (Windows NT 6.1; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/83.0.4103.116 Safari/537.36'
}
time.sleep(1)
r = requests.get(url1, headers=headers).text
html = etree.HTML(r)
title = html.xpath('//*[@id="right_table"]/table/tr/td[2]/a/text()')
url_1 = html.xpath('//*[@id="right_table"]/table/tr/td[2]/a/@href')
time_rq = html.xpath('//*[@id="right_table"]/table/tr/td[3]/text()')
for time_rq_1 in time_rq:
time_rq_2 = time_rq_1.replace('[', '').replace(']', '')
m.append(time_rq_2)
for title_1 in title:
title_2 = '|' + title_1 + '|'
t.append(title_2)
for url_2 in url_1:
url_3 = 'http://www.taggzyjy.com.cn' + url_2
u.append(url_3)
list_word = zip(m, t, u)
for list_word_1 in list_word:
list_word_2 = ''.join(list_word_1)
q.append(list_word_2)
self.pc_queue.put(q)
def jnggzyjyw(self,url1):
t = []
u = []
q = []
headers = {
'User-Agent': 'Mozilla/5.0 (Windows NT 6.1; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/83.0.4103.116 Safari/537.36',
'Content-Type': 'application/json'
}
time.sleep(1)
payloadData = {
"FilterText": "",
"categoryCode": "503000",
"maxResultCount": 20,
"skipCount": 0,
"tenantId": "3"
}
data = json.dumps(payloadData)
r = requests.post(url1, data=data, headers=headers).text
title = re.findall(r'title":"(.*?)",', r)
url_1 = re.findall(r'"id":"(.*?)"},', r)
time_rq = re.findall(r'"releaseDate":"(.*?)T', r)
for title_1 in title:
title_2 = '|' + title_1 + '|'
t.append(title_2)
for url_2 in url_1:
url_3 = 'http://ggzy.jining.gov.cn/JiNing/Bulletins/Detail/' + url_2 + '/?CategoryCode=503000'
u.append(url_3)
list_word = zip(time_rq, t, u)
for list_word_1 in list_word:
list_word_2 = ''.join(list_word_1)
q.append(list_word_2)
self.pc_queue.put(q)
def wfggzyjyw(self,url1):
t = []
u = []
q = []
headers = {
'User-Agent': 'Mozilla/5.0 (Windows NT 6.1; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/83.0.4103.116 Safari/537.36'
}
time.sleep(1)
r = requests.get(url1, headers=headers).text
html = etree.HTML(r)
title = html.xpath('//*[@class="info-form"]/table/tbody/tr/td[3]/span/a/text()')
url_1 = html.xpath('//*[@class="info-form"]/table/tbody/tr/td[3]/span/a/@href')
time_rq = html.xpath('//*[@class="info-form"]/table/tbody/tr/td[4]/span/text()')
for title_1 in title:
title_2 = '|' + title_1 + '|'
t.append(title_2)
for url_2 in url_1:
url_3 = 'http://ggzy.weifang.gov.cn' + url_2
u.append(url_3)
list_word = zip(time_rq, t, u)
for list_word_1 in list_word:
list_word_2 = ''.join(list_word_1)
q.append(list_word_2)
self.pc_queue.put(q)
def dyggzyjyw(self,url1):
t = []
u = []
q = []
m = []
headers = {
'User-Agent': 'Mozilla/5.0 (Windows NT 6.1; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/83.0.4103.116 Safari/537.36'
}
time.sleep(1)
r = requests.get(url1, headers=headers).text
html = etree.HTML(r)
title = html.xpath('//*[@height="25"]/td[2]/a/font/text()')
url_1 = html.xpath('//*[@height="25"]/td[2]/a/@href')
time_rq = html.xpath('//*[@height="25"]/td[3]/text()')
for time_rq_1 in time_rq:
time_rq_2 = time_rq_1.replace('[', '').replace(']', '')
m.append(time_rq_2)
for title_1 in title:
title_2 = '|' + title_1 + '|'
t.append(title_2)
for url_2 in url_1:
url_3 = 'http://ggzy.dongying.gov.cn' + url_2
u.append(url_3)
list_word = zip(m, t, u)
for list_word_1 in list_word:
list_word_2 = ''.join(list_word_1)
q.append(list_word_2)
self.pc_queue.put(q)
def zzggzyjyw(self,url1):
t = []
u = []
q = []
m = []
headers = {
'User-Agent': 'Mozilla/5.0 (Windows NT 6.1; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/83.0.4103.116 Safari/537.36'
}
time.sleep(1)
r = requests.get(url1, headers=headers).text
html = etree.HTML(r)
title = html.xpath('//*[@width="98%"]/tr/td[3]/a/text()')
url_1 = html.xpath('//*[@width="98%"]/tr/td[3]/a/@href')
time_rq = html.xpath('//*[@width="98%"]/tr/td[4]/text()')
for time_rq_1 in time_rq:
time_rq_2 = time_rq_1.replace('[', '').replace(']', '')
m.append(time_rq_2)
for title_1 in title:
title_2 = '|' + title_1 + '|'
t.append(title_2)
for url_2 in url_1:
url_3 = 'http://www.zzggzy.com' + url_2
u.append(url_3)
list_word = zip(m, t, u)
for list_word_1 in list_word:
list_word_2 = ''.join(list_word_1)
q.append(list_word_2)
self.pc_queue.put(q)
def zbggzyjyw(self,url1):
t = []
u = []
q = []
e = []
headers = {
'User-Agent': 'Mozilla/5.0 (Windows NT 6.1; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/83.0.4103.116 Safari/537.36'
}
time.sleep(1)
r = requests.get(url1, headers=headers).text
html = etree.HTML(r)
title = html.xpath('//*[@id="MoreInfoList1_DataGrid1"]/tr/td[2]/a/text()')
url_1 = html.xpath('//*[@id="MoreInfoList1_DataGrid1"]/tr/td[2]/a/@href')
time_rq = html.xpath('//*[@id="MoreInfoList1_DataGrid1"]/tr/td[3]/text()')
for time_rq_1 in time_rq:
time_rq_2 = time_rq_1.strip()
e.append(time_rq_2)
for title_1 in title:
title_2 = '|' + title_1 + '|'
t.append(title_2)
for url_2 in url_1:
url_3 = 'http://ggzyjy.zibo.gov.cn' + url_2
u.append(url_3)
list_word = zip(e, t, u)
for list_word_1 in list_word:
list_word_2 = ''.join(list_word_1)
q.append(list_word_2)
self.pc_queue.put(q)
def sdggzyjyzx(self,url1):
u = []
t = []
q = []
headers = {
'User-Agent': 'Mozilla/5.0 (Windows NT 6.1; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/83.0.4103.116 Safari/537.36'
}
r = requests.get(url1, headers=headers).text
html = etree.HTML(r)
title = html.xpath('//*[@class="ewb-list"]/li/a/text()')
date = html.xpath('//*[@class="ewb-list"]/li/span/text()')
url_1 = html.xpath('//*[@class="ewb-list"]/li/a/@href')
for url_2 in url_1:
url_3 = 'http://ggzyjyzx.shandong.gov.cn' + url_2
u.append(url_3)
for title_1 in title:
title_2 = ' | ' + title_1 + ' | '
t.append(title_2)
list_word = zip(date, t, u)
for list_word_1 in list_word:
list_word_2 = ''.join(list_word_1)
q.append(list_word_2)
self.pc_queue.put(q)
def qdggzyjyw(self,url1):
t = []
u = []
q = []
headers = {
'User-Agent': 'Mozilla/5.0 (Windows NT 6.1; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/83.0.4103.116 Safari/537.36'
}
time.sleep(1)
r = requests.get(url1, headers=headers).text
html = etree.HTML(r)
title = html.xpath('//*[@class="info_con"]/table/tr/td/a/@title')
url_1 = html.xpath('//*[@class="info_con"]/table/tr/td/a/@href')
time_rq = html.xpath('//*[@class="info_con"]/table/tr/td[2]/text()')
for title_1 in title:
title_2 = '|' + title_1 + '|'
t.append(title_2)
for url_2 in url_1:
url_3 = 'https://ggzy.qingdao.gov.cn' + url_2
u.append(url_3)
list_word = zip(time_rq, t, u)
for list_word_1 in list_word:
list_word_2 = ''.join(list_word_1)
q.append(list_word_2)
self.pc_queue.put(q)
class Consumer(threading.Thread):
def __init__(self, url_queue,pc_queue,*args,**kwargs):
super(Consumer,self).__init__(*args,**kwargs)
self.url_queue = url_queue
self.pc_queue = pc_queue
def run(self) :
while True:
if self.pc_queue.empty() and self.url_queue.empty():
break
q = self.pc_queue.get()
for tt in q:
for need_1 in need:
if need_1 in tt:
if no_need_1 not in tt:
if str(yesterday) < tt[:10]:
if tt != []:
print(need_1)
print(tt)
def main():
start = time.time()
url_queue = Queue(1000)
pc_queue = Queue(10000)
tsk = []
for i in range(1,33):
if i ==1:
print('开始')
url = str(i)+'|'+'http://ggzyjyzx.shandong.gov.cn/003/003001/003001001/moreinfo.html'
url_queue.put(url)
if i ==2:
url = str(i)+'|'+'http://ggzyjyzx.shandong.gov.cn/003/003004/003004001/moreinfo.html'
url_queue.put(url)
if i ==3:
for z in range(1, 5):
url = str(i)+'|'+'https://ggzy.qingdao.gov.cn/Tradeinfo-GGGSList/0-0-0?pageIndex=' + str(z)
url_queue.put(url)
if i ==4:
for z in range(1, 5):
url = str(i)+'|'+'https://ggzy.qingdao.gov.cn/Tradeinfo-GGGSList/1-1-0?pageIndex=' + str(z)
url_queue.put(url,i)
if i ==5:
for z in range(1,13):
if z ==1:
url = str(i)+'|'+'http://ggzyjy.zibo.gov.cn/TPFront/jyxx/002001/002001001/002001001001/MoreInfo.aspx?CategoryNum=268698113'
url_queue.put(url,i)
if z ==2:
url =str(i)+'|'+'http://ggzyjy.zibo.gov.cn/TPFront/jyxx/002001/002001001/002001001002/MoreInfo.aspx?CategoryNum=268698114'
url_queue.put(url,i)
if z ==3:
url =str(i)+'|'+'http://ggzyjy.zibo.gov.cn/TPFront/jyxx/002001/002001001/002001001013/MoreInfo.aspx?CategoryNum=268698123'
url_queue.put(url,i)
if z ==4:
url =str(i)+'|'+ 'http://ggzyjy.zibo.gov.cn/TPFront/jyxx/002001/002001001/002001001009/MoreInfo.aspx?CategoryNum=2001001009'
url_queue.put(url,i)
if z ==5:
url =str(i)+'|'+ 'http://ggzyjy.zibo.gov.cn/TPFront/jyxx/002001/002001001/002001001010/MoreInfo.aspx?CategoryNum=268698120'
url_queue.put(url,i)
if z ==6:
url =str(i)+'|'+ 'http://ggzyjy.zibo.gov.cn/TPFront/jyxx/002001/002001001/002001001007/MoreInfo.aspx?CategoryNum=268698119'
url_queue.put(url,i)
if z ==7:
url =str(i)+'|'+'http://ggzyjy.zibo.gov.cn/TPFront/jyxx/002001/002001001/002001001008/MoreInfo.aspx?CategoryNum=2001001008'
url_queue.put(url,i)
if z ==8:
url = str(i)+'|'+'http://ggzyjy.zibo.gov.cn/TPFront/jyxx/002001/002001001/002001001006/MoreInfo.aspx?CategoryNum=268698118'
url_queue.put(url,i)
if z ==9:
url =str(i)+'|'+ 'http://ggzyjy.zibo.gov.cn/TPFront/jyxx/002001/002001001/002001001012/MoreInfo.aspx?CategoryNum=268698122'
url_queue.put(url,i)
if z ==10:
url = str(i)+'|'+'http://ggzyjy.zibo.gov.cn/TPFront/jyxx/002001/002001001/002001001004/MoreInfo.aspx?CategoryNum=268698116'
url_queue.put(url,i)
if z ==11:
url = str(i)+'|'+'http://ggzyjy.zibo.gov.cn/TPFront/jyxx/002001/002001001/002001001005/MoreInfo.aspx?CategoryNum=268698117'
url_queue.put(url,i)
if z ==12:
url = str(i)+'|'+'http://ggzyjy.zibo.gov.cn/TPFront/jyxx/002001/002001001/002001001011/MoreInfo.aspx?CategoryNum=268698121'
url_queue.put(url,i)
if i == 6:
for z in range(1, 13):
if z == 1:
url = str(i)+'|'+'http://ggzyjy.zibo.gov.cn/TPFront/jyxx/002002/002002001/002002001001/MoreInfo.aspx?CategoryNum=268960257'
url_queue.put(url,i)
if z == 2:
url = str(i)+'|'+'http://ggzyjy.zibo.gov.cn/TPFront/jyxx/002002/002002001/002002001002/MoreInfo.aspx?CategoryNum=268960258'
url_queue.put(url,i)
if z == 3:
url = str(i)+'|'+'http://ggzyjy.zibo.gov.cn/TPFront/jyxx/002002/002002001/002002001011/MoreInfo.aspx?CategoryNum=268960265'
url_queue.put(url,i)
if z == 4:
url = str(i)+'|'+'http://ggzyjy.zibo.gov.cn/TPFront/jyxx/002002/002002001/002002001007/MoreInfo.aspx?CategoryNum=268960263'
url_queue.put(url,i)
if z == 5:
url = str(i)+'|'+'http://ggzyjy.zibo.gov.cn/TPFront/jyxx/002002/002002001/002002001012/MoreInfo.aspx?CategoryNum=268960266'
url_queue.put(url,i)
if z == 6:
url = str(i)+'|'+'http://ggzyjy.zibo.gov.cn/TPFront/jyxx/002002/002002001/002002001009/MoreInfo.aspx?CategoryNum=2002001009'
url_queue.put(url,i)
if z == 7:
url = str(i)+'|'+'http://ggzyjy.zibo.gov.cn/TPFront/jyxx/002002/002002001/002002001010/MoreInfo.aspx?CategoryNum=268960264'
url_queue.put(url,i)
if z == 8:
url = str(i)+'|'+'http://ggzyjy.zibo.gov.cn/TPFront/jyxx/002002/002002001/002002001008/MoreInfo.aspx?CategoryNum=2002001008'
url_queue.put(url,i)
if z == 9:
url = str(i)+'|'+'http://ggzyjy.zibo.gov.cn/TPFront/jyxx/002002/002002001/002002001006/MoreInfo.aspx?CategoryNum=268960262'
url_queue.put(url,i)
if z == 10:
url = str(i)+'|'+'http://ggzyjy.zibo.gov.cn/TPFront/jyxx/002002/002002001/002002001004/MoreInfo.aspx?CategoryNum=268960260'
url_queue.put(url,i)
if z == 11:
url = str(i)+'|'+'http://ggzyjy.zibo.gov.cn/TPFront/jyxx/002002/002002001/002002001005/MoreInfo.aspx?CategoryNum=268960261'
url_queue.put(url,i)
if z == 12:
url = str(i)+'|'+'http://ggzyjy.zibo.gov.cn/TPFront/jyxx/002002/002002001/002002001013/MoreInfo.aspx?CategoryNum=268960267'
url_queue.put(url,i)
if i ==7:
for z in range(1,3):
if z ==1:
url = str(i)+'|'+'http://www.zzggzy.com/TPFront/jyxx/070001/070001001/'
url_queue.put(url,i)
if z==2:
for y in range(1,4):
url = str(i)+'|'+'http://www.zzggzy.com/TPFront/jyxx/070002/070002001/Paging='+str(y)
url_queue.put(url, i)
if i ==8:
for z in range(1,10):
url = str(i)+'|'+'http://ggzy.dongying.gov.cn/dyweb/004/004001/004001001/0040010010'+str(z).rjust(2,'0')+'/'
url_queue.put(url, i)
if i ==9:
for z in range(1,10):
url = str(i)+'|'+'http://ggzy.dongying.gov.cn/dyweb/004/004002/004002001/0040020010'+str(z).rjust(2,'0')+'/'
url_queue.put(url, i)
if i ==10:
for z in range(1,4):
url = str(i)+'|'+'http://ggzy.weifang.gov.cn/wfggzy/showinfo/moreinfo_gg.aspx?address=&type=&categorynum=004012001&Paging='+str(z)
url_queue.put(url, i)
if i ==11:
for z in range(1,6):
url = str(i)+'|'+'http://ggzy.weifang.gov.cn/wfggzy/showinfo/moreinfo_gg_zfcgtwo.aspx?address=&type=&categorynum=004002001&Paging='+str(z)
url_queue.put(url, i)
if i ==12:
for z in range(1,4):
url = str(i)+'|'+'http://ggzy.weifang.gov.cn/wfggzy/showinfo/moreinfo_gg_zfcg_cgxq.aspx?address=&categorynum=004002017&Paging='+str(z)
url_queue.put(url, i)
if i ==13:
url = str(i)+'|'+'http://ggzy.jining.gov.cn/api/services/app/stPrtBulletin/GetBulletinList'
url_queue.put(url, i)
if i ==14:
for z in range(1,8):
url = str(i)+'|'+'http://www.taggzyjy.com.cn/Front/jyxx/075001/075001001/07500100100'+str(z)+'/'
url_queue.put(url, i)
if i ==15:
for x in range(1,3):
if x ==1:
for z in range(1,8):
url = str(i)+'|'+'http://www.taggzyjy.com.cn/Front/jyxx/075002/075002004/07500200400'+str(z)+'/'
url_queue.put(url, i)
if x ==2:
for z in range(1,8):
url = str(i)+'|'+'http://www.taggzyjy.com.cn/Front/jyxx/075002/075002001/07500200100'+str(z)+'/'
url_queue.put(url, i)
if i ==16:
for z in range(1,10):
if z ==1:
url = str(i)+'|'+'http://www.whggzyjy.cn/queryContent-jyxx.jspx?title=&inDates=&ext=&origin=&channelId=563&beginTime=&endTime='
url_queue.put(url,i)
if z ==2:
url = str(i)+'|'+'http://www.whggzyjy.cn/queryContent-jyxx.jspx?title=&inDates=&ext=&origin=%E7%8E%AF%E7%BF%A0&channelId=563&beginTime=&endTime='
url_queue.put(url,i)
if z ==3:
url = str(i)+'|'+'http://www.whggzyjy.cn/queryContent-jyxx.jspx?title=&inDates=&ext=&origin=%E9%AB%98%E5%8C%BA&channelId=563&beginTime=&endTime='
url_queue.put(url,i)
if z ==4:
url = str(i)+'|'+'http://www.whggzyjy.cn/queryContent-jyxx.jspx?title=&inDates=&ext=&origin=%E7%BB%8F%E5%8C%BA&channelId=563&beginTime=&endTime='
url_queue.put(url,i)
if z ==5:
url = str(i)+'|'+'http://www.whggzyjy.cn/queryContent-jyxx.jspx?title=&inDates=&ext=&origin=%E4%B8%B4%E6%B8%AF&channelId=563&beginTime=&endTime='
url_queue.put(url,i)
if z ==6:
url = str(i)+'|'+'http://www.whggzyjy.cn/queryContent-jyxx.jspx?title=&inDates=&ext=&origin=%E8%8D%A3%E6%88%90&channelId=563&beginTime=&endTime='
url_queue.put(url,i)
if z ==7:
url = str(i)+'|'+'http://www.whggzyjy.cn/queryContent-jyxx.jspx?title=&inDates=&ext=&origin=%E6%96%87%E7%99%BB&channelId=563&beginTime=&endTime='
url_queue.put(url,i)
if z ==8:
url = str(i)+'|'+'http://www.whggzyjy.cn/queryContent-jyxx.jspx?title=&inDates=&ext=&origin=%E4%B9%B3%E5%B1%B1&channelId=563&beginTime=&endTime='
url_queue.put(url,i)
if z ==9:
url =str(i)+'|'+'http://www.whggzyjy.cn/queryContent-jyxx.jspx?title=&inDates=&ext=&origin=%E5%8D%97%E6%B5%B7&channelId=563&beginTime=&endTime='
url_queue.put(url,i)
if i ==17:
for z in range(1,3):
url =str(i)+'|'+'http://www.whggzyjy.cn/jyxxzfcg/index_'+str(z)+'.jhtml'
url_queue.put(url, i)
if i ==18:
for z in range(1,3):
url = str(i)+'|'+'http://ggzyjy.rizhao.gov.cn/rzwz/ShowInfo/MoreJyxxList.aspx?categoryNum=071001001&Paging='+str(z)
url_queue.put(url, i)
if i ==19:
for z in range(1,3):
url = str(i)+'|'+'http://ggzyjy.rizhao.gov.cn/rzwz/ShowInfo/MoreJyxxList.aspx?categoryNum=071002001&Paging='+str(z)
url_queue.put(url, i)
if i ==20:
for z in range(1,4):
url = str(i)+'|'+'http://ggzyjy.rizhao.gov.cn/rzwz/ShowInfo/MoreJyxxList.aspx?categoryNum=071002002&Paging='+str(z)
url_queue.put(url, i)
if i ==21:
for z in range(1,7):
url = str(i)+'|'+'http://ggzyjy.linyi.gov.cn/TPFront/jyxx/074001/074001001/07400100100'+str(z)+'/'
url_queue.put(url, i)
if i ==22:
for z in range(1,8):
url = str(i)+'|'+'http://ggzyjy.linyi.gov.cn/TPFront/jyxx/074002/074002001/07400200100'+str(z)+'/'
url_queue.put(url, i)
if i ==23:
for z in range(1,8):
url = str(i)+'|'+'http://ggzyjy.linyi.gov.cn/TPFront/jyxx/074002/074002002/07400200200'+str(z)+'/'
url_queue.put(url, i)
if i ==24:
for z in range(1,5):
if z ==1:
url=str(i)+'|'+'http://ggzyjy.dezhou.gov.cn/TPFront/xmxx/004001/004001005/004001005001/'
url_queue.put(url,i)
if z ==2:
url = str(i)+'|'+'http://ggzyjy.dezhou.gov.cn/TPFront/xmxx/004001/004001001/004001001001/'
url_queue.put(url,i)
if z ==3:
url =str(i)+'|'+ 'http://ggzyjy.dezhou.gov.cn/TPFront/xmxx/004002/004002005/004002005001/'
url_queue.put(url,i)
if z ==4:
url = str(i)+'|'+'http://ggzyjy.dezhou.gov.cn/TPFront/xmxx/004002/004002001/004002001001/'
url_queue.put(url,i)
if i ==25:
for z in range(1,6):
for x in range(1,15):
url = str(i)+'|'+'http://www.lcsggzyjy.cn/lcweb/jyxx/079001/079001001/07900100100'+str(z)+'/07900100100'+str(z)+'0'+str(x).rjust(2,'0')+'/'
url_queue.put(url,i)
if i ==26:
for z in range(7,21):
url = str(i)+'|'+'http://www.lcsggzyjy.cn/lcweb/jyxx/079002/079002001/0790020010'+str(z).rjust(2,'0')+'/'
url_queue.put(url, i)
if i ==27:
for t in range(1,4):
if t ==1:
for z in range(1,12):
url = str(i)+'|'+'http://ggzyjy.binzhou.gov.cn/bzweb/002/002004/002004001/0020040010'+str(z).rjust(2,'0')+'/'
url_queue.put(url, i)
if t ==2:
for z in range(1,12):
url = str(i)+'|'+'http://ggzyjy.binzhou.gov.cn/bzweb/002/002005/002005008/0020050080'+str(z).rjust(2,'0')+'/'
url_queue.put(url, i)
if t ==3:
for z in range(1,12):
url = str(i)+'|'+'http://ggzyjy.binzhou.gov.cn/bzweb/002/002005/002005004/0020050040'+str(z).rjust(2,'0')+'/'
url_queue.put(url, i)
if i ==30:
url = str(i)+'|'+'http://www.hzsggzyjyzx.gov.cn/jyxx/001001/001001001/about.html'
url_queue.put(url, i)
if i ==31:
url = str(i)+'|'+'http://www.hzsggzyjyzx.gov.cn/jyxx/001002/001002001/about.html'
url_queue.put(url, i)
if i ==32:
for z in range(1,4):
if z ==1:
url = str(i)+'|'+'http://www.hzsggzyjyzx.gov.cn/jyxx/001002/001002003/about.html'
url_queue.put(url,i)
else:
url = str(i)+'|'+'http://www.hzsggzyjyzx.gov.cn/jyxx/001002/001002003/'+str(z)+'.html'
url_queue.put(url,i)
for x in range(6):
t = Procuder(url_queue,pc_queue)
t.start()
tsk.append(t)
for x in range(4):
t = Consumer(url_queue,pc_queue)
t.start()
tsk.append(t)
for t in tsk:
t.join()
end = time.time()
print('耗时:%0.002fs'%(end - start))
if __name__ == '__main__':
main()