#-*-coding:utf-8-*-
import urllib2
import time
import MySQLdb
import json
import sys
# Python 2 only: make UTF-8 the implicit str<->unicode codec so that mixing
# the UTF-8 byte strings below with unicode values decoded from JSON does not
# raise UnicodeDecodeError.
defaultencoding = 'utf-8'
if sys.getdefaultencoding() != defaultencoding:
    reload(sys)
    sys.setdefaultencoding(defaultencoding)

# Maps the first two characters of a row's idCard to the region name that is
# stored in the `city` column. Built once instead of once per row.
area = {
    "11": "北京", "12": "天津", "13": "河北", "14": "山西", "15": "内蒙古",
    "21": "辽宁", "22": "吉林", "23": "黑龙江",
    "31": "上海", "32": "江苏", "33": "浙江", "34": "安徽", "35": "福建",
    "36": "江西", "37": "山东",
    "41": "河南", "42": "湖北", "43": "湖南", "44": "广东", "45": "广西",
    "46": "海南",
    "50": "重庆", "51": "四川", "52": "贵州", "53": "云南", "54": "西藏",
    "61": "陕西", "62": "甘肃", "63": "青海", "64": "宁夏", "65": "新疆",
    "71": "台湾", "81": "香港", "82": "澳门", "91": "国外",
}

# The crawl requests page values 0, 10, ..., LAST_PAGE_VALUE (inclusive).
LAST_PAGE_VALUE = 6020
PAGE_STEP = 10
# How many times one page is requested before it is given up on.
MAX_FETCH_ATTEMPTS = 3
# Pause (seconds) between pages and between retries of a failed page.
PAUSE_SECONDS = 1

# NOTE(review): the query string carries a fixed `offset=10` while the value
# substituted into `page` advances in steps of 10 like an offset. Kept exactly
# as it was; confirm against the server which parameter it really honours.
URL_TEMPLATE = "http://www.bjrbj.gov.cn/integralpublic/settlePerson/settlePersonJson?sort=pxid&order=asc&limit=10&offset=10&name=&rows=10&page=%s"

# Placeholders are filled in by the driver, which escapes every value. The
# statement is never assembled with string formatting, so quotes inside the
# scraped names/units can neither break nor inject into the query.
INSERT_SQL = ("insert into bjrbj(name,city,score,unit,idCard,year)"
              "values(%s,%s,%s,%s,%s,%s)")


def fetch_rows(page_value):
    """Download one result page and return the list under its 'rows' key.

    Raises whatever urllib2, the gbk decoding, the JSON parser or the 'rows'
    lookup raises; the caller decides whether to retry.
    """
    url = URL_TEMPLATE % page_value
    print(url)
    request = urllib2.Request(url.encode('utf8'))
    payload = urllib2.urlopen(request, timeout=5).read()
    return json.loads(payload.decode("gbk"))['rows']


def fetch_rows_with_retry(page_value):
    """Return the rows of one page, or an empty list if every attempt fails.

    Each failed attempt is reported and followed by a pause. After
    MAX_FETCH_ATTEMPTS failures the page is skipped so that a permanently
    broken page cannot stall the crawl forever.
    """
    for attempt in range(1, MAX_FETCH_ATTEMPTS + 1):
        try:
            return fetch_rows(page_value)
        except Exception as exc:
            # `Exception` rather than a bare except, so Ctrl-C and SystemExit
            # still stop the script.
            print("fetch failed for page=%s (attempt %s of %s): %r"
                  % (page_value, attempt, MAX_FETCH_ATTEMPTS, exc))
            time.sleep(PAUSE_SECONDS)
    print("giving up on page=%s" % page_value)
    return []


def build_insert_params(row):
    """Return the INSERT parameter tuple for one row, or None if unusable.

    The region is looked up from idCard[0:2] and the `year` column takes
    idCard[6:10] (presumably the birth year -- confirm). A row with a missing
    field or an unknown region prefix is reported and skipped instead of
    aborting the whole page.
    """
    try:
        id_card = row['idCard']
        return (row['name'], area[id_card[0:2]], row['score'], row['unit'],
                id_card, id_card[6:10])
    except (KeyError, TypeError) as exc:
        print("skipping malformed row: %r" % (exc,))
        return None


def store_row(connection, db_cursor, row):
    """Insert one row and commit it; roll back and report on failure."""
    params = build_insert_params(row)
    if params is None:
        return False
    try:
        db_cursor.execute(INSERT_SQL, params)
        connection.commit()
    except (MySQLdb.Error, UnicodeError) as exc:
        print("出错了!!")
        print(repr(exc))
        connection.rollback()
        return False
    return True


db = MySQLdb.connect("localhost", "root", "root", "py", charset='utf8')
cursor = db.cursor()
try:
    for page in range(0, LAST_PAGE_VALUE + 1, PAGE_STEP):
        for row in fetch_rows_with_retry(page):
            store_row(db, cursor, row)
        # Be polite to the server: one pause per page.
        time.sleep(PAUSE_SECONDS)
finally:
    # Release the cursor and connection even if the crawl is interrupted.
    cursor.close()
    db.close()