本帖最后由 double07 于 2021-10-27 13:33 编辑
搞了半天没弄出来：导出的表格中，“成交日期”这一栏始终显示“00:00:00”，怎么也去不掉，请问该如何处理？
import re
import pandas as pd
import requests
from lxml import etree
# Records scraped so far; main() appends one dict per listing item.
data_list = list()
# Running count of records that main() has reported as saved.
p = int()
# Fetch the page content
def gethtml(url, timeout=10):
    """Download *url* and return the response body as text.

    Args:
        url: Address of the page to fetch.
        timeout: Seconds to wait for the server before giving up.

    Returns:
        The decoded response body (``Response.text``).

    Raises:
        requests.RequestException: If the request fails or times out.
    """
    # requests waits indefinitely unless a timeout is given, so a stalled
    # server would otherwise hang the whole script.
    return requests.get(url, timeout=timeout).text
# Main program
def main():
    """Scrape deal dates from the listing pages and export them to Excel.

    Walks the pages ``p1`` .. ``p7`` under the listing URL, reads the date
    text of every ``li[@data-view-event="ModuleExpo"]`` item and appends one
    record per item to the module-level ``data_list``. After each page that
    yielded new records the workbook is rewritten from ``data_list``, then the
    module-level counter ``p`` is incremented and a progress line is printed
    for every record of that page.

    Items whose date text is missing or does not match ``YYYY-MM-DD`` after
    clean-up are skipped, so one bad item cannot break later exports.
    """
    global p
    price = ['p1', 'p2', 'p3', 'p4', 'p5', 'p6', 'p7']
    out_path = "C:/Users/Administrator/Desktop/Python/贝壳/二手房源_成交数据" + ".xlsx"

    for pr in price:
        url = 'https://cq.ke.com/chengjiao/beibinlu/' + pr + '/'
        html = etree.HTML(gethtml(url))
        if html is None:
            # An empty document parses to None; there is nothing to read.
            continue

        added = 0
        for item in html.xpath('//li[@data-view-event="ModuleExpo"]'):
            try:
                raw = item.xpath('./div/div[2]/div[2]/text()')[0]
                deal_date = raw.replace(' ', '').replace('\n', '').replace('.', '-')
                # Validate before storing: a malformed date left in data_list
                # would make every later export fail.
                pd.to_datetime(deal_date, format='%Y-%m-%d')
            except (IndexError, ValueError):
                continue
            data_list.append({'索引': '', '成交日期': deal_date})
            added += 1

        if not added:
            continue

        df = pd.DataFrame(data_list)
        # .dt.date turns the timestamps into plain date objects, so the
        # exported cells no longer carry a "00:00:00" time part.
        df['成交日期'] = pd.to_datetime(df['成交日期'], format='%Y-%m-%d').dt.date
        # Assign the 1-based index column directly; chained
        # df[col].at[i] = ... may write to a temporary copy.
        df['索引'] = range(1, len(df) + 1)
        df.to_excel(out_path, index=False)

        # Report each record only after the workbook has been written.
        for _ in range(added):
            p = p + 1
            print('第%s条数据已保存' % p)
# Run the scraper only when this file is executed as a script.
if __name__ == "__main__":
    main()
|