ΪʲôС˵ÄÚÈÝÊÇ¿Õ°×ÄØ printÓÐÄÚÈÝ
ÓÃprint£¨¡°page2¡±£©ÄÜ¿´µ½Ã¿Ò»ÕµÄÄÚÈÝimportrequestsimport os
from lxml importetree
headers = {
"User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/87.0.4280.88 Safari/537.36"
}
url = "https://book.qidian.com/info/1025592578#Catalog"
page_text = requests.get(url=url,headers=headers).text
tree = etree.HTML(page_text)
#½âÎö³öÕ½ÚÃû³ÆºÍÏêÇéÒ³Url
li_list = tree.xpath('//*[@id="j-catalogWrap"]/div/div/ul/li')
if not os.path.exists('./dagelao'):
os.mkdir('./dagelao')
for li in li_list:
detail_url= "https:"+li.xpath('./a/@href')
name = li.xpath('./a/text()') + ".text"
detaii_page_text = requests.get(url=detail_url,headers=headers).text
detail_tree = etree.HTML(detaii_page_text)
detail_text = detail_tree.xpath('//*[@class="text-wrap"]/div/div//text()')
for page2 in detail_text:
path = './dagelao/' + name
with open(path,"w",encoding="UTF-8") as pf:
pf.write(page2)
print(name,"ÏÂÔØÍê±Ï") ÕÒµ½ÔÒòÁËwith open(path,"w",encoding="UTF-8") µÄ¡°w¡± ²»¶Ô»»³Éa¾Í¶ÔÁË
w ÿ´ÎÑ»·¶¼°ÑÉϴεÄÎļþɾµôÖØд´½¨ Á½¸öµØ·½´íÁË
µÚÒ»¸ö×îΪÖÂÃü
with open(path,"w",encoding="UTF-8") as pf:
ÄãÕâÀïÊÇ´ÓÁбíÀïÑ»·È¡³öÎı¾ÒªÓÃ×·¼ÓģʽҲ¾ÍÊÇa+
È»ºóÄãµÄÎļþÃûºó׺¾ÓÈ»ÊÇtext£¬ÂèµÄÎÒ²îµãû´ò¿ª Îı¾ÊÇtxt ¶àлָµã fanvalen ·¢±íÓÚ 2021-5-25 21:34
Á½¸öµØ·½´íÁË
µÚÒ»¸ö×îΪÖÂÃü
with open(path,"w",encoding="UTF-8") as pf:
дÎļþ×îºÃ»¹ÊǸijÉÒ»´ÎÐÔдÈëµ½ÎļþÐÔÄܱȽϺðɡ£ tanzhiwei ·¢±íÓÚ 2021-5-26 09:25
дÎļþ×îºÃ»¹ÊǸijÉÒ»´ÎÐÔдÈëµ½ÎļþÐÔÄܱȽϺðɡ£
¿´ÇëÇóµÄÊý¾ÝÇé¿ö°É¡£·Ö¶à´ÎÇëÇóµ½µÄÄÚÈݲ»±£´æµÄ»°¾ÍÔÚÄÚ´æÀÈç¹û³ÌÐò±¼À£Ò»Ï¾Ͱ׸ÉÁË¡£with»á×Ô¼ºÔÚºÏÊʵĽڵã¹Ø±ÕÎļþµÄ¡£ÍËÒ»Íò²½½²£¬Ð¡ÏîÄ¿²»Óÿ¼ÂÇÕâô¶à¡£ Èç¹ûÄÚ´æÓÐÌõ¼þµÄ»°£¬¿ÉÒÔ°ÑÅÀÈ¡ÏÂÀ´µÄÎı¾´æµ½ÄÚ´æÀ±ÈÈç°´Ðдæ½ølist»òÕßÊÇ´æ½ø×Ö·û´®Àï
Ò³:
[1]