各位大神帮我看看我这个爬虫的类写的对不对
#_*_ coding='utf-8' _*_
import sys,os,re
import requests
from bs4 import BeautifulSoup
#------------------------
class fitgirl(objbect):
#headers={
'User-Agent':'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/75.0.3770.100 Safari/537.36'
#'Referer':"https://www.manhuadb.com/author/370"
# }
#url='http://fitgirl-repacks.site/page/' #这样定义对不对
def __init__(self,page,html,url,headers,dict):
self.url='http://fitgirl-repacks.site/page/'
self.page=page
self.html=html
self.dict=dict{}
self.headers={
'User-Agent':'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/75.0.3770.100 Safari/537.36',
#'Referer':"https://www.manhuadb.com/author/370"
}
def page_exx(self,html,page):
print("这是第{}内容".format(self.page))
soup=BeautifulSoup(self.html,'lxml')
for s in soup.find_all("article",id=re.compile("post-\d+$")):
dict["name"]=s.find('h1',class_="entry-title").getText()
dict["dizhi"]=s.find('a',href=re.compile('^magnet')).get('href')
print(dict.get("name")+"\n"+dict.get('dizhi'))
def main(self):
for p in range(1,100):
r = requests.get(self.url+str(p),self.headers)
try:
page_exx(r.text,p)
except:
pass
if __name__ == "__main__":
fitgirl()
我运行错误,各位能帮我写一个对的类吗根据我这个 你这代码乱七八糟的,.get是java的写法,拷贝出来改都不想改。。。 class fitgirl():
def __init__(self,url):
self.url=url
def page_exx(self,html:str,p:str):
dict={}
print("这是第{}内容".format(self.page))
soup=BeautifulSoup(html,'lxml')
for s in soup.find_all("article",id=re.compile("post-\d+$")):
dict["name"]=s.find('h1',class_="entry-title").getText()
dict["dizhi"]=s.find('a',href=re.compile('^magnet')).get('href')
print(dict.get("name")+"\n"+dict.get('dizhi'))
def main(self):
headers={
'User-Agent':'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/75.0.3770.100 Safari/537.36',
#'Referer':"https://www.manhuadb.com/author/370"
}
for p in range(1,10):
r = requests.get(self.url+str(p),headers)
try:
page_exx(r.text,p)
except:
pass
if __name__ == "__main__":
fitgirl('http://fitgirl-repacks.site/page/')
为什么这样运行了 什么结果都没有我第二个Page_exx 这个self写的不对嘛?感觉值没有传过去 本帖最后由 zldtb19931116 于 2020-9-2 11:00 编辑
# coding='utf-8' *
import sys, os, re
import requests
from bs4 import BeautifulSoup
class Fitgirl(object):
headers = {
"User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/75.0.3770.100 Safari/537.36"
# 'Referer':"https://www.manhuadb.com/author/370"
}
url = 'http://fitgirl-repacks.site/page/'# 这样定义对不对
def __init__(self):
self.url = 'http://fitgirl-repacks.site/page/'
self.dict = {}
self.headers = {
'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/75.0.3770.100 Safari/537.36',
# 'Referer':"https://www.manhuadb.com/author/370"
}
def page_exx(self, html, page):
print("这是第{}内容".format(page))
soup = BeautifulSoup(html, 'lxml')
for s in soup.find_all("article", id=re.compile("post-\d+$")):
self.dict["name"] = s.find('h1', class_="entry-title").getText()
self.dict["dizhi"] = s.find('a', href=re.compile('^magnet')).get(
'href')
print(self.dict["name"] + "\n" + self.dict['dizhi'])
def main(self):
for p in range(1, 100):
r = requests.get(self.url + str(p), self.headers)
try:
self.page_exx(r.text, p)
except Exception as e:
print(e)
if __name__ == '__main__':
fitgirl = Fitgirl()
fitgirl.main()
将就着看吧,你写的太乱了,lxml这个组件我没用过,你自己再看看吧,我逛水区去了,你except后面直接pass了,这样就看不出到底是什么错了,你要把异常信息打印出来看到底是什么错误 你这代码写的是稀碎啊,不知道你想实现什么功能,简单给你改了改,那个网站我打不开,也就没调试了
import sys, os, re
import requests
from bs4 import BeautifulSoup
class fitgirl(object):
# headers={
# 'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/75.0.3770.100 Safari/537.36'
# 'Referer':"https://www.manhuadb.com/author/370"
# }
# url='http://fitgirl-repacks.site/page/' #这样定义对不对
def __init__(self):
self.url = 'http://fitgirl-repacks.site/page/'
self.page = ""
self.html = ""
self.dicts = dict
{}
self.headers = {
'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/75.0.3770.100 Safari/537.36',
}
def page_exx(self):
print("这是第{}内容".format(self.page))
soup = BeautifulSoup(self.html, 'lxml')
for s in soup.find_all("article", id=re.compile("post-\d+$")):
self.dicts["name"] = s.find('h1', class_="entry-title").getText()
self.dicts["dizhi"] = s.find('a', href=re.compile('^magnet')).get('href')
print(self.dicts.get("name") + "\n" + dict.get('dizhi'))
def main(self):
for p in range(1, 100):
r = requests.get(self.url + str(p), self.headers)
try:
self.page_exx(r.text, p)
except:
pass
if __name__ == '__main__':
f = fitgirl()
f.main()
3Q,刚刚新手学习类的用法,这里PASS掉 我主要是想用类 写一个爬虫,之前都是直接DEF的 错误信息无所谓的 zheng10072 发表于 2020-9-2 11:01
你这代码写的是稀碎啊,不知道你想实现什么功能,简单给你改了改,那个网站我打不开,也就没调试了
我不是想实现什么 ,我本来做了这个网站的爬虫,现在学了类,想用类写一下之前这个爬虫的基本功能 hahawangzi 发表于 2020-9-2 11:23
我不是想实现什么 ,我本来做了这个网站的爬虫,现在学了类,想用类写一下之前这个爬虫的基本功能
你写了个类,你必须实例化一下,然后调用实例化后的这个对象 我想问问楼主学的怎么样了{:1_924:},我也是新手,我想实现定时自动登录网站,然后通过cookie实现自动提交一些信息,楼主知道咋做吗?:loveliness:可以交流一下 那你这个还是比较简单的 百度有很多东西的
页:
[1]