漫画爬取
这是我写的一个漫画网站的漫画爬取:
from selenium import webdriver
from time import sleep
import re
import requests
from lxml import etree
import os
# HTTP request headers passed to requests.get() when downloading images.
# The User-Agent string identifies the client as Edge 94 on 64-bit Windows 10.
headers = {
"User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/94.0.4606.81 Safari/537.36 Edg/94.0.992.47"}
def mkdir(path):
    """Create the directory ``path``, including any missing parent directories.

    Calling this for a directory that already exists is a no-op.

    Args:
        path: Directory path to create.

    Raises:
        FileExistsError: If ``path`` exists but is not a directory.
    """
    # exist_ok=True replaces the separate "exists?" check, so there is no
    # window between checking and creating in which the directory can appear.
    os.makedirs(path, exist_ok=True)
def huode_url(url):
    """Load ``url`` in an Edge browser and return the page parsed by lxml.

    Args:
        url: Address of the page to open.

    Returns:
        The result of ``etree.HTML()`` applied to the browser's page source.
    """
    drive = webdriver.Edge()
    try:
        drive.minimize_window()
        drive.get(url)
        # Fixed pause before reading the source; presumably gives page
        # scripts time to finish rendering (TODO confirm the delay needed).
        sleep(2)
        text = drive.page_source
    finally:
        # quit() ends the whole WebDriver session (browser and driver
        # process), and the finally block runs even if loading failed, so
        # no browser instance is left behind.
        drive.quit()
    return etree.HTML(text)
def get_tupian_url(text):
    """Return the ``data-src`` attribute values of all ``<img>`` elements.

    Args:
        text: Parsed page object exposing an ``xpath()`` method.

    Returns:
        Whatever ``text.xpath()`` yields for the image XPath expression.
    """
    tupian_xpath = '//img/@data-src'
    return text.xpath(tupian_xpath)
def chuangjian_wenjianjia(text, manhualianjie_url_list):
    """Create the output folder for one chapter and download its images.

    The folder path is built from the page's link texts and its <title>
    text, created via ``mkdir``, and then ``xiazai_tupian`` is called to
    save the images.

    Args:
        text: Parsed page object exposing an ``xpath()`` method.
        manhualianjie_url_list: Image URLs, forwarded to ``xiazai_tupian``.
    """
    manhuazhangjie = text.xpath('//head/title/text()')
    manhuaminglist = text.xpath('//div/a/text()')
    # NOTE(review): the XPath results are stringified as-is; if they are
    # lists (as lxml returns for these expressions) the folder names contain
    # the list repr, brackets and quotes included -- confirm this is intended.
    manhuaming = str(manhuaminglist)
    q = str(manhuazhangjie)
    print(q)
    # NOTE(review): ret is a list of pieces, and it is formatted into the
    # path (and passed on) as a list, not as a single string.
    ret = re.split(r":| ", q)
    # Raw string keeps the backslashes literal without relying on invalid
    # escape sequences ("\{"), which newer Python versions warn about.
    outfile = r'E:\{}\{}'.format(manhuaming, ret)
    folder = os.path.join(os.getcwd(), outfile)
    mkdir(folder)
    xiazai_tupian(manhualianjie_url_list, ret, outfile)
def xiazai_tupian(manhualianjie_url_list, ret, outfile):
    """Download each image URL and save it as a numbered ``.jpg`` file.

    Files are written to ``outfile`` and named ``<ret><ret><n>.jpg``, where
    ``n`` is the 1-based position of the URL in the list.

    Args:
        manhualianjie_url_list: Image URLs to download, in page order.
        ret: Value formatted twice into the file-name prefix.
        outfile: Directory the images are written into; must already exist.
    """
    # The prefix does not depend on the image, so build it once.
    path = '{}{}'.format(ret, ret)
    # enumerate() numbers by position; looking the URL up with list.index()
    # rescans the list each time and gives duplicate URLs the same number,
    # so repeated images would overwrite one another.
    for xuhao, item_url in enumerate(manhualianjie_url_list, start=1):
        response = requests.get(item_url, headers=headers).content
        with open("{}/{}{}.jpg".format(outfile, path, xuhao), 'wb') as f:
            f.write(response)
def shangyizhang_url(text):
    """Return the absolute URL of the previous chapter.

    Args:
        text: Parsed page object exposing an ``xpath()`` method.

    Returns:
        The first ``href`` found by the XPath query, resolved against
        ``http://www.sixmh7.com``.

    Raises:
        ValueError: If the query finds no link at all.
    """
    from urllib.parse import urljoin

    hrefs = text.xpath('//a/@href')
    # A plain string result is still accepted (the only case in which the
    # old "base + result" concatenation did not raise TypeError).
    if isinstance(hrefs, str):
        hrefs = [hrefs]
    if not hrefs:
        raise ValueError('no <a href> found; cannot build the previous-chapter URL')
    # NOTE(review): '//a/@href' matches every link on the page; the first
    # match is assumed to be the previous-chapter link -- confirm the XPath.
    return urljoin('http://www.sixmh7.com', hrefs[0])
def tianjian(text, zhuye_url='http://www.sixmh7.com/16081/'):
    """Decide whether the crawl should continue to another chapter.

    The crawl stops once the previous-chapter link points at the comic's
    home page. The earlier version compared the link list with itself,
    which is always equal, so it returned False unconditionally.

    Args:
        text: Parsed page object exposing an ``xpath()`` method.
        zhuye_url: URL of the comic's home page. NOTE(review): the default
            is derived from the start URL hard-coded in ``main`` -- confirm.

    Returns:
        True to keep crawling; False when the first link equals the home
        page or when the page has no links.
    """
    from urllib.parse import urljoin

    base = 'http://www.sixmh7.com'
    hrefs = text.xpath('//a/@href')
    if isinstance(hrefs, str):
        hrefs = [hrefs]
    if not hrefs:
        # Nothing to follow, so stop rather than loop on the same page.
        return False
    # Compare absolute URLs, ignoring a trailing slash, so '/16081/' and
    # 'http://www.sixmh7.com/16081' are treated as the same page.
    shangyizhang = urljoin(base, hrefs[0]).rstrip('/')
    zhuye = urljoin(base, zhuye_url).rstrip('/')
    return shangyizhang != zhuye
def main():
    """Download chapters one after another, starting from a fixed chapter.

    For each chapter: load the page, download its images into a new
    folder, move on to the previous-chapter URL, and ask ``tianjian``
    whether to continue. Prints a final message when the loop ends.
    """
    # Chapter page of the comic site where the crawl starts.
    url = 'http://www.sixmh7.com/16081/1300180.html'
    jixu = True
    # Plain truth test instead of comparing against True with "==".
    while jixu:
        html = huode_url(url)
        manhualianjie_url_list = get_tupian_url(html)
        chuangjian_wenjianjia(html, manhualianjie_url_list)
        url = shangyizhang_url(html)
        jixu = tianjian(html)
        # Pause between chapters.
        sleep(5)
    print('结束')


if __name__ == '__main__':
    main()
使用了selenium爬取网页源代码,os保存文件到本地,re和lxml解析网页源代码的元素及内容。
该脚本不足地方:
1、未添加搜索脚本
2、selenium调用Edge时会时不时弹窗
jumpbull 发表于 2021-11-4 15:20
用
pip install selenium -i https://pypi.douban.com/simple
命令
7\cryptography\hazmat\bindings
creating build\lib.win-amd64-3.7\cryptography\hazmat\primitives
copying src\cryptography\hazmat\primitives\cmac.py -> build\lib.win-amd64-3.7\
cryptography\hazmat\primitives
copying src\cryptography\hazmat\primitives\constant_time.py -> build\lib.win-a
md64-3.7\cryptography\hazmat\primitives
copying src\cryptography\hazmat\primitives\hashes.py -> build\lib.win-amd64-3.
7\cryptography\hazmat\primitives
copying src\cryptography\hazmat\primitives\hmac.py -> build\lib.win-amd64-3.7\
cryptography\hazmat\primitives
copying src\cryptography\hazmat\primitives\keywrap.py -> build\lib.win-amd64-3
.7\cryptography\hazmat\primitives
copying src\cryptography\hazmat\primitives\padding.py -> build\lib.win-amd64-3
.7\cryptography\hazmat\primitives
copying src\cryptography\hazmat\primitives\poly1305.py -> build\lib.win-amd64-
3.7\cryptography\hazmat\primitives
copying src\cryptography\hazmat\primitives\_asymmetric.py -> build\lib.win-amd
64-3.7\cryptography\hazmat\primitives
copying src\cryptography\hazmat\primitives\_cipheralgorithm.py -> build\lib.wi
n-amd64-3.7\cryptography\hazmat\primitives
copying src\cryptography\hazmat\primitives\_serialization.py -> build\lib.win-
amd64-3.7\cryptography\hazmat\primitives
copying src\cryptography\hazmat\primitives\__init__.py -> build\lib.win-amd64-
3.7\cryptography\hazmat\primitives
creating build\lib.win-amd64-3.7\cryptography\hazmat\backends\openssl
copying src\cryptography\hazmat\backends\openssl\aead.py -> build\lib.win-amd6
4-3.7\cryptography\hazmat\backends\openssl
copying src\cryptography\hazmat\backends\openssl\backend.py -> build\lib.win-a
md64-3.7\cryptography\hazmat\backends\openssl
copying src\cryptography\hazmat\backends\openssl\ciphers.py -> build\lib.win-a
md64-3.7\cryptography\hazmat\backends\openssl
copying src\cryptography\hazmat\backends\openssl\cmac.py -> build\lib.win-amd6
4-3.7\cryptography\hazmat\backends\openssl
copying src\cryptography\hazmat\backends\openssl\decode_asn1.py -> build\lib.w
in-amd64-3.7\cryptography\hazmat\backends\openssl
copying src\cryptography\hazmat\backends\openssl\dh.py -> build\lib.win-amd64-
3.7\cryptography\hazmat\backends\openssl
copying src\cryptography\hazmat\backends\openssl\dsa.py -> build\lib.win-amd64
-3.7\cryptography\hazmat\backends\openssl
copying src\cryptography\hazmat\backends\openssl\ec.py -> build\lib.win-amd64-
3.7\cryptography\hazmat\backends\openssl
copying src\cryptography\hazmat\backends\openssl\ed25519.py -> build\lib.win-a
md64-3.7\cryptography\hazmat\backends\openssl
copying src\cryptography\hazmat\backends\openssl\ed448.py -> build\lib.win-amd
64-3.7\cryptography\hazmat\backends\openssl
copying src\cryptography\hazmat\backends\openssl\encode_asn1.py -> build\lib.w
in-amd64-3.7\cryptography\hazmat\backends\openssl
copying src\cryptography\hazmat\backends\openssl\hashes.py -> build\lib.win-am
d64-3.7\cryptography\hazmat\backends\openssl
copying src\cryptography\hazmat\backends\openssl\hmac.py -> build\lib.win-amd6
4-3.7\cryptography\hazmat\backends\openssl
copying src\cryptography\hazmat\backends\openssl\poly1305.py -> build\lib.win-
amd64-3.7\cryptography\hazmat\backends\openssl
copying src\cryptography\hazmat\backends\openssl\rsa.py -> build\lib.win-amd64
-3.7\cryptography\hazmat\backends\openssl
copying src\cryptography\hazmat\backends\openssl\utils.py -> build\lib.win-amd
64-3.7\cryptography\hazmat\backends\openssl
copying src\cryptography\hazmat\backends\openssl\x25519.py -> build\lib.win-am
d64-3.7\cryptography\hazmat\backends\openssl
copying src\cryptography\hazmat\backends\openssl\x448.py -> build\lib.win-amd6
4-3.7\cryptography\hazmat\backends\openssl
copying src\cryptography\hazmat\backends\openssl\x509.py -> build\lib.win-amd6
4-3.7\cryptography\hazmat\backends\openssl
copying src\cryptography\hazmat\backends\openssl\__init__.py -> build\lib.win-
amd64-3.7\cryptography\hazmat\backends\openssl
creating build\lib.win-amd64-3.7\cryptography\hazmat\bindings\openssl
copying src\cryptography\hazmat\bindings\openssl\binding.py -> build\lib.win-a
md64-3.7\cryptography\hazmat\bindings\openssl
copying src\cryptography\hazmat\bindings\openssl\_conditional.py -> build\lib.
win-amd64-3.7\cryptography\hazmat\bindings\openssl
copying src\cryptography\hazmat\bindings\openssl\__init__.py -> build\lib.win-
amd64-3.7\cryptography\hazmat\bindings\openssl
creating build\lib.win-amd64-3.7\cryptography\hazmat\primitives\asymmetric
copying src\cryptography\hazmat\primitives\asymmetric\dh.py -> build\lib.win-a
md64-3.7\cryptography\hazmat\primitives\asymmetric
copying src\cryptography\hazmat\primitives\asymmetric\dsa.py -> build\lib.win-
amd64-3.7\cryptography\hazmat\primitives\asymmetric
copying src\cryptography\hazmat\primitives\asymmetric\ec.py -> build\lib.win-a
md64-3.7\cryptography\hazmat\primitives\asymmetric
copying src\cryptography\hazmat\primitives\asymmetric\ed25519.py -> build\lib.
win-amd64-3.7\cryptography\hazmat\primitives\asymmetric
copying src\cryptography\hazmat\primitives\asymmetric\ed448.py -> build\lib.wi
n-amd64-3.7\cryptography\hazmat\primitives\asymmetric
copying src\cryptography\hazmat\primitives\asymmetric\padding.py -> build\lib.
win-amd64-3.7\cryptography\hazmat\primitives\asymmetric
copying src\cryptography\hazmat\primitives\asymmetric\rsa.py -> build\lib.win-
amd64-3.7\cryptography\hazmat\primitives\asymmetric
copying src\cryptography\hazmat\primitives\asymmetric\types.py -> build\lib.wi
n-amd64-3.7\cryptography\hazmat\primitives\asymmetric
copying src\cryptography\hazmat\primitives\asymmetric\utils.py -> build\lib.wi
n-amd64-3.7\cryptography\hazmat\primitives\asymmetric
copying src\cryptography\hazmat\primitives\asymmetric\x25519.py -> build\lib.w
in-amd64-3.7\cryptography\hazmat\primitives\asymmetric
copying src\cryptography\hazmat\primitives\asymmetric\x448.py -> build\lib.win
-amd64-3.7\cryptography\hazmat\primitives\asymmetric
copying src\cryptography\hazmat\primitives\asymmetric\__init__.py -> build\lib
.win-amd64-3.7\cryptography\hazmat\primitives\asymmetric
creating build\lib.win-amd64-3.7\cryptography\hazmat\primitives\ciphers
copying src\cryptography\hazmat\primitives\ciphers\aead.py -> build\lib.win-am
d64-3.7\cryptography\hazmat\primitives\ciphers
copying src\cryptography\hazmat\primitives\ciphers\algorithms.py -> build\lib.
win-amd64-3.7\cryptography\hazmat\primitives\ciphers
copying src\cryptography\hazmat\primitives\ciphers\base.py -> build\lib.win-am
d64-3.7\cryptography\hazmat\primitives\ciphers
copying src\cryptography\hazmat\primitives\ciphers\modes.py -> build\lib.win-a
md64-3.7\cryptography\hazmat\primitives\ciphers
copying src\cryptography\hazmat\primitives\ciphers\__init__.py -> build\lib.wi
n-amd64-3.7\cryptography\hazmat\primitives\ciphers
creating build\lib.win-amd64-3.7\cryptography\hazmat\primitives\kdf
copying src\cryptography\hazmat\primitives\kdf\concatkdf.py -> build\lib.win-a
md64-3.7\cryptography\hazmat\primitives\kdf
copying src\cryptography\hazmat\primitives\kdf\hkdf.py -> build\lib.win-amd64-
3.7\cryptography\hazmat\primitives\kdf
copying src\cryptography\hazmat\primitives\kdf\kbkdf.py -> build\lib.win-amd64
-3.7\cryptography\hazmat\primitives\kdf
copying src\cryptography\hazmat\primitives\kdf\pbkdf2.py -> build\lib.win-amd6
4-3.7\cryptography\hazmat\primitives\kdf
copying src\cryptography\hazmat\primitives\kdf\scrypt.py -> build\lib.win-amd6
4-3.7\cryptography\hazmat\primitives\kdf
copying src\cryptography\hazmat\primitives\kdf\x963kdf.py -> build\lib.win-amd
64-3.7\cryptography\hazmat\primitives\kdf
copying src\cryptography\hazmat\primitives\kdf\__init__.py -> build\lib.win-am
d64-3.7\cryptography\hazmat\primitives\kdf
creating build\lib.win-amd64-3.7\cryptography\hazmat\primitives\serialization
copying src\cryptography\hazmat\primitives\serialization\base.py -> build\lib.
win-amd64-3.7\cryptography\hazmat\primitives\serialization
copying src\cryptography\hazmat\primitives\serialization\pkcs12.py -> build\li
b.win-amd64-3.7\cryptography\hazmat\primitives\serialization
copying src\cryptography\hazmat\primitives\serialization\pkcs7.py -> build\lib
.win-amd64-3.7\cryptography\hazmat\primitives\serialization
copying src\cryptography\hazmat\primitives\serialization\ssh.py -> build\lib.w
in-amd64-3.7\cryptography\hazmat\primitives\serialization
copying src\cryptography\hazmat\primitives\serialization\__init__.py -> build\
lib.win-amd64-3.7\cryptography\hazmat\primitives\serialization
creating build\lib.win-amd64-3.7\cryptography\hazmat\primitives\twofactor
copying src\cryptography\hazmat\primitives\twofactor\hotp.py -> build\lib.win-
amd64-3.7\cryptography\hazmat\primitives\twofactor
copying src\cryptography\hazmat\primitives\twofactor\totp.py -> build\lib.win-
amd64-3.7\cryptography\hazmat\primitives\twofactor
copying src\cryptography\hazmat\primitives\twofactor\__init__.py -> build\lib.
win-amd64-3.7\cryptography\hazmat\primitives\twofactor
running egg_info
writing src\cryptography.egg-info\PKG-INFO
writing dependency_links to src\cryptography.egg-info\dependency_links.txt
writing requirements to src\cryptography.egg-info\requires.txt
writing top-level names to src\cryptography.egg-info\top_level.txt
reading manifest file 'src\cryptography.egg-info\SOURCES.txt'
reading manifest template 'MANIFEST.in'
no previously-included directories found matching 'docs\_build'
warning: no previously-included files found matching 'vectors'
warning: no previously-included files matching '*' found under directory 'vect
ors'
warning: no previously-included files matching '*' found under directory '.git
hub'
warning: no previously-included files found matching 'release.py'
warning: no previously-included files found matching '.coveragerc'
warning: no previously-included files found matching 'codecov.yml'
warning: no previously-included files found matching '.readthedocs.yml'
warning: no previously-included files found matching 'dev-requirements.txt'
warning: no previously-included files found matching 'tox.ini'
warning: no previously-included files found matching 'mypy.ini'
warning: no previously-included files matching '*' found under directory '.zuu
l.d'
warning: no previously-included files matching '*' found under directory '.zuu
l.playbooks'
adding license file 'LICENSE'
adding license file 'LICENSE.APACHE'
adding license file 'LICENSE.BSD'
adding license file 'LICENSE.PSF'
writing manifest file 'src\cryptography.egg-info\SOURCES.txt'
copying src\cryptography\py.typed -> build\lib.win-amd64-3.7\cryptography
creating build\lib.win-amd64-3.7\cryptography\hazmat\bindings\_rust
copying src\cryptography\hazmat\bindings\_rust\__init__.pyi -> build\lib.win-a
md64-3.7\cryptography\hazmat\bindings\_rust
copying src\cryptography\hazmat\bindings\_rust\asn1.pyi -> build\lib.win-amd64
-3.7\cryptography\hazmat\bindings\_rust
copying src\cryptography\hazmat\bindings\_rust\ocsp.pyi -> build\lib.win-amd64
-3.7\cryptography\hazmat\bindings\_rust
copying src\cryptography\hazmat\bindings\_rust\x509.pyi -> build\lib.win-amd64
-3.7\cryptography\hazmat\bindings\_rust
running build_ext
generating cffi module 'build\\temp.win-amd64-3.7\\Release\\_openssl.c'
creating build\temp.win-amd64-3.7
creating build\temp.win-amd64-3.7\Release
running build_rust
=============================DEBUG ASSISTANCE=============================
If you are seeing a compilation error please try the following steps to
successfully install cryptography:
1) Upgrade to the latest pip and try again. This will fix errors for most
users. See: https://pip.pypa.io/en/stable/installing/#upgrading-pip
2) Read https://cryptography.io/en/latest/installation/ for specific
instructions for your platform.
3) Check our frequently asked questions for more information:
https://cryptography.io/en/latest/faq/
4) Ensure you have a recent Rust toolchain installed:
https://cryptography.io/en/latest/installation/#rust
Python: 3.7.5
platform: Windows-7-6.1.7601-SP1
pip: n/a
setuptools: 58.4.0
setuptools_rust: 0.12.1
=============================DEBUG ASSISTANCE=============================
error: can't find Rust compiler
If you are using an outdated pip version, it is possible a prebuilt wheel is a
vailable for this package but pip is not able to install from it. Installing fro
m the wheel would avoid the need for a Rust compiler.
To update pip, run:
pip install --upgrade pip
and then retry package installation.
If you did intend to build this package from source, try installing a Rust com
piler from your system package manager and ensure it is on the PATH during insta
llation. Alternatively, rustup (available at https://rustup.rs) is the recommend
ed way to download and update the Rust compiler toolchain.
This package requires Rust >=1.41.0.
----------------------------------------
ERROR: Failed building wheel for cryptography
Running setup.py clean for cryptography
ERROR: Command errored out with exit status 1:
command: 'c:\pc\python.exe' -u -c 'import sys, setuptools, tokenize; sys.argv
= '"'"'C:\\Users\\ADMINI~1\\AppData\\Local\\Temp\\pip-install-hupx2j18\\cryp
tography\\setup.py'"'"'; __file__='"'"'C:\\Users\\ADMINI~1\\AppData\\Local\\Temp
\\pip-install-hupx2j18\\cryptography\\setup.py'"'"';f=getattr(tokenize, '"'"'ope
n'"'"', open)(__file__);code=f.read().replace('"'"'\r\n'"'"', '"'"'\n'"'"');f.cl
ose();exec(compile(code, __file__, '"'"'exec'"'"'))' clean --all
cwd: C:\Users\ADMINI~1\AppData\Local\Temp\pip-install-hupx2j18\cryptograp
hy
Complete output (42 lines):
C:\Users\ADMINI~1\AppData\Local\Temp\pip-build-env-psr7c26i\overlay\Lib\site-p
ackages\setuptools\__init__.py:151: SetuptoolsDeprecationWarning: setup_requires
is deprecated. Supply build dependencies using PEP 517 pyproject.toml build-req
uires.
SetuptoolsDeprecationWarning,
running clean
removing 'build\temp.win-amd64-3.7' (and everything under it)
removing 'build\lib.win-amd64-3.7' (and everything under it)
'build\bdist.win-amd64' does not exist -- can't clean it
'build\scripts-3.7' does not exist -- can't clean it
removing 'build'
running clean_rust
error: can't find Rust compiler
If you are using an outdated pip version, it is possible a prebuilt wheel is a
vailable for this package but pip is not able to install from it. Installing fro
m the wheel would avoid the need for a Rust compiler.
To update pip, run:
pip install --upgrade pip
and then retry package installation.
If you did intend to build this package from source, try installing a Rust com
piler from your system package manager and ensure it is on the PATH during insta
llation. Alternatively, rustup (available at https://rustup.rs) is the recommend
ed way to download and update the Rust compiler toolchain.
This package requires Rust >=1.41.0.
=============================DEBUG ASSISTANCE=============================
If you are seeing a compilation error please try the following steps to
successfully install cryptography:
1) Upgrade to the latest pip and try again. This will fix errors for most
users. See: https://pip.pypa.io/en/stable/installing/#upgrading-pip
2) Read https://cryptography.io/en/latest/installation/ for specific
instructions for your platform.
3) Check our frequently asked questions for more information:
https://cryptography.io/en/latest/faq/
4) Ensure you have a recent Rust toolchain installed:
https://cryptography.io/en/latest/installation/#rust
Python: 3.7.5
platform: Windows-7-6.1.7601-SP1
pip: n/a
setuptools: 58.4.0
setuptools_rust: 0.12.1
=============================DEBUG ASSISTANCE=============================
----------------------------------------
ERROR: Failed cleaning build dir for cryptography
Failed to build cryptography
ERROR: Could not build wheels for cryptography which use PEP 517 and cannot be i
nstalled directly
WARNING: You are using pip version 19.2.3, however version 21.3.1 is available.
You should consider upgrading via the 'python -m pip install --upgrade pip' comm
and.
C:\Users\Administrator> Asy_少洋 发表于 2021-11-4 16:04
7\cryptography\hazmat\bindings
creating build\lib.win-amd64-3.7\cryptography\hazmat\primitives
...
使用下面命令升级一下pip
python -m pip install --upgrade pip
看到Successfully字样即安装成功
再重复之前操作
厉害了
什么时候出软件
马上来实践一下
来了吾爱破解,感觉自己就是个废物,电脑知识也是白学了,坐等软件下载地址!
感谢大佬的分享!!!
这不会就是楼主的漫画网吧{:301_997:}
No module named 'selenium',安装PIP命令 pip install selenium 一直安装不上,到后面都是红色报错,看不懂
茶茶大人L 发表于 2021-10-29 22:02
这不会就是楼主的漫画网吧
不是我的漫画网,我还是刚刚学的 Asy_少洋 发表于 2021-10-30 08:56
No module named 'selenium',安装PIP命令 pip install selenium 一直安装不上,到后面都是红色报错,看不懂
试用一下pip3 install selenium https://www.helloimg.com/images/2021/11/03/CYtZut.jpg
提示这个报错。不知道什么意思大佬
页:
[1]
2