import urllib.parse
import string
import requests
import json
import re
import time
import os
import sys

from docx import Document
from docx.shared import Inches
from docx.oxml.ns import qn
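
# Third-party dependencies: requests and python-docx.
# The session cookie below was copied from a logged-in browser session; it
# expires, so refresh it here whenever the site starts replying with its
# "please log in first" message.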
Cookie = 'PHPSESSID=5s6umsm3ic7hmp9djhkia854s9; _gid=398749400617; _gidv=9e54db532452f68408485a0f3f20a1b1'

# Prompt until a non-empty search keyword is supplied.
flag = True
keyword = ''
while flag:
    keyword = input('Enter a search keyword: ')
    if keyword:
        flag = False
    else:
        print('The keyword is empty, please try again!')
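
# Percent-encode any characters in the endpoint URL outside printable ASCII.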
wqxuetang_url = r'https://lib-nuanxin.wqxuetang.com/v1/search/initsearch'
new_wqxuetang_url = urllib.parse.quote(wqxuetang_url, safe=string.printable)

def get_response(page=1):
    """POST the search form for one page of results and return the parsed JSON."""
    headers = {
        'Accept': 'application/json, text/plain, */*',
        'Accept-Encoding': 'gzip, deflate, br',
        'Accept-Language': 'zh-CN,zh;q=0.8,zh-TW;q=0.7,zh-HK;q=0.5,en-US;q=0.3,en;q=0.2',
        'Connection': 'keep-alive',
        'Host': 'lib-nuanxin.wqxuetang.com',
        'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64; rv:72.0) Gecko/20100101 Firefox/72.0',
        'Cookie': Cookie,
    }
    data = {
        'kw': keyword,
        'type': 1,
        'begindate': '',
        'enddate': '',
        'tid': '',
        'libclass': '',
        'cnclass': '',
        'tag': '',
        'author': '',
        'pn': page,
        'size': 10,
        'isfquery': 'undefined',
        'sort': 1,
    }
    response = requests.post(url=new_wqxuetang_url, data=data, headers=headers)
    dict_str = json.loads(response.text)
    return dict_str
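
# Expected response shape, inferred from the fields this script reads:
# {
#     "errmsg": ...,
#     "data": {
#         "pageinfo": {"pagecount": ...},
#         "list": [{"numid": ..., "name": ..., "author": ..., "pubdate": ...,
#                   "pub": ..., "descript": ..., "coverurl": ...}, ...]
#     }
# }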

def data_download(dict_str):
    """Append one page of results to the text file and the Word document,
    downloading each book's cover image along the way."""
    global count
    # Matches the simple HTML tags (e.g. <em>...</em>) that the API embeds
    # in titles and descriptions to highlight the search keyword.
    rec = re.compile(r'</?[a-zA-Z]+\s?\d?/?>')
    for item in dict_str['data']['list']:
        data1 = '{0}[ divider ]{0}\n'.format('*' * 30)
        f1.write(data1)
        data2 = 'Book ID: %s\n' % item['numid']
        f1.write(data2)
        print('BID: %s\t' % item['numid'], end='')
        # Strip the highlight tags from the title.
        book_name = item['name']
        for ret in set(re.findall(rec, book_name)):
            book_name = book_name.replace(ret, '')
        data3 = 'Title: 《%s》\n' % book_name
        f1.write(data3)
        print('Title: 《%s》\t' % book_name, end='')
        data4 = 'Author: %s\n' % item['author']
        f1.write(data4)
        data5 = 'Publication date: %s\n' % item['pubdate']
        f1.write(data5)
        print('Publication date: %s' % item['pubdate'])
        data6 = 'Publisher: %s\n' % item['pub']
        f1.write(data6)
        # Strip the highlight tags from the description as well.
        des = item['descript']
        for ret in set(re.findall(rec, des)):
            des = des.replace(ret, '')
        data7 = 'Description: %s\n' % des
        f1.write(data7)
        # BBCode link to the book page; the id fills both placeholders.
        data8 = ('Book URL: [url=https://lib-nuanxin.wqxuetang.com/#/Book/%s]'
                 'https://lib-nuanxin.wqxuetang.com/#/Book/%s[/url]\n\n'
                 % (item['numid'], item['numid']))
        f1.write(data8)
        f1.flush()
        # Fetch the cover image from the Aliyun OSS bucket.
        img_headers = {
            'Accept': 'text/html,application/xhtml+xml,application/xml;q=0.9,image/webp,*/*;q=0.8',
            'Accept-Encoding': 'gzip, deflate, br',
            'Accept-Language': 'zh-CN,zh;q=0.8,zh-TW;q=0.7,zh-HK;q=0.5,en-US;q=0.3,en;q=0.2',
            'Connection': 'keep-alive',
            'Host': 'bookask-cover.oss-cn-beijing.aliyuncs.com',
            'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64; rv:72.0) Gecko/20100101 Firefox/72.0',
            'Cookie': Cookie,
        }
        img_url = item['coverurl']
        img_data = requests.get(url=img_url, headers=img_headers)
        time.sleep(2)  # pause between image downloads
        img_count = os.path.join(pt, '%s.jpg' % count)
        with open(img_count, 'wb') as fp:
            fp.write(img_data.content)
        # Mirror the same record into the Word document, cover image first.
        data1 = data1.replace('\n', '')
        myDocument.add_paragraph(data1)
        myDocument.add_picture(img_count, width=Inches(2), height=Inches(3))
        datas = [data2, data3, data4, data5, data6, data7, data8]
        datas = [text.replace('\n', '') for text in datas]
        for data_item in datas:
            myDocument.add_paragraph(data_item)
        myDocument.add_paragraph()
        count += 1
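
# Main flow: issue one probe request to validate the cookie and learn the
# total page count before prompting for a page range.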
count = 1  # sequential index used to name the cover image files
dict_str = get_response()
login = dict_str['errmsg']
if login == '请先登录':  # the server's "please log in first" reply
    input('The cookie has expired. Paste a fresh cookie from a logged-in '
          'session into the Cookie variable, then rerun. Press Enter to exit.')
    sys.exit()
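
# Total page count reported by the search API.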
page_count = dict_str['data']['pageinfo']['pagecount']
print('About %s pages' % page_count)

# Ask where to start fetching (defaults to page 1).
flag1 = True
mincount = 1
while flag1:
    min_page = input('Enter the page to start from (default: 1): ')
    if min_page.isdigit():
        mincount = int(min_page)
        flag1 = False
    elif min_page == '':
        flag1 = False
    else:
        print('Invalid input, please try again!')

# Ask for the total number of pages to fetch (defaults to the maximum).
flag2 = True
while flag2:
    max_page = input('Enter the total number of pages to fetch (default: the maximum): ')
    if max_page.isdigit():
        page_count = int(max_page)
        flag2 = False
    elif max_page == '':
        flag2 = False
    else:
        print('Invalid input, please try again!')

# Create a per-keyword folder for the downloaded cover images.
root = os.getcwd()
pt = os.path.join(root, keyword)
if not os.path.isdir(pt):
    os.mkdir(pt)

# Set the Word document's default font to Microsoft YaHei; the east-Asian
# variant must also be set on the style's underlying XML element.
myDocument = Document()
myDocument.styles['Normal'].font.name = u'微软雅黑'
myDocument.styles['Normal']._element.rPr.rFonts.set(qn('w:eastAsia'), u'微软雅黑')
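
# The UTF-8 text file collects the same records as the Word document.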
f1 = open('%s.txt' % keyword, 'w', encoding='utf-8')

# Fetch each requested page, writing as we go.
for page in range(mincount, page_count + 1):
    print('Books on page %s:' % page)
    dict_str = get_response(page)
    if not dict_str['data']['list']:
        break  # no more results
    data_download(dict_str)
    print('[Writing page %s to file...]' % page)
    time.sleep(5)  # throttle between page requests
f1.close()
myDocument.save('%s.docx' % keyword)  # python-docx writes the .docx format
print('All data written. Exiting.')
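
# Run directly: the script prompts for a keyword and a page range, then writes
# <keyword>.txt, <keyword>.docx, and the cover images under ./<keyword>/.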