佳慧 发表于 2021-3-21 22:39

python大神进,运行报错!

本帖最后由 佳慧 于 2021-3-21 22:47 编辑

import json
import os
import time
import urllib

import requests
import requests_toolbelt

import hashlib

from requests_toolbelt import MultipartEncoder

WENKU_BASE_URL = 'https://wenku.baidu.com'
# 正式
# WENKU_UPLOAD_URL = WENKU_BASE_URL + '/doc/orgapi/orgupload'
# 测试
WENKU_UPLOAD_URL = WENKU_BASE_URL + '/doc/orgapi/fororguploadtest'
# 正式
# WENKU_APPKEY = '你的正式appkey'
# 测试
WENKU_APPKEY = 'test'
# 正式
# WENKU_SECRET = '你的正式secret'
# 测试
WENKU_SECRET = 'g75a50a20baed9bbd12a6e69d091f4d6'


def upload(file_path=''):
    file_name = os.path.basename(file_path)
    file_tail = get_file_tail(file_name)
    file_name_without_tail = file_name[:-(len(file_tail) + 1)]
    new_file_name = get_md5(file_name_without_tail) + '.' + file_tail
    tmp_file_path = 'C:/Users/Administrator/Desktop/' + os.sep + new_file_name
    # 生成文件名不为中文的临时文件
    os.rename(file_path, tmp_file_path)

    timestamp = '{0}'.format(int(time.time()))
    flag = '10'
    price = '2'

    title = urllib.parse.quote(file_name_without_tail)
    summary = urllib.parse.quote(file_name_without_tail)

    pre_sign_str = 'appkey={0}&flag={1}&price={2}&summary={3}×tamp={4}&title={5}&secret={6}&filemd5={7}'.format(
      WENKU_APPKEY, flag, price, summary, timestamp, title,
      WENKU_SECRET, get_file_md5(tmp_file_path)
    )
    print('pre_sign_str1:', pre_sign_str)
    sign_str = get_md5(pre_sign_str)
    print('sign_str:', sign_str)
    # flag 0:免费文档3:机构文档10:付费文档19:商业文档
    # price 单位为分
    fields = {'object_file': (os.path.basename(tmp_file_path),
                              open(tmp_file_path, 'rb'), 'multipart/form-data'),
            'appkey': WENKU_APPKEY,
            'timestamp': timestamp,
            'sign': sign_str,
            'title': title,
            'flag': '10',
            'price': '2',
            'summary': summary
            }
    data = MultipartEncoder(fields=fields)
    print('ct:', data.content_type)
    result = requests.Session().post(url=WENKU_UPLOAD_URL, data=data,
                                     verify=False, headers={
            'Content-Type': data.content_type})
    # print(result.request.headers)
    print('result.status_code:', result.status_code)
    if result.status_code == 200:
      result_str = result.text
      print('result_str:', result_str)
      result_json = json.loads(result_str)
      code = result_json['status']['code']
      msg = result_json['status']['msg']
      if code == 0:
            print('msg:', msg)
            data = result_json['data']
            print('上传成功,doc_id:', data['doc_id'])
      else:
            print('code:', code)
            print('msg:', msg)
    else:
      print('网络请求出错')


# 获取文件名后缀
def get_file_tail(file_name=''):
    # 获取文件后缀名
    file_arr = file_name.split(".")
    if len(file_arr) >= 2:
      return file_arr.lower()
    else:
      return 'default'


def get_file_md5(file_path=''):
    try:
      with open(file_path, 'rb') as fp:
            data = fp.read()
      file_md5 = hashlib.md5(data).hexdigest()
      print(file_md5)
      return file_md5
    except Exception as e:
      print(e.args)
      return ''


def get_md5(source_str=''):
    return hashlib.md5(source_str.encode('utf8')).hexdigest()


if __name__ == '__main__':
    import glob
    for f in glob.glob(r'C:\Users\Administrator\Desktop\up\*'):
      print(f)
      upload(f)# 文件夹下的所有文件路径逐个传给upload()
      # 因每个循环调用upload时都有对网页的请求,为避免过于频繁,要设定休眠时间,数据根据情况调整大小
      #time.sleep(2)

运行报错:C:\Users\Administrator\AppData\Local\Programs\Python\Python38\python.exe I:/new-python/down/baidu.py
C:\Users\Administrator\Desktop\up\cf7672f2579c5226db8fd7e824a0d9f7.doc
ed52683888cdf3445b9719fc15b106db
pre_sign_str1: appkey=test&flag=10&price=2&summary=cf7672f2579c5226db8fd7e824a0d9f7×tamp=1616336936&title=cf7672f2579c5226db8fd7e824a0d9f7&secret=g75a50a20baed9bbd12a6e69d091f4d6&filemd5=ed52683888cdf3445b9719fc15b106db
sign_str: a86c47614204440eb1bb94beb0879177
ct: multipart/form-data; boundary=6cf4ad8d48b24ab8963f738ccaaa312e
C:\Users\Administrator\AppData\Local\Programs\Python\Python38\lib\site-packages\urllib3\connectionpool.py:1013: InsecureRequestWarning: Unverified HTTPS request is being made to host 'wenku.baidu.com'. Adding certificate verification is strongly advised. See: https://urllib3.readthedocs.io/en/latest/advanced-usage.html#ssl-warnings
warnings.warn(
Traceback (most recent call last):
File "C:\Users\Administrator\AppData\Local\Programs\Python\Python38\lib\site-packages\urllib3\connection.py", line 169, in _new_conn
    conn = connection.create_connection(
File "C:\Users\Administrator\AppData\Local\Programs\Python\Python38\lib\site-packages\urllib3\util\connection.py", line 73, in create_connection
    for res in socket.getaddrinfo(host, port, family, socket.SOCK_STREAM):
File "C:\Users\Administrator\AppData\Local\Programs\Python\Python38\lib\socket.py", line 918, in getaddrinfo
    for res in _socket.getaddrinfo(host, port, family, type, proto, flags):
socket.gaierror: getaddrinfo failed

During handling of the above exception, another exception occurred:

Traceback (most recent call last):
File "C:\Users\Administrator\AppData\Local\Programs\Python\Python38\lib\site-packages\urllib3\connectionpool.py", line 699, in urlopen
    httplib_response = self._make_request(
File "C:\Users\Administrator\AppData\Local\Programs\Python\Python38\lib\site-packages\urllib3\connectionpool.py", line 382, in _make_request
    self._validate_conn(conn)
File "C:\Users\Administrator\AppData\Local\Programs\Python\Python38\lib\site-packages\urllib3\connectionpool.py", line 1010, in _validate_conn
    conn.connect()
File "C:\Users\Administrator\AppData\Local\Programs\Python\Python38\lib\site-packages\urllib3\connection.py", line 353, in connect
    conn = self._new_conn()
File "C:\Users\Administrator\AppData\Local\Programs\Python\Python38\lib\site-packages\urllib3\connection.py", line 181, in _new_conn
    raise NewConnectionError(
urllib3.exceptions.NewConnectionError: <urllib3.connection.HTTPSConnection object at 0x000000000346A6A0>: Failed to establish a new connection: getaddrinfo failed

During handling of the above exception, another exception occurred:

Traceback (most recent call last):
File "C:\Users\Administrator\AppData\Local\Programs\Python\Python38\lib\site-packages\requests\adapters.py", line 439, in send
    resp = conn.urlopen(
File "C:\Users\Administrator\AppData\Local\Programs\Python\Python38\lib\site-packages\urllib3\connectionpool.py", line 755, in urlopen
    retries = retries.increment(
File "C:\Users\Administrator\AppData\Local\Programs\Python\Python38\lib\site-packages\urllib3\util\retry.py", line 573, in increment
    raise MaxRetryError(_pool, url, error or ResponseError(cause))
urllib3.exceptions.MaxRetryError: HTTPSConnectionPool(host='bjyz-bcc-edu-arch-qaenv.bcc-bjyz.baidu.com', port=8080): Max retries exceeded with url: /error.html?status=500&tc=17380495782087063050032122 (Caused by NewConnectionError('<urllib3.connection.HTTPSConnection object at 0x000000000346A6A0>: Failed to establish a new connection: getaddrinfo failed'))

During handling of the above exception, another exception occurred:

Traceback (most recent call last):
File "I:/new-python/down/baidu.py", line 234, in <module>
    upload(f)
File "I:/new-python/down/baidu.py", line 182, in upload
    result = requests.Session().post(url=WENKU_UPLOAD_URL, data=data,
File "C:\Users\Administrator\AppData\Local\Programs\Python\Python38\lib\site-packages\requests\sessions.py", line 590, in post
    return self.request('POST', url, data=data, json=json, **kwargs)
File "C:\Users\Administrator\AppData\Local\Programs\Python\Python38\lib\site-packages\requests\sessions.py", line 542, in request
    resp = self.send(prep, **send_kwargs)
File "C:\Users\Administrator\AppData\Local\Programs\Python\Python38\lib\site-packages\requests\sessions.py", line 677, in send
    history =
File "C:\Users\Administrator\AppData\Local\Programs\Python\Python38\lib\site-packages\requests\sessions.py", line 677, in <listcomp>
    history =
File "C:\Users\Administrator\AppData\Local\Programs\Python\Python38\lib\site-packages\requests\sessions.py", line 237, in resolve_redirects
    resp = self.send(
File "C:\Users\Administrator\AppData\Local\Programs\Python\Python38\lib\site-packages\requests\sessions.py", line 655, in send
    r = adapter.send(request, **kwargs)
File "C:\Users\Administrator\AppData\Local\Programs\Python\Python38\lib\site-packages\requests\adapters.py", line 516, in send
    raise ConnectionError(e, request=request)
requests.exceptions.ConnectionError: HTTPSConnectionPool(host='bjyz-bcc-edu-arch-qaenv.bcc-bjyz.baidu.com', port=8080): Max retries exceeded with url: /error.html?status=500&tc=17380495782087063050032122 (Caused by NewConnectionError('<urllib3.connection.HTTPSConnection object at 0x000000000346A6A0>: Failed to establish a new connection: getaddrinfo failed'))

Process finished with exit code 1

需求就是:上传up文件夹内的每个word文档,到文库;要求每分钟不超过20个;
附上api文档:
接口域名:...........................................................................................................................................................2
签名方法:...........................................................................................................................................................2
文档上传接口.......................................................................................................................................................2
常见错误提示信息..............................................................................................................................................4
签名以及上传文档的代码实例...........................................................................................................................5
百度文库机构合作文档
1.接口域名:
https://wenku.baidu.com
2.签名方法:
把除了文件之外的参数,按照参数名降序排之后, 加上密钥,以及文件内容的md5,拼
接的字符串再做md5。
这是一个拼接的字符串例子: appkey=test×tamp=1590387266
&secret=g75a50a20baed9bbd12a6e69d091f4d6&filemd5=d41d8cd98f00b204e
9800998ecf8427e
其中filemd5 是要上传的文档内容md5。把上面的字符串再做md5,即可得到32
位字符串签名。
每次调用上传接口均应实时生成签名,签名在10 分钟以内有效。
具体示例见文档底部。
3.文档上传接口
接口名称文档上传接口
请求URL /doc/orgapi/orgupload
请求方法POST
请求参数
名称类型是否必须备注
object_file file 是要上传的文件(不要有中文)
百度文库机构合作文档
appkey string 是文库分配的appkey
sign string 是生成的签名
timestamp int 是时间戳,1970 年至今的秒数
title string 否需要urlencode 转码
flag int 否
0:免费文档(默认)3:机构文档10:付费
文档19:商业文档
price int 否flag 为付费文档时(单位:分)
summary string 否需要urlencode 转码
接口返回字段说明
字段字段名称字段类型说明
code 返回码int 0-处理正确!0-处理异常
msg 返回码描述string 返回描述
data 返回数据json 返回数据
成功返回例子:
{
"status":{
"code":0,
"msg":null
},
"data":{
"doc_id":"338248649b6648d7c1c74657"
}
}
doc_id 为文档id。
百度文库机构合作文档
失败返回例子:
{
"status":{
"code":40002,
"msg":"sign is error"
},
"data":{
"errstr":"sign is error"
}
}
4. 错误提示对应表
错误码错误提示中文说明
40001 IP is not allowed ip 地址不在白名单
40002 sign is error 签名错误
40003 paramkey is error 参数名有误
40004 flag is error flag 参数有误
40005 appkey not the white list appkey 不存在
40010 limited quantity or size 文档大小超限
40011 timestamp is overtime 请求时间戳有误
10002 operate too fast 操作过于频繁
10004 miss args 请求参数不存在
30301 doc upload reache day max 达到当日请求上限
5.请求逻辑示例
测试环境接口地址:https://wenku.baidu.com/doc/orgapi/fororguploadtest
测试环境appkey、secret 值如下
百度文库机构合作文档
appkey:test
secret:g75a50a20baed9bbd12a6e69d091f4d6
测试环境返回下面结果,说明测试成功
{"status":{"code":10001,"msg":"forbidden user"},"data":{"errstr":"forbidden user"}}

战网无极限 发表于 2021-3-21 23:46

开放平台地址多少

佳慧 发表于 2021-3-22 00:06

战网无极限 发表于 2021-3-21 23:46
开放平台地址多少

全部资料都在上面了

syrmb 发表于 2021-3-22 00:18

你是不是开了抓包工具之类的 关掉它

页: [1]
查看完整版本: python大神进,运行报错!