Python（阿里云接口视频硬字幕识别）

yk156511 · 发表于 2023-3-12 21:54

[Python] 纯文本查看 复制代码

import cv2
import requests
import base64
import json

# Input video and output text paths (fill in real locations before running).
video_file_path = "G:/.mp4"
text_save_path = ".txt"
# This name was referenced by the script but never defined, which made it
# fail with a NameError before a single frame was read. The value is a
# placeholder; point it at wherever the subtitle text should be written.
subtitles_file_path = "subtitles.txt"

# Alibaba OCR API settings.
app_key = ""
access_key_id = ""      # not used by the request built below
access_key_secret = ""  # not used by the request built below
api_url = "https://ocrapi-ugc.taobao.com/ocrservice/ugc"

# Seconds to wait for the OCR service before giving up on a frame; without a
# timeout a stalled connection would hang the whole script indefinitely.
request_timeout = 10

# Loop-invariant values, built once instead of once per frame.
headers = {
    'Content-Type': 'application/json; charset=UTF-8',
    'Authorization': 'APPCODE ' + app_key
}
kernel = cv2.getStructuringElement(cv2.MORPH_RECT, (3, 3))


def _recognize_text(frame):
    """Run one video frame through the OCR API.

    The frame is converted to grayscale, binarized with an inverted Otsu
    threshold, cleaned with a 3x3 morphological opening, JPEG-encoded,
    Base64-encoded and POSTed to ``api_url``.

    Args:
        frame: BGR image as returned by ``cv2.VideoCapture.read``.

    Returns:
        The ``word`` field of the first ``prism_wordsInfo`` entry of the
        response, or ``None`` when the frame could not be encoded, the
        request failed, the response was not valid JSON, the API reported
        failure, or the response held no recognized text. Every failure is
        reported on stdout; none of them raises.
    """
    gray = cv2.cvtColor(frame, cv2.COLOR_BGR2GRAY)
    thresh = cv2.threshold(
        gray, 0, 255, cv2.THRESH_BINARY_INV + cv2.THRESH_OTSU)[1]
    opening = cv2.morphologyEx(thresh, cv2.MORPH_OPEN, kernel, iterations=1)

    encoded_ok, encoded = cv2.imencode('.jpg', opening)
    if not encoded_ok:
        print("Error: could not JPEG-encode frame")
        return None
    img_str = base64.b64encode(encoded.tobytes()).decode()

    data = {
        'img': img_str,
        'prob': False,
        'charInfo': False,
        'rotate': False
    }

    try:
        response = requests.post(
            api_url,
            headers=headers,
            data=json.dumps(data).encode(),
            timeout=request_timeout,
        )
    except requests.RequestException as exc:
        # Network errors and timeouts cost us one frame, not the whole run.
        print("Error: OCR API request could not be sent: {}".format(exc))
        return None

    try:
        # ValueError covers both json.JSONDecodeError and UnicodeDecodeError.
        result = json.loads(response.content.decode())
    except ValueError:
        print("Error: Response is not a valid JSON string")
        return None

    # Valid JSON is not necessarily an object; guard before calling .get().
    if not (isinstance(result, dict) and result.get('success', False)):
        print("Error: OCR API 请求失败")
        return None

    try:
        word = result['prism_wordsInfo'][0]['word']
    except (KeyError, IndexError, TypeError):
        # A successful response may still contain no text at all.
        print("Error: OCR response contains no recognized text")
        return None
    if not isinstance(word, str):
        print("Error: OCR response contains no recognized text")
        return None
    return word


cap = cv2.VideoCapture(video_file_path)
try:
    if not cap.isOpened():
        raise SystemExit(
            "Error: cannot open video file {!r}".format(video_file_path))

    # The subtitle file is recreated on every run while the text log is
    # appended to, exactly as before; both are now opened once rather than
    # once per frame, and both use an explicit UTF-8 encoding.
    with open(subtitles_file_path, 'w', encoding='utf-8') as subtitles_out, \
            open(text_save_path, 'a', encoding='utf-8') as text_out:
        while True:
            ret, frame = cap.read()
            if not ret:
                # End of stream or read failure: frame is None, so stop
                # instead of passing it to cv2.cvtColor.
                break

            text = _recognize_text(frame)
            if text is not None:
                print(text)
                subtitles_out.write(text + '\n')
                text_out.write(text + '\n')

            # Always display the frame and poll the keyboard, even when OCR
            # failed, so that Q can still stop the run.
            cv2.imshow('Video', frame)
            if cv2.waitKey(1) & 0xFF == ord('q'):
                break
finally:
    # Release the capture and close the preview window on every exit path.
    cap.release()
    cv2.destroyAllWindows()

kll545012 · 发表于 2023-3-13 09:05

感谢楼主分享！

wwjmei · 发表于 2023-3-13 09:37

运行不了啊
Traceback (most recent call last):
File "D:\Lsrj\py\alzm.py", line 20, in <module>
with open(subtitles_file_path, 'w') as f:
NameError: name 'subtitles_file_path' is not defined

dev452 · 发表于 2023-3-13 09:48

先收藏，以后可能用得上

lmsw · 发表于 2023-3-13 10:10

非常谢谢分享

xxl1039 · 发表于 2023-3-13 10:41

学习是个好习惯，努力。

dtadsl · 发表于 2023-3-13 10:48

感谢分享，收藏了

帐号		自动登录	找回密码
密码			注册[Register]

[学习记录] Python（阿里云接口视频硬字幕识别）

免费评分