import cv2
import requests
import base64
import json
# Script: read a video frame by frame, binarise each frame, send it to an
# OCR web API and append the first recognised word of each frame to two
# text files, while showing a live preview of the video (press Q to quit).

# Paths of the input video and of the text outputs.
video_file_path = "G:/.mp4"
text_save_path = ".txt"
# NOTE(review): the original script used `subtitles_file_path` without ever
# defining it (NameError at start-up). The value below is a placeholder --
# confirm the intended subtitle output path.
subtitles_file_path = "subtitles.txt"

# Open the video file.
cap = cv2.VideoCapture(video_file_path)

# Alibaba OCR API settings.
app_key = ""
access_key_id = ""
access_key_secret = ""
api_url = "https://ocrapi-ugc.taobao.com/ocrservice/ugc"

# Request headers and the morphology kernel do not change between frames,
# so they are built once instead of on every loop iteration.
headers = {
    'Content-Type': 'application/json; charset=UTF-8',
    'Authorization': 'APPCODE ' + app_key,
}
kernel = cv2.getStructuringElement(cv2.MORPH_RECT, (3, 3))

if not cap.isOpened():
    print("Error: cannot open video file", video_file_path)

try:
    # The subtitle file is truncated at start-up (mode 'w'); the text file is
    # only ever appended to. Both are opened once rather than once per frame.
    with open(subtitles_file_path, 'w', encoding='utf-8') as subtitles_file, \
            open(text_save_path, 'a', encoding='utf-8') as text_file:
        while True:
            # Read the next frame; stop at end of video or on a read failure.
            ret, frame = cap.read()
            if not ret:
                break

            # Convert to grayscale.
            gray = cv2.cvtColor(frame, cv2.COLOR_BGR2GRAY)
            # Binarise (inverted, Otsu threshold).
            thresh = cv2.threshold(
                gray, 0, 255, cv2.THRESH_BINARY_INV + cv2.THRESH_OTSU
            )[1]
            # Morphological opening to remove small noise.
            opening = cv2.morphologyEx(
                thresh, cv2.MORPH_OPEN, kernel, iterations=1
            )
            # Encode the processed image as a Base64 JPEG string.
            img_str = base64.b64encode(
                cv2.imencode('.jpg', opening)[1].tobytes()
            ).decode()

            # Request body.
            data = {
                'img': img_str,
                'prob': False,
                'charInfo': False,
                'rotate': False,
            }

            # Send the POST request. A failure here must not skip the
            # preview / key handling below, so no `continue` is used.
            result = None
            try:
                response = requests.post(
                    api_url,
                    headers=headers,
                    data=json.dumps(data).encode(),
                    timeout=10,
                )
            except requests.RequestException as exc:
                print("Error: OCR API 请求失败", exc)
            else:
                try:
                    result = json.loads(response.content.decode())
                except ValueError:
                    # Covers both invalid JSON and undecodable bytes.
                    print("Error: Response is not a valid JSON string")

            if result is not None:
                if isinstance(result, dict) and result.get('success', False):
                    # Only the first recognised word is used; a frame with
                    # no recognised text is skipped instead of raising.
                    words = result.get('prism_wordsInfo') or []
                    text = None
                    if words and isinstance(words[0], dict):
                        text = words[0].get('word')
                    if text:
                        print(text)
                        # Write to the subtitle file.
                        subtitles_file.write(text + '\n')
                        # Write to the recognition-result file.
                        text_file.write(text + '\n')
                else:
                    print("Error: OCR API 请求失败")

            # Show the video.
            cv2.imshow('Video', frame)
            # Press Q to quit.
            if cv2.waitKey(1) & 0xFF == ord('q'):
                break
finally:
    # Release resources even if an unexpected error occurred.
    cap.release()
    cv2.destroyAllWindows()