我主要写 JS 和 PHP,以前用过 tesseract.js,但只能识别简单的验证码(因为不会自己训练模型)。
后来一直用收费接口;学习 Python 时发现 muggle_ocr 效果还不错,应付简单的验证码正确率比较高。
用它识别正常的文本更是没问题。
# 吾爱破解论坛 www.52pojie.cn
import sys
# muggle_ocr安装--> pip install muggle_ocr -i http://pypi.douban.com/simple/ --trusted-host pypi.douban.com
import muggle_ocr
from PyQt5.QtWidgets import QApplication, QMainWindow, QPushButton, QLabel, QLineEdit
from PyQt5.QtGui import QIcon, QPixmap
from urllib.request import urlopen
class Example(QMainWindow):
    """Small window that downloads a captcha image and shows its OCR result.

    The window contains a URL field, a preview of the downloaded image, a
    label with the text returned by ``muggle_ocr`` and a refresh button that
    repeats the download-and-recognise cycle.
    """

    # Seconds to wait for the image server before giving up, so that an
    # unresponsive host cannot block the GUI thread indefinitely.
    FETCH_TIMEOUT = 10

    def __init__(self):
        """Create the window and build its widgets."""
        super().__init__()
        self.initUI()

    def readImage(self):
        """Download the image at the entered URL, run OCR and refresh the view.

        A failed download (network error, timeout, HTTP protocol error, empty
        or malformed URL) is reported on stderr; the result label and the
        preview are cleared so that no stale captcha stays on screen, and the
        exception is not propagated into the Qt event loop.
        """
        # Local import: the file's import block does not provide this name.
        from http.client import HTTPException

        url = self.inputLabel.text().strip()
        try:
            # The context manager closes the HTTP response even if read()
            # raises part-way through.
            with urlopen(url, timeout=self.FETCH_TIMEOUT) as response:
                image_bytes = response.read()
        except (OSError, ValueError, HTTPException) as err:
            # URLError/HTTPError and socket timeouts derive from OSError;
            # ValueError covers an empty or malformed URL; HTTPException
            # covers protocol-level failures such as an incomplete read.
            print(f"Failed to download {url!r}: {err}", file=sys.stderr)
            self.codeText.setText("")
            self.image.clear()
            return

        text = self.sdk.predict(image_bytes=image_bytes)
        self.codeText.setText(text)

        pixmap = QPixmap()
        if pixmap.loadFromData(image_bytes):
            self.image.setPixmap(pixmap.scaled(150, 60))
        else:
            # Qt could not decode the bytes; scaling would operate on a null
            # pixmap, so drop the old preview instead.
            self.image.clear()

    def initUI(self):
        """Create the OCR engine and all widgets, show the window, run once."""
        # Per the original author's note: ModelType.Captcha recognises
        # captchas of 4-6 characters.
        self.sdk = muggle_ocr.SDK(model_type=muggle_ocr.ModelType.Captcha)

        # Caption and input field for the image URL.
        # NOTE: self.label is rebound further down, so afterwards only the
        # second caption is reachable through this attribute.
        self.label = QLabel("图片地址:", self)
        self.label.setGeometry(20, 20, 80, 30)
        self.inputLabel = QLineEdit("https://wp.pep.com.cn/web/captcha.php",
                                    self)
        self.inputLabel.setGeometry(130, 20, 450, 30)

        # Preview area; readImage() scales the download to this same 150x60.
        self.image = QLabel(self)
        self.image.setGeometry(20, 60, 150, 60)

        # Caption and value label for the recognised text.
        self.label = QLabel("识别结果:", self)
        self.label.setGeometry(200, 80, 60, 30)
        self.codeText = QLabel("", self)
        self.codeText.setGeometry(280, 80, 100, 30)
        self.codeText.setStyleSheet(
            "color:rgb(225,202,100,255);font-size:26px;font-weight:normal;font-family:Arial"
        )

        # Refresh button: fetch and recognise again.
        self.btn = QPushButton("刷新", self)
        self.btn.setGeometry(200, 130, 200, 30)
        self.btn.clicked.connect(self.readImage)

        self.setGeometry(600, 600, 600, 180)
        self.setWindowTitle("P娃儿猫-验证码识别")
        self.setWindowIcon(QIcon("Pwaerm.ico"))
        self.show()

        # Populate the window right away with a first recognition.
        self.readImage()
if __name__ == "__main__":
    # Script entry point: start Qt, build the window (Example's constructor
    # shows it via initUI), then hand control to the event loop.
    qt_app = QApplication(sys.argv)
    main_window = Example()
    # Propagate the event loop's return value as the process exit status.
    exit_code = qt_app.exec_()
    sys.exit(exit_code)
|