opencv pytorch CRNN验证码识别

article/2025/8/23 23:45:49

文章目录

  • 前言:
  • 效果预览:
  • 搭建CRNN模型:
  • 验证码数据集制作:
  • 模型训练:
  • 项目结构与源码下载:

前言:

本文使用crnn网络识别验证码,使用的验证码数据集有三种,准确率都很高。

效果预览:

opencv验证码识别

搭建CRNN模型:

# crnn.py
import argparse, os
import torch
import torch.nn as nn


def _depthwise(channels, kernel_size=3, padding=1):
    """Build a depthwise convolution: one spatial filter per input channel."""
    return nn.Conv2d(in_channels=channels, out_channels=channels, kernel_size=kernel_size,
                     stride=1, padding=padding, groups=channels)


def _pointwise(in_channels, out_channels):
    """Build a 1x1 convolution that mixes channels after a depthwise convolution."""
    return nn.Conv2d(in_channels=in_channels, out_channels=out_channels, kernel_size=1,
                     stride=1, padding=0, groups=1)


class BidirectionalLSTM(nn.Module):
    """Bidirectional LSTM followed by a linear projection applied at every time step."""

    def __init__(self, nInput_size, nHidden, nOut):
        super().__init__()
        self.lstm = nn.LSTM(nInput_size, nHidden, bidirectional=True)
        # Both directions are concatenated, hence 2 * nHidden input features.
        self.linear = nn.Linear(nHidden * 2, nOut)

    def forward(self, input):
        """Map a (seq, batch, nInput_size) tensor to (seq, batch, nOut)."""
        recurrent, _ = self.lstm(input)
        seq_len, batch, features = recurrent.size()
        # Fold time and batch together so the linear layer sees 2-D input,
        # then unfold back to [seq, batch, nOut].
        flat = recurrent.view(seq_len * batch, features)
        return self.linear(flat).view(seq_len, batch, -1)


class CNN(nn.Module):
    """Feature extractor built from depthwise-separable convolutions.

    Layer attribute names are part of the checkpoint format (state_dict keys)
    and must not be renamed.
    """

    def __init__(self, imageHeight, nChannel):
        super().__init__()
        assert imageHeight % 32 == 0, 'image Height has to be a multiple of 32'

        # Stage 0: nChannel -> 64, then halve height and width.
        self.depth_conv0 = _depthwise(nChannel)
        self.point_conv0 = _pointwise(nChannel, 64)
        self.relu0 = nn.ReLU(inplace=True)
        self.pool0 = nn.MaxPool2d(kernel_size=2, stride=2)

        # Stage 1: 64 -> 128, then halve height and width.
        self.depth_conv1 = _depthwise(64)
        self.point_conv1 = _pointwise(64, 128)
        self.relu1 = nn.ReLU(inplace=True)
        self.pool1 = nn.MaxPool2d(kernel_size=2, stride=2)

        # Stage 2: 128 -> 256 with batch normalisation, no pooling.
        self.depth_conv2 = _depthwise(128)
        self.point_conv2 = _pointwise(128, 256)
        self.batchNorm2 = nn.BatchNorm2d(256)
        self.relu2 = nn.ReLU(inplace=True)

        # Stage 3: 256 -> 256; the pool strides 2 vertically but 1 horizontally.
        self.depth_conv3 = _depthwise(256)
        self.point_conv3 = _pointwise(256, 256)
        self.relu3 = nn.ReLU(inplace=True)
        self.pool3 = nn.MaxPool2d(kernel_size=(2, 2), stride=(2, 1), padding=(0, 1))

        # Stage 4: 256 -> 512 with batch normalisation, no pooling.
        self.depth_conv4 = _depthwise(256)
        self.point_conv4 = _pointwise(256, 512)
        self.batchNorm4 = nn.BatchNorm2d(512)
        self.relu4 = nn.ReLU(inplace=True)

        # Stage 5: 512 -> 512; same asymmetric pooling as stage 3.
        self.depth_conv5 = _depthwise(512)
        self.point_conv5 = _pointwise(512, 512)
        self.relu5 = nn.ReLU(inplace=True)
        self.pool5 = nn.MaxPool2d(kernel_size=(2, 2), stride=(2, 1), padding=(0, 1))

        # Stage 6: unpadded 2x2 depthwise convolution plus batch normalisation.
        self.depth_conv6 = _depthwise(512, kernel_size=2, padding=0)
        self.point_conv6 = _pointwise(512, 512)
        self.batchNorm6 = nn.BatchNorm2d(512)
        self.relu6 = nn.ReLU(inplace=True)

    def forward(self, input):
        """Run the seven convolution stages in order and return the final feature map."""
        x = self.pool0(self.relu0(self.point_conv0(self.depth_conv0(input))))
        x = self.pool1(self.relu1(self.point_conv1(self.depth_conv1(x))))
        x = self.relu2(self.batchNorm2(self.point_conv2(self.depth_conv2(x))))
        x = self.pool3(self.relu3(self.point_conv3(self.depth_conv3(x))))
        x = self.relu4(self.batchNorm4(self.point_conv4(self.depth_conv4(x))))
        x = self.pool5(self.relu5(self.point_conv5(self.depth_conv5(x))))
        x = self.relu6(self.batchNorm6(self.point_conv6(self.depth_conv6(x))))
        return x


class CRNN(nn.Module):
    """CNN feature extractor followed by two stacked bidirectional LSTM heads."""

    def __init__(self, imgHeight, nChannel, nClass, nHidden):
        super().__init__()
        # The nn.Sequential wrappers are kept: they define the "cnn.0." / "lstm.N."
        # prefixes of the state_dict keys.
        self.cnn = nn.Sequential(CNN(imgHeight, nChannel))
        self.lstm = nn.Sequential(
            BidirectionalLSTM(512, nHidden, nHidden),
            BidirectionalLSTM(nHidden, nHidden, nClass),
        )

    def forward(self, input):
        """Return per-time-step class scores shaped (seq, batch, nClass)."""
        conv = self.cnn(input)
        # PyTorch feature maps are laid out as (B, C, H, W).
        batch, channel, height, width = conv.size()
        assert height == 1, "the output height must be 1."
        # Drop the singleton height axis: (B, C, 1, W) -> (B, C, W).
        conv = conv.squeeze(dim=2)
        # (B, C, W) -> (W, B, C), i.e. the (seq, batch, input_size) layout nn.LSTM expects.
        conv = conv.permute(2, 0, 1)
        return self.lstm(conv)


if __name__ == "__main__":
    x = torch.rand(1, 1, 32, 100)
    model = CRNN(imgHeight=32, nChannel=1, nClass=37, nHidden=256)
    y = model(x)
    print(y.shape)

验证码数据集制作:

共有三种数据集,第一种数据集是captcha_images_v2,该数据集部分验证码图片如下所示:
在这里插入图片描述
另外两种数据集是使用python自制的,其部分图像如下所示:
在这里插入图片描述
在这里插入图片描述
使用python生成验证码的代码如下所示:

# -*- coding: utf-8 -*-
"""
Created on Fri Nov 27 16:10:15 2020
@author: pistachio
"""
# 生成验证码图片
from PIL import Image, ImageDraw, ImageFont
import random
import string
import cv2
import numpy as np
import warnings
warnings.filterwarnings("ignore")

# Pool the captcha characters are drawn from: English letters and digits, lower-cased.
# NOTE(review): lower-casing ascii_letters leaves every letter in the pool twice, so
# letters are drawn twice as often as digits. Kept as-is so newly generated data
# follows the same distribution as existing data sets.
characters = string.ascii_letters + string.digits
characters = characters.lower()


def selectedCharacters(length):
    """Return a random string of `length` characters drawn from `characters`."""
    return "".join(random.choice(characters) for _ in range(length))


def getColor():
    """Return a random (r, g, b) tuple with each component in 0..255."""
    r = random.randint(0, 255)
    g = random.randint(0, 255)
    b = random.randint(0, 255)
    return (r, g, b)


def measureText(draw, text, font):
    """Return (width, height) of `text` as rendered by `draw` with `font`.

    `ImageDraw.textsize` no longer exists in recent Pillow releases, so prefer
    `textbbox` and fall back to `textsize` only on old installations.
    """
    if hasattr(draw, "textbbox"):
        _, _, right, bottom = draw.textbbox((0, 0), text, font=font)
        # Use the right/bottom extent measured from the origin so the layout
        # matches what textsize used to report.
        return right, bottom
    return draw.textsize(text, font)


def main(size=(200, 100), characterNumber=4, bgcolor=(255, 255, 255),
         fontPath='c:\\windows\\fonts\\TIMESBD.TTF', outputDir="./data3/"):
    """Generate one noisy captcha image and save it as `<outputDir>/<text>.png`.

    size:            (width, height) of the image in pixels.
    characterNumber: number of characters in the captcha.
    bgcolor:         RGB background colour.
    fontPath:        TrueType font file used to render the characters.
    outputDir:       directory the PNG is written to (created if missing).

    The file name is the captcha text, so a repeated text overwrites the
    earlier image.
    """
    import os  # local import: the script's import header does not include os

    imgWidth, imgHeight = size
    imageTemp = Image.new('RGB', size, bgcolor)
    font = ImageFont.truetype(fontPath, 48)
    draw = ImageDraw.Draw(imageTemp)
    text = selectedCharacters(characterNumber)
    textWidth, textHeight = measureText(draw, text, font)

    # Draw the characters left to right, each with a small random vertical jitter.
    offset = 2
    for ch in text:
        offset += textWidth // characterNumber
        position = (offset, (imgHeight - textHeight) // 2 + random.randint(-5, 5))
        draw.text(xy=position, text=ch, font=font, fill=getColor())

    # Distort: shift every row horizontally by -1, 0 or +1 pixels (clamped to the image).
    imageFinal = Image.new('RGB', size, bgcolor)
    pixelsFinal = imageFinal.load()
    pixelsTemp = imageTemp.load()
    for y in range(imgHeight):
        shift = random.randint(-1, 1)
        for x in range(imgWidth):
            newx = min(max(x + shift, 0), imgWidth - 1)
            pixelsFinal[newx, y] = pixelsTemp[x, y]

    draw = ImageDraw.Draw(imageFinal)

    # Noise pixels on roughly 7% of the image area. randrange keeps the
    # coordinates inside the image (randint's upper bound is inclusive).
    for _ in range(int(imgWidth * imgHeight * 0.07)):
        draw.point((random.randrange(imgWidth), random.randrange(imgHeight)), fill=getColor())

    # Interference lines running from the left edge to the right edge.
    for _ in range(8):
        start = (0, random.randint(0, imgHeight - 1))
        end = (imgWidth, random.randint(0, imgHeight - 1))
        draw.line([start, end], fill=getColor(), width=1)

    # Interference arcs. Pillow documents the bounding box as requiring
    # y1 >= y0, so order the fixed and the random y coordinate.
    for _ in range(8):
        top, bottom = sorted((50, random.randint(0, imgHeight + 10)))
        draw.arc((-50, top, imgWidth + 10, bottom), 0, 360, fill=getColor())

    # PIL gives RGB, OpenCV writes BGR: reverse the channel axis before saving.
    src = np.array(imageFinal)[..., ::-1]
    # cv2.imwrite does not create missing directories.
    os.makedirs(outputDir, exist_ok=True)
    cv2.imwrite(os.path.join(outputDir, text + '.png'), src)


if __name__ == "__main__":
    for i in range(30000):
        main((150, 60), 4, (255, 255, 255))

模型训练:

训练代码如下所示:

from model import CRNN
from mydataset import CRNNDataSet
from torch.utils.data import DataLoader
import torch
from torch import optim
from tqdm import tqdm
import numpy as np
import matplotlib.pyplot as plt
import os
import random

# Class index -> character. The last entry (" ") is the CTC blank class.
CHAR_SET = list("abcdefghijklmnopqrstuvwxyz0123456789") + [" "]
BLANK_INDEX = len(CHAR_SET) - 1


def decode(preds):
    """Greedy CTC decode of one sequence of class indices.

    Blanks are dropped and runs of the same index collapse to one character;
    a blank between two equal indices keeps both characters.

    preds: iterable of integer class indices, one per time step.
    Returns the decoded string ("" for an empty or all-blank sequence).
    """
    chars = []
    previous = None
    for index in preds:
        index = int(index)
        # Emit only when the class changes and is not the blank.
        if index != BLANK_INDEX and index != previous:
            chars.append(CHAR_SET[index])
        previous = index
    return "".join(chars)


def getAcc(preds, labs):
    """Return the fraction of sequences in a batch that are decoded exactly right.

    preds: network output tensor; argmax over the last axis followed by a
           (1, 0) transpose must give one row of class indices per sample,
           i.e. the tensor is (seq, batch, classes).
    labs:  integer label tensor with one row per sample, padded with -1.

    The score is normalised by the number of label rows, so a smaller final
    batch is handled without relying on any module-level batch size.
    """
    labs = labs.cpu().detach().numpy()
    preds = preds.cpu().detach().numpy()
    # (seq, batch) -> (batch, seq): one best path per sample.
    best_paths = np.argmax(preds, axis=-1).transpose(1, 0)

    total = len(labs)
    if total == 0:
        return 0.0

    correct = 0
    for path, lab in zip(best_paths, labs):
        truth = "".join(CHAR_SET[i] for i in lab[lab != -1])
        if decode(path) == truth:
            correct += 1
    return correct / total


batch_size = 32
# Number of output classes: 36 characters plus the CTC blank.
n_class = 37
data_dir = 'data3'

# Hold out a random 10% of the files for validation; train on the rest.
datas = os.listdir(data_dir)
val_lines = random.sample(datas, k=int(len(datas) * 0.1))
train_lines = list(set(datas) - set(val_lines))

trainData = CRNNDataSet(lines=train_lines, train=True, img_width=200, data_dir=data_dir)
trainLoader = DataLoader(dataset=trainData, batch_size=batch_size, shuffle=True, num_workers=1)
valData = CRNNDataSet(lines=val_lines, train=False, img_width=200, data_dir=data_dir)
valLoader = DataLoader(dataset=valData, batch_size=batch_size, shuffle=False, num_workers=1)

device = torch.device('cuda:0' if torch.cuda.is_available() else 'cpu')
net = CRNN(imgHeight=32, nChannel=1, nClass=n_class, nHidden=256)
net = net.to(device)

# Resume from the checkpoint the training loop writes, if there is one.
# The path is built from data_dir so that load and save always agree.
# map_location lets a checkpoint saved on GPU load on a CPU-only machine.
checkpoint_path = './my_model/{}/{}.pth'.format(data_dir, data_dir)
stcdics = None
if os.path.isfile(checkpoint_path):
    stcdics = torch.load(checkpoint_path, map_location=device)
    net.load_state_dict(state_dict=stcdics)

# blank is the index of the CTC blank class: the last class, n_class - 1.
loss_func = torch.nn.CTCLoss(blank=n_class - 1)
optimizer = torch.optim.Adam(net.parameters(), lr=0.0005, betas=(0.5, 0.999))
# Learning-rate decay: multiply by 0.99 on every scheduler step.
lr_scheduler = optim.lr_scheduler.StepLR(optimizer, step_size=1, gamma=0.99)

# Per-epoch history used for the loss/accuracy plots.
trainLoss = []
valLoss = []
trainAcc = []
valAcc = []
def _run_batch(features, label):
    """Forward one batch through `net` and return (CTC loss, raw network output).

    features: image batch as produced by the data loader.
    label:    integer label tensor, one row per sample, padded with -1.
    """
    features = features.to(device)
    # Boolean masking flattens row by row, which yields the concatenated,
    # un-padded target sequence CTCLoss accepts.
    targets = label[label != -1].to(device)
    out = net(features)
    log_probs = out.log_softmax(2)
    # Every sample uses the full output sequence: out.size(0) time steps for
    # each of the out.size(1) samples.
    input_lengths = torch.full((out.size(1),), out.size(0), dtype=torch.int32)
    target_lengths = (label != -1).sum(dim=-1)
    loss = loss_func(log_probs, targets, input_lengths, target_lengths)
    return loss, out


if __name__ == '__main__':
    # Number of training epochs.
    Epoch = 50
    # Real number of batches per epoch, including a final partial batch.
    epoch_step = len(trainLoader)

    for epoch in range(1, Epoch + 1):
        # ---- training ----
        net.train()
        train_total_loss = 0.0
        train_total_acc = 0.0
        train_steps = 0
        with tqdm(total=epoch_step, desc=f'Epoch {epoch}/{Epoch}', mininterval=0.3) as pbar:
            for step, (features, label) in enumerate(trainLoader, 1):
                # Keep the module-level batch_size equal to the real batch size
                # (the last batch can be smaller) for helpers that normalise by it.
                batch_size = features.size()[0]
                loss, out = _run_batch(features, label)
                acc = getAcc(out, label)

                optimizer.zero_grad()
                loss.backward()
                optimizer.step()

                # .item() stores a plain float, so the running total does not
                # hold on to autograd history from every step.
                train_total_loss += loss.item()
                train_total_acc += acc
                train_steps = step
                pbar.set_postfix(loss=train_total_loss / step, acc=train_total_acc / step)
                pbar.update(1)

        trainLoss.append(train_total_loss / train_steps if train_steps else float('nan'))
        trainAcc.append(train_total_acc / train_steps if train_steps else float('nan'))

        # ---- save the model ----
        os.makedirs('my_model/{}'.format(data_dir), exist_ok=True)
        torch.save(net.state_dict(), 'my_model/{}/{}.pth'.format(data_dir, data_dir))

        # ---- validation ----
        net.eval()
        val_total_loss = 0.0
        val_total_acc = 0.0
        val_steps = 0
        with torch.no_grad():
            for step, (features, label) in enumerate(valLoader, 1):
                batch_size = features.size()[0]
                loss, out = _run_batch(features, label)
                val_total_loss += loss.item()
                val_total_acc += getAcc(out, label)
                val_steps = step

        # NaN marks an epoch without validation batches while keeping the
        # history lists aligned.
        valLoss.append(val_total_loss / val_steps if val_steps else float('nan'))
        valAcc.append(val_total_acc / val_steps if val_steps else float('nan'))

        lr_scheduler.step()

    # ---- plot the loss and accuracy curves ----
    plt.figure()
    plt.plot(trainLoss, 'r')
    plt.plot(valLoss, 'b')
    plt.title('Training and validation loss')
    plt.xlabel("Epochs")
    plt.ylabel("Loss")
    plt.legend(["Loss", "Validation Loss"])
    plt.savefig('loss.png')

    plt.figure()
    plt.plot(trainAcc, 'r')
    plt.plot(valAcc, 'b')
    plt.title('Training and validation acc')
    plt.xlabel("Epochs")
    plt.ylabel("Acc")
    plt.legend(["Acc", "Validation Acc"])
    plt.savefig('acc.png')

准确率与loss图像:
在这里插入图片描述
在这里插入图片描述

项目结构与源码下载:

如下图所示,三个文件夹中存放的分别是三个验证码数据集,运行main.py即可出现GUI界面。
在这里插入图片描述
GUI界面如下所示:
在这里插入图片描述
项目下载:下载地址


http://chatgpt.dhexx.cn/article/w05X845I.shtml

相关文章

CRNN笔记

参考链接： 一文读懂CRNN+CTC文字识别 - 知乎 CTC loss - 知乎 1、背景 文字识别主流的两种算法 1.1 基于CRNN+CTC 1.2 基于CNN+Seq2Seq+Attention 2、CRNN+CTC原理解析 CRNN+CTC结构图 以下是根据paddleocr中以mobilenetv3为backbone的网络结构图 model …

ocr小白入门CRNN

什么是CRNN CRNN的整体框架图： CRNN=CNN+RNN+CTC 1）CNN主要是为RNN提取特征； 2）RNN主要是将CNN输出的特征序列转换为输出； 3）CTC为翻译层，得到最终的预测结果，由于CTC适合不知道输入…

CRNN代码笔记

CRNN代码笔记 主要由五个模块组成： 数据集的加载与切分CRNN代码复现训练过程预测过程训练过程中对的评估 文章目录 CRNN代码笔记数据集的加载与切分RCNN模型构建训练部分训练辅助函数注意超参数设置判断cuda是否可用，是则基于GPU训练，否则用…

基于CRNN的文本识别

文章目录 0. 前言1. 数据集准备2.构建网络3.数据读取4.训练模型 0. 前言 至于CRNN网络的细节这里就不再多言了，网上有很多关于crnn的介绍，这里直接讲一下代码的实现流程 1. 数据集准备 CRNN是识别文本的网络，所以我们首先需要构建数据集…

CRNN论文翻译——中文版

文章作者：Tyan 博客：noahsnail.com | CSDN | 简书 翻译论文汇总：https://github.com/SnailTyan/deep-learning-papers-translation An End-to-End Trainable Neural Network for Image-based Sequence Recognition and Its Applicatio…

CRNN论文笔记

0. 前言 在这篇论文《An End-to-End Trainable Neural Network for Image-based Sequence Recognition and Its Application to Scene Text Recognition》所讲述的内容便是大名鼎鼎的CRNN网络,中实现了端到端的文本识别。 论文地址 Github地址 该网络具有如下的特点: 1)该模…

CRNN详解

一.概述 常用文字识别算法主要有两个框架： CNN+RNN+CTC(CRNN+CTC) CNN+Seq2Seq+Attention 本文介绍第一种方法。 CRNN是一种卷积循环神经网络结构，用于解决基于图像的序列识别问题，特别是场景文字识别问题。 文章认为文字识别是对序列的预测方法…

CRNN模型

介绍： 是目前较为流行的图文识别模型，可识别较长的文本序列， 它利用BLSTM和CTC部件学习字符图像中的上下文关系， 从而有效提升文本识别准确率，使得模型更加鲁棒。 CRNN是一种卷积循环神经网络结构，用于解决…

CRNN

CRNN详解：https://blog.csdn.net/bestrivern/article/details/91050960 https://www.cnblogs.com/skyfsm/p/10335717.html 1 概述 传统的OCR识别过程分为两步：单字切割和分类任务。现在更流行的是基于深度学习的端到端的文字识别，即我们不需…

论文阅读 - CRNN

文章目录 1 概述2 模型介绍2.1 输入2.2 Feature extraction2.3 Sequence modeling2.4 Transcription2.4.1 训练部分2.4.2 预测部分 3 模型效果参考资料 1 概述 CRNN(Convolutional Recurrent Neural Network)是2015年华科的白翔老师团队提出的，直至今日，…

文本识别网络CRNN

文本识别网络CRNN 简介网络结构CNN层LSTM层CTC Loss 代码实现 简介 CRNN，全称Convolutional Recurrent Neural Network，卷积循环神经网络。 它是一种基于图像的序列识别网络，可以对不定长的文字序列进行端到端的识别。 它集成了卷积神经网络…

CRNN——文本识别算法

常用文字识别算法主要有两个框架： CNN+RNN+CTC(CRNN+CTC) CNN+Seq2Seq+Attention 文章认为文字识别是对序列的预测方法，所以采用了对序列预测的RNN网络。通过CNN将图片的特征提取出来后采用RNN对序列进行预测，最后通过一个CTC的翻译层得到最终结果…

OCR论文笔记系列(一): CRNN文字识别

👨‍💻作者简介:大数据专业硕士在读,CSDN人工智能领域博客专家,阿里云专家博主,专注大数据与人工智能知识分享,公众号:GoAI的学习小屋,免费分享书籍、简历、导图等资料,更有交流群分享AI和大数据,加群方式公众号回复“加群”或➡️点击链接。 🎉专栏推荐:➡️点…

CRNN——卷积循环神经网络结构

CRNN——卷积循环神经网络结构 简介构成CNNMap-to-Sequence 图解RNNctcloss序列合并机制推理过程编解码过程 代码实现 简介 CRNN 全称为 Convolutional Recurrent Neural Network，是一种卷积循环神经网络结构，主要用于端到端地对不定长的文本序列进行识…

java bean的生命周期

文章转载来自博客园：https://www.cnblogs.com/kenshinobiy/p/4652008.html Spring 中bean 的生命周期短暂吗? 在spring中，从BeanFactory或ApplicationContext取得的实例为Singleton，也就是预设为每一个Bean的别名只能维持一个实例…

Spring创建Bean的生命周期

1.Bean 的创建生命周期 UserService.class —> 无参构造方法（推断构造方法） —> 普通对象 —> 依赖注入（为带有Autowired的属性赋值） —> 初始化前（执行带有PostConstruct的方法） —> 初始…

Bean的生命周期(不要背了记思想)

文章内容引用自 咕泡科技 咕泡出品，必属精品 文章目录 1. 应付面试2 可以跟着看源码的图3 学习Bean 的生命周期之前你应该知道什么4 Bean 的完整生命周期 1. 应付面试 你若是真的为面试而来，请把下面这段背下来，应付面试足矣 spring的bean的…

简述 Spring Bean的生命周期

“请你描述下 Spring Bean 的生命周期？”，这是面试官考察 Spring 的常用问题，可见是 Spring 中很重要的知识点。 其实要记忆该过程，还是需要我们先去理解，本文将从以下两方面去帮助理解 Bean 的生命周期： 生…

【Spring源码】讲讲Bean的生命周期

1、前言 面试官：“看过Spring源码吧，简单说说Spring中Bean的生命周期” 大神仙：“基本生命周期会经历实例化 -> 属性赋值 -> 初始化 -> 销毁”。 面试官：“......” 2、Bean的生命周期 如果是普通Bean的生命周期…

Spring中bean的生命周期(易懂版)

bean的生命周期 写在前面的话bean的生命周期代码演示 bean的更完整的生命周期添加后置处理器的代码演示 写在前面的话 关于bean的生命周期有很多的文章，但是大多数都是长篇的理论，说来说去也不是很好理解，再次我就整理了一篇比较好理解的bea…