1.通用OCR文字识别
这种OCR只能识别图片中的文字,并且是按行返回识别结果,精度较低。
首先引入依赖包:
<!-- Baidu AIP Java SDK; provides com.baidu.aip.ocr.AipOcr used by the OCR helper class -->
<dependency>
    <groupId>com.baidu.aip</groupId>
    <artifactId>java-sdk</artifactId>
    <version>4.6.0</version>
</dependency>
通过OCR工具类:
package util;import com.baidu.aip.ocr.AipOcr;
import org.json.JSONObject;
import java.util.HashMap;public class OcrApi {private static final String APP_ID = "你的 App ID";private static final String API_KEY = "Xb12m5t4jS2n7";private static final String SECRET_KEY = "9XVx9GPcSbSUTZ";private static AipOcr getAipClient() {return getAipClient(API_KEY, SECRET_KEY);}public static AipOcr getAipClient(String apiKey, String secretKey) {AipOcr client = new AipOcr(APP_ID, apiKey, secretKey);// 可选:设置网络连接参数client.setConnectionTimeoutInMillis(2000);client.setSocketTimeoutInMillis(60000);return client;}public static String result(AipOcr client) {// 传入可选参数调用接口HashMap<String, String> options = new HashMap<>();options.put("language_type", "CHN_ENG");options.put("detect_direction", "true");options.put("detect_language", "true");options.put("probability", "true");JSONObject res = client.basicGeneralUrl("https://lichunyu1234.oss-cn-shanghai.aliyuncs.com/1.png", options);return res.toString(2);}public static void main(String[] args) {System.out.println(result(getAipClient()));}
}
结果如下,共识别出两行信息(words 即为识别出的文字):
2.高精度OCR识别身份证信息
这种方式精度较高,且按字段分类返回结果,返回数据更友好,可用性更高。
2.1 接口说明、请求地址及请求参数的官方截图如下:
2.2 OCR身份证识别工具类
package util;import com.alibaba.druid.util.Base64;
import com.alibaba.fastjson.JSONObject;
import java.io.*;
import java.net.*;
import java.nio.charset.StandardCharsets;
import java.util.List;
import java.util.Map;public class OcrUtil {// Access_Token获取private static final String ACCESS_TOKEN_HOST = "https://aip.baidubce.com/oauth/2.0/token?";// 身份证识别请求URLprivate static final String OCR_HOST = "https://aip.baidubce.com/rest/2.0/ocr/v1/idcard?";// apiKey,secretKeyprivate static final String API_KEY ="Xb12m5t4jS";private static final String SECRET_KEY = "9XVx9GPcSbSUT";// 获取百度云OCR的授权access_tokenpublic static String getAccessToken() {return getAccessToken(API_KEY, SECRET_KEY);}/*** 获取百度云OCR的授权access_token* @param apiKey* @param secretKey* @return*/public static String getAccessToken(String apiKey, String secretKey) {String accessTokenURL = ACCESS_TOKEN_HOST// 1. grant_type为固定参数+ "grant_type=client_credentials"// 2. 官网获取的 API Key+ "&client_id=" + apiKey// 3. 官网获取的 Secret Key+ "&client_secret=" + secretKey;try {URL url = new URL(accessTokenURL);// 打开和URL之间的连接HttpURLConnection connection = (HttpURLConnection) url.openConnection();connection.setRequestMethod("GET");connection.connect();// 获取响应头Map<String, List<String>> map = connection.getHeaderFields();// 遍历所有的响应头字段for (String key : map.keySet()) {System.out.println(key + "---->" + map.get(key));}// 定义 BufferedReader输入流来读取URL的响应BufferedReader bufferedReader = new BufferedReader(new InputStreamReader(connection.getInputStream()));StringBuilder result = new StringBuilder();String inputLine;while ((inputLine = bufferedReader.readLine()) != null) {result.append(inputLine);}JSONObject jsonObject = JSONObject.parseObject(result.toString());return jsonObject.getString("access_token");} catch (Exception e) {e.printStackTrace();System.err.print("获取access_token失败");}return null;}/*** 获取身份证识别后的数据* @param imageUrl* @param idCardSide* @return*/public static String getStringIdentityCard(File imageUrl, String idCardSide) {// 身份证OCR的http URL+鉴权tokenString OCRUrl = 
OCR_HOST+"access_token="+getAccessToken();System.out.println(OCRUrl);System.out.println("***************************************************");System.out.println(getAccessToken());// 对图片进行base64处理String image = encodeImageToBase64(imageUrl);// 请求参数String requestParam = "detect_direction=true&id_card_side="+idCardSide+"&image="+image;try {// 请求OCR地址URL url = new URL(OCRUrl);HttpURLConnection connection = (HttpURLConnection) url.openConnection();// 设置请求方法为POSTconnection.setRequestMethod("POST");// 设置请求头connection.setRequestProperty("Content-Type", "application/x-www-form-urlencoded");connection.setRequestProperty("apiKey", API_KEY);connection.setDoOutput(true);connection.getOutputStream().write(requestParam.getBytes(StandardCharsets.UTF_8));connection.connect();// 定义 BufferedReader输入流来读取URL的响应BufferedReader bufferedReader = new BufferedReader(new InputStreamReader(connection.getInputStream(), StandardCharsets.UTF_8));StringBuilder result = new StringBuilder();String inputLine;while ((inputLine = bufferedReader.readLine()) != null) {result.append(inputLine);}bufferedReader.close();return result.toString();} catch (Exception e) {e.printStackTrace();System.err.println("身份证OCR识别异常");return null;}}/*** 对图片url进行Base64编码处理* @param imageUrl* @return*/public static String encodeImageToBase64(File imageUrl) {// 将图片文件转化为字节数组字符串,并对其进行Base64编码处理byte[] data = null;try {InputStream inputStream = new FileInputStream(imageUrl);data = new byte[inputStream.available()];inputStream.read(data);inputStream.close();// 对字节数组Base64编码return URLEncoder.encode(Base64.byteArrayToBase64(data), "UTF-8");} catch (Exception e) {e.printStackTrace();return null;}}/*** 提取OCR识别身份证有效信息* @param* @return*/public static Map<String, String> getIdCardInfo(MultipartFile image, int idCardSide) {String value = getStringIdentityCard(image, idCardSide);String side;if (idCardSide == 1) {side = "正面";}else {side = "背面";}Map<String, String> map = new HashMap<>();JSONObject jsonObject = 
JSONObject.parseObject(value);JSONObject words_result = jsonObject.getJSONObject("words_result");if (words_result == null || words_result.isEmpty()) {throw new MyException("请提供身份证"+side+"图片");}for (String key : words_result.keySet()) {JSONObject result = words_result.getJSONObject(key);String info = result.getString("words");switch (key) {case "姓名":map.put("name", info);break;case "性别":map.put("sex", info);break;case "民族":map.put("nation", info);break;case "出生":map.put("birthday", info);break;case "住址":map.put("address", info);break;case "公民身份号码":map.put("idNumber", info);break;case "签发机关":map.put("issuedOrganization", info);break;case "签发日期":map.put("issuedAt", info);break;case "失效日期":map.put("expiredAt", info);break;}}return map;}}
官方返回示例:
**********************************************
对于身份证识别有个大坑:
1.有的Base64工具编码后的结果带有头部“Base64:”,需要先去掉;阿里巴巴(druid)的Base64工具可以正常使用。
2.OCR识别的官方文档只说明图片要做Base64编码,但实际上还需要对Base64结果再做一次UrlEncode才可以。