Java OCR 图片识别文字

安装依赖

在 pom.xml 中添加以下 Maven 依赖:
<dependency>
<groupId>io.github.mymonstercat</groupId>
<artifactId>rapidocr</artifactId>
<version>0.0.7</version>
</dependency>
<!-- Use the ONNX backend in CPU environments -->
<dependency>
<groupId>io.github.mymonstercat</groupId>
<artifactId>rapidocr-onnx-platform</artifactId>
<version>0.0.7</version>
</dependency>
<!-- Use the generic Linux build -->
<!-- NOTE(review): this artifact is pinned to 0.0.6 while the two above use 0.0.7; confirm the mismatch is intended -->
<dependency>
<groupId>io.github.mymonstercat</groupId>
<artifactId>rapidocr-onnx-linux-x86_64</artifactId>
<version>0.0.6</version>
</dependency>

代码示例

工具类 PicUtil 的完整代码如下:
package com.bozhi.xiaoluo.modules.common.utils;

import com.bozhi.xiaoluo.common.utils.HttpUtils;
import io.github.mymonstercat.Model;
import io.github.mymonstercat.ocr.InferenceEngine;
import lombok.extern.slf4j.Slf4j;

import java.io.IOException;
import java.nio.file.Files;
import java.nio.file.Path;

@Slf4j
public class PicUtil {

private static class InferenceEngineHolder {
private static final InferenceEngine engine = InferenceEngine.getInstance(Model.ONNX_PPOCR_V3);
}

//从图片中提取文字 PaddleOCR (Java封装版):中文识别利器
public static String queryWordsFromPicLocal(String url) {
long startTime = System.currentTimeMillis();
byte[] imageBytes = HttpUtils.downloadImageAsBytes(url);
if (imageBytes == null) {
log.error("图片识别:图片下载失败。");
return "";
}
long picDownloadTime = System.currentTimeMillis() - startTime;
Path tempFile = null;
// 2. 将字节数组写入临时文件 (推荐使用 .png 后缀)
try {
tempFile = Files.createTempFile("ocr_temp_", ".png");
Files.write(tempFile, imageBytes);
String result = InferenceEngineHolder.engine.runOcr(tempFile.toAbsolutePath().toString()).getStrRes().trim();
return result;
} catch (Throwable e) {
log.error("图片识别:异常:", e);
} finally {
// 6. 清理临时文件
if (tempFile != null) {
try {
Files.deleteIfExists(tempFile);
} catch (IOException e) {
log.error(e.getMessage());
}
}

if (Env.isDev()) {
log.info("图片识别:下载图片耗时{}ms 识别耗时{}ms", picDownloadTime, System.currentTimeMillis() - startTime);
}
}
return "";
}
}

UnsatisfiedLinkError 错误

错误信息:version `GLIBCXX_3.4.26' not found

完整错误日志:
[2026-04-01 20:33:39.949] ERROR [AsyncUtils-1] [2039320252728655872] 图片识别异常:java.lang.UnsatisfiedLinkError: /tmp/ocrJava/onnx/libRapidOcr.so: /lib64/libstdc++.so.6: version `GLIBCXX_3.4.26' not found (required by /tmp/ocrJava/onnx/libRapidOcr.so)
...
at io.github.mymonstercat.JarFileUtil.copyFileFromJar(JarFileUtil.java:68)
at io.github.mymonstercat.OnnxLinuxX8664LibraryLoader.loadLibrary(OnnxLinuxX8664LibraryLoader.java:14)
at io.github.mymonstercat.ocr.InferenceEngine.loadFileIfNeeded(InferenceEngine.java:78)
at io.github.mymonstercat.ocr.InferenceEngine.runOcr(InferenceEngine.java:55)
at io.github.mymonstercat.ocr.InferenceEngine.runOcr(InferenceEngine.java:50)
at com.bozhi.xiaoluo.modules.common.utils.PicUtil.queryWordsFromPic(PicUtil.java:29)

解决思路

参考 https://www.cnblogs.com/caoyiting/p/16935916.html
参考 https://www.jianshu.com/p/050b2b777b9d
问题:软链接 libstdc++.so.6 指向的是 libstdc++.so.6.0.25(最高只提供 GLIBCXX_3.4.25),而 libRapidOcr.so 需要 GLIBCXX_3.4.26(由 libstdc++.so.6.0.26 提供)
1、下载 3.4.26 对应的包
2、解压包,并安装
3、删除旧的软链接,并创建新的软链接

操作命令及输出如下(以 》》》 开头的行是命令输出):
# List the GLIBCXX symbol versions exported by the libstdc++ currently in use.
# Lines starting with 》》》 are command output, not commands.
strings /usr/lib64/libstdc++.so.6 | grep GLIBCXX
》》》GLIBCXX_3.4.25
》》》GLIBCXX_DEBUG_MESSAGE_LENGTH

# Ask yum which package provides libstdc++.so.6.
sudo yum provides libstdc++.so.6

# Download an archive containing libstdc++.so.6.0.26 and copy the library into /usr/lib64.
# NOTE(review): the archive is fetched over plain HTTP from a third-party site; verify
# its integrity before installing it as a system library.
# NOTE(review): unzip and cp run without sudo, unlike the surrounding commands;
# presumably the shell already had write access to these directories. Confirm.
cd /usr/local/lib64
sudo wget http://www.vuln.cn/wp-content/uploads/2019/08/libstdc.so_.6.0.26.zip
unzip libstdc.so_.6.0.26.zip
cp libstdc++.so.6.0.26 /usr/lib64
cd /usr/lib64

# Show the current symlink target and the library files now present.
ls -l | grep libstdc++
》》》lrwxrwxrwx 1 root root 19 11月 8 2024 libstdc++.so.6 -> libstdc++.so.6.0.25
》》》-rwxr-xr-x 1 root root 1661424 11月 8 2024 libstdc++.so.6.0.25
》》》-rwxr-xr-x 1 root root 13172960 4月 1 21:16 libstdc++.so.6.0.26

# Remove the old symlink ...
sudo rm libstdc++.so.6

# ... and point libstdc++.so.6 at the 6.0.26 library instead.
sudo ln -s libstdc++.so.6.0.26 libstdc++.so.6

# Verify that GLIBCXX_3.4.26 is now exported.
strings /usr/lib64/libstdc++.so.6 | grep GLIBCXX
》》》GLIBCXX_3.4.25
》》》GLIBCXX_3.4.26
》》》GLIBCXX_DEBUG_MESSAGE_LENGTH