安卓OCR使用(Google ML Kit)
OCR是一个很常用的功能,Google ML Kit提供了OCR能力,用起来也很简单,本文介绍一下使用方法。
1. 相关概念
名词 | 概念 | 解释 |
---|---|---|
TextBlock | 块 | 一个段落 |
Line | 行 | 一行文本 |
Element | 元素 | 单词;对汉字来说,类似"开头 (分隔符)中间(分隔符) 结尾"这样含有明显分隔符的才会有多个字在一个Element中,否则就是单个字 |
Symbol | 字符 | 字母;对汉字来说就是单个字 |
2. 代码实现
在build.gradle中添加相关依赖:
// To recognize Chinese script
implementation 'com.google.mlkit:text-recognition-chinese:16.0.1'
添加布局文件activity_ocr.xml:
<?xml version="1.0" encoding="utf-8"?>
<!--
  Layout for the OCR screen: a camera preview with a drawing overlay stacked on
  top of it, followed by a button that toggles recognition on and off.
-->
<LinearLayout xmlns:android="http://schemas.android.com/apk/res/android"
    android:layout_width="match_parent"
    android:layout_height="match_parent"
    android:orientation="vertical">

    <!-- The preview and the overlay share one FrameLayout so they overlap exactly. -->
    <FrameLayout
        android:layout_width="wrap_content"
        android:layout_height="wrap_content">

        <SurfaceView
            android:id="@+id/camera_preview"
            android:layout_width="wrap_content"
            android:layout_height="wrap_content" />

        <!-- Must name the OCRDrawView class; the activity looks this view up as an OCRDrawView. -->
        <com.example.study.views.OCRDrawView
            android:id="@+id/ocr_area"
            android:layout_width="wrap_content"
            android:layout_height="wrap_content" />
    </FrameLayout>

    <Button
        android:id="@+id/ocr_switch"
        android:layout_width="match_parent"
        android:layout_height="match_parent"
        android:layout_gravity="center_horizontal|bottom"
        android:layout_marginBottom="80dp"
        android:background="@color/夏云灰"
        android:text="stop" />
</LinearLayout>
绘制文字的OCRDrawView.java:
package com.example.study.views;
import android.content.Context;
import android.graphics.Canvas;
import android.graphics.Color;
import android.graphics.Paint;
import android.graphics.Path;
import android.graphics.Point;
import android.util.AttributeSet;
import android.view.View;
import androidx.annotation.Nullable;
import java.util.ArrayList;
import java.util.List;
/**
 * Overlay view that paints recognized text on top of the camera preview.
 *
 * <p>Each entry is a quadrilateral (four corner points) plus the text that was
 * recognized inside it. The quadrilateral is filled with white and the text is
 * drawn in blue along its bottom edge.
 *
 * <p>{@link #add}, {@link #clear} and {@link #onDraw} all synchronize on the same
 * private lock, so the shape list is never modified while it is being drawn.
 */
public class OCRDrawView extends View {
    /** Number of corner points every shape must provide. */
    private static final int CORNER_COUNT = 4;

    private final Object lock = new Object();
    protected Paint paint = new Paint();
    protected Path path = new Path();
    private final List<ShapeInfo> cornerPointsList = new ArrayList<>();

    public OCRDrawView(Context context) {
        super(context);
    }

    public OCRDrawView(Context context, @Nullable AttributeSet attrs) {
        super(context, attrs);
    }

    /** Removes every stored shape and schedules a redraw. */
    public void clear() {
        synchronized (lock) {
            cornerPointsList.clear();
        }
        postInvalidate();
    }

    /**
     * Stores one recognized region for drawing. This does not trigger a redraw;
     * the caller is expected to invalidate the view.
     *
     * <p>Entries that cannot be drawn are ignored instead of crashing later in
     * {@link #onDraw}: a {@code null} text, a {@code null} array, an array with
     * fewer than four points, or an array containing a {@code null} point.
     *
     * @param cornerPoints the four corners of the region; indices 2 and 3 are used
     *                     as the end and start of the text baseline
     * @param text         the text to draw inside the region
     */
    public void add(Point[] cornerPoints, String text) {
        if (text == null || !isDrawable(cornerPoints)) {
            return;
        }
        synchronized (lock) {
            cornerPointsList.add(new ShapeInfo(cornerPoints, text));
        }
    }

    /** Returns whether the array holds at least four non-null points. */
    private static boolean isDrawable(Point[] cornerPoints) {
        if (cornerPoints == null || cornerPoints.length < CORNER_COUNT) {
            return false;
        }
        for (Point point : cornerPoints) {
            if (point == null) {
                return false;
            }
        }
        return true;
    }

    @Override
    protected void onDraw(Canvas canvas) {
        super.onDraw(canvas);
        synchronized (lock) {
            for (ShapeInfo shapeInfo : cornerPointsList) {
                drawBackground(shapeInfo, canvas);
                drawText(shapeInfo, canvas);
            }
        }
    }

    /** Draws the text in blue along the edge from corner 3 to corner 2. */
    private void drawText(ShapeInfo shapeInfo, Canvas canvas) {
        String text = shapeInfo.text;
        if (text.isEmpty()) {
            // Nothing to draw, and the per-character width below would divide by zero.
            return;
        }
        Point[] points = shapeInfo.points;
        // Size the text so it fits both the height of the region and its width
        // when the width is shared evenly between the characters.
        double height = calDistance(points[0], points[3]);
        double width = calDistance(points[2], points[3]);
        float textSize = (float) Math.min(height, width / text.length());
        paint.setColor(Color.BLUE);
        paint.setTextSize(textSize);
        path.reset();
        path.moveTo(points[3].x, points[3].y);
        path.lineTo(points[2].x, points[2].y);
        canvas.drawTextOnPath(text, path, 0, 0, paint);
    }

    /** Returns the Euclidean distance between two points. */
    private double calDistance(Point start, Point end) {
        return Math.hypot(start.x - end.x, start.y - end.y);
    }

    /** Fills the polygon described by the shape's corner points with white. */
    private void drawBackground(ShapeInfo shapeInfo, Canvas canvas) {
        Point[] shape = shapeInfo.points;
        path.reset();
        // Start at corner 3, then visit every corner in order and close the outline.
        path.moveTo(shape[3].x, shape[3].y);
        for (Point corner : shape) {
            path.lineTo(corner.x, corner.y);
        }
        path.close();
        paint.setColor(Color.WHITE);
        canvas.drawPath(path, paint);
    }

    /** One region to draw: its corner points and the text recognized inside it. */
    static class ShapeInfo {
        final Point[] points;
        final String text;

        public ShapeInfo(Point[] shape, String text) {
            this.points = shape;
            this.text = text;
        }
    }
}
activity类:
package com.example.study.activities;
import android.Manifest;
import android.content.pm.PackageManager;
import android.graphics.Bitmap;
import android.graphics.BitmapFactory;
import android.graphics.ImageFormat;
import android.graphics.Matrix;
import android.graphics.Point;
import android.graphics.Rect;
import android.graphics.YuvImage;
import android.hardware.Camera;
import android.os.Bundle;
import android.util.Log;
import android.view.SurfaceHolder;
import android.view.SurfaceView;
import android.view.ViewGroup;
import android.widget.Button;
import android.widget.FrameLayout;
import android.widget.Toast;
import androidx.activity.ComponentActivity;
import androidx.annotation.NonNull;
import androidx.annotation.Nullable;
import com.example.study.R;
import com.example.study.views.OCRDrawView;
import com.google.mlkit.vision.text.Text;
import com.google.mlkit.vision.text.TextRecognition;
import com.google.mlkit.vision.text.TextRecognizer;
import com.google.mlkit.vision.text.chinese.ChineseTextRecognizerOptions;
import java.io.ByteArrayOutputStream;
/**
 * Activity that runs ML Kit Chinese text recognition on live camera preview
 * frames and draws every recognized line on an {@link OCRDrawView} overlay.
 *
 * <p>Flow: each preview frame is converted to a {@link Bitmap} and handed to the
 * recognizer. While one frame is being processed, further frames are dropped.
 * The button toggles recognition and the camera preview on and off.
 */
public class OCRActivity extends ComponentActivity implements Camera.PreviewCallback, SurfaceHolder.Callback {
    private static final String TAG = "CameraDemoActivity";
    private static final int REQUEST_CAMERA = 1000;
    /** Height of the portrait preview view; also the long side of the camera frame. */
    private static final int HEIGHT = 1920;
    /** Width of the portrait preview view; also the short side of the camera frame. */
    private static final int WIDTH = 1080;
    /** Clockwise rotation, in degrees, applied to the preview and to recognition input. */
    private static final int ORIENTATION = 90;
    /** Lines recognized with a confidence below this value are not drawn. */
    private static final float MIN_CONFIDENCE = 0.3f;
    private static final int JPEG_QUALITY = 100;

    private SurfaceView preview;
    private OCRDrawView ocrArea;
    private Button ocrSwitch;
    private Camera camera;
    private Camera.Parameters parameters;
    private TextRecognizer recognizer;
    /** Maps points from unrotated frame coordinates to rotated, origin-centred coordinates. */
    private Matrix matrix;
    /** True while a frame is being processed; further frames are skipped meanwhile. */
    private boolean recognizing = false;
    /** True while the user has paused recognition with the button. */
    private boolean stopRecognizer = false;

    @Override
    protected void onCreate(@Nullable Bundle savedInstanceState) {
        super.onCreate(savedInstanceState);
        this.setContentView(R.layout.activity_ocr);
        initView();
        initVar();
        // Ask for the camera permission first; the surface callbacks are only
        // registered once the permission is known to be granted.
        if (checkSelfPermission(Manifest.permission.CAMERA) != PackageManager.PERMISSION_GRANTED) {
            requestPermissions(new String[]{Manifest.permission.CAMERA}, REQUEST_CAMERA);
        } else {
            preview.getHolder().addCallback(this);
        }
    }

    /** Creates the recognizer and the matrix used to rotate corner points. */
    private void initVar() {
        recognizer = TextRecognition.getClient(new ChineseTextRecognizerOptions.Builder().build());
        matrix = new Matrix();
        matrix.setRotate(ORIENTATION);
        // Corner points arrive in unrotated frame coordinates, where the frame is
        // HEIGHT wide and WIDTH tall. Move the frame centre to the origin before rotating.
        matrix.preTranslate(-HEIGHT / 2f, -WIDTH / 2f);
    }

    /** Looks up the views, wires the toggle button and sizes the preview. */
    private void initView() {
        preview = findViewById(R.id.camera_preview);
        ocrArea = findViewById(R.id.ocr_area);
        ocrSwitch = findViewById(R.id.ocr_switch);
        ocrSwitch.setOnClickListener(view -> {
            stopRecognizer = !stopRecognizer;
            ocrSwitch.setText(stopRecognizer ? "start" : "stop");
            if (camera == null) {
                return;
            }
            if (stopRecognizer) {
                camera.stopPreview();
            } else {
                camera.startPreview();
            }
        });
        adjustSurface(preview, ocrArea);
    }

    /** Gives the preview and the overlay the same fixed WIDTH x HEIGHT size. */
    private void adjustSurface(SurfaceView cameraPreview, OCRDrawView ocrArea) {
        FrameLayout.LayoutParams cameraPreviewParams = (FrameLayout.LayoutParams) cameraPreview.getLayoutParams();
        cameraPreviewParams.width = WIDTH;
        cameraPreviewParams.height = HEIGHT;
        ViewGroup.LayoutParams ocrAreaParams = ocrArea.getLayoutParams();
        ocrAreaParams.width = WIDTH;
        ocrAreaParams.height = HEIGHT;
    }

    @Override
    public void onRequestPermissionsResult(int requestCode, @NonNull String[] permissions, @NonNull int[] grantResults) {
        super.onRequestPermissionsResult(requestCode, permissions, grantResults);
        if (requestCode != REQUEST_CAMERA || grantResults.length == 0) {
            return;
        }
        if (grantResults[0] != PackageManager.PERMISSION_GRANTED) {
            finish();
            return;
        }
        SurfaceHolder holder = preview.getHolder();
        holder.addCallback(this);
        if (!holder.getSurface().isValid()) {
            // The surface does not exist yet; the callbacks registered above will
            // open the camera and start the preview once it does.
            return;
        }
        // The surface already exists, so surfaceCreated will not fire on its own.
        surfaceCreated(holder);
        if (camera == null) {
            // Opening the camera failed; surfaceCreated has already logged the error.
            return;
        }
        camera.setPreviewCallback(this);
        camera.startPreview();
    }

    @Override
    public void onPreviewFrame(byte[] data, Camera camera) {
        if (recognizing || stopRecognizer) {
            return;
        }
        Bitmap bitmap = convertToBitmap(camera, data);
        if (bitmap == null) {
            Log.w(TAG, "Skipping preview frame that could not be decoded");
            return;
        }
        recognizing = true;
        recognizer.process(bitmap, ORIENTATION).addOnSuccessListener(text -> {
            parseOCRResult(text);
        }).addOnFailureListener(exception -> {
            Toast.makeText(this, "Failure", Toast.LENGTH_SHORT).show();
            recognizing = false;
        }).addOnCompleteListener(task -> {
            recognizing = false;
        }).addOnCanceledListener(() -> {
            Toast.makeText(this, "Canceled", Toast.LENGTH_SHORT).show();
            recognizing = false;
        });
    }

    /**
     * Replaces the overlay content with the lines of a recognition result.
     * A result without any text leaves the overlay untouched.
     */
    private void parseOCRResult(Text text) {
        // getText() returns everything recognized at this level; the same accessor
        // exists on blocks, lines, elements and symbols.
        String textContent = text.getText();
        if (textContent == null || textContent.trim().length() == 0) {
            return;
        }
        ocrArea.clear();
        // Hierarchy: TextBlock (paragraph) -> Line -> Element (word) -> Symbol (character).
        // Only lines are drawn. Finer levels are reachable through
        // line.getElements() and element.getSymbols().
        for (Text.TextBlock textBlock : text.getTextBlocks()) {
            for (Text.Line line : textBlock.getLines()) {
                drawResult(line);
            }
        }
    }

    /**
     * Adds one recognized line to the overlay, rotating its corner points into
     * the coordinate system of the portrait preview view.
     */
    private void drawResult(Text.Line line) {
        // Other per-line data offered by the API and not used here:
        // line.getAngle(), line.getBoundingBox(), line.getRecognizedLanguage().
        if (line.getConfidence() < MIN_CONFIDENCE) {
            return;
        }
        // The four corners are not rotated, so they are mapped through the matrix
        // to match the rotation set with camera.setDisplayOrientation.
        Point[] cornerPoints = line.getCornerPoints();
        if (cornerPoints == null) {
            return;
        }
        // Build new points instead of modifying the objects owned by the result.
        Point[] rotated = new Point[cornerPoints.length];
        for (int i = 0; i < cornerPoints.length; i++) {
            float[] mapped = {cornerPoints[i].x, cornerPoints[i].y};
            matrix.mapPoints(mapped);
            // The matrix rotates around the origin; shift back to the view centre.
            rotated[i] = new Point((int) mapped[0] + WIDTH / 2, (int) mapped[1] + HEIGHT / 2);
        }
        ocrArea.add(rotated, line.getText());
        ocrArea.postInvalidate();
    }

    /**
     * Converts one NV21 preview frame into a bitmap by way of JPEG.
     *
     * @return the decoded bitmap, or {@code null} when the frame is missing or
     *         cannot be compressed or decoded
     */
    @Nullable
    private Bitmap convertToBitmap(Camera camera, byte[] data) {
        if (data == null) {
            return null;
        }
        Camera.Size previewSize = camera.getParameters().getPreviewSize();
        int width = previewSize.width;
        int height = previewSize.height;
        YuvImage yuv = new YuvImage(data, ImageFormat.NV21, width, height, null);
        ByteArrayOutputStream stream = new ByteArrayOutputStream();
        if (!yuv.compressToJpeg(new Rect(0, 0, width, height), JPEG_QUALITY, stream)) {
            return null;
        }
        // toByteArray() copies the buffer, so take the copy once and reuse it.
        byte[] jpeg = stream.toByteArray();
        return BitmapFactory.decodeByteArray(jpeg, 0, jpeg.length);
    }

    @Override
    protected void onDestroy() {
        super.onDestroy();
        if (recognizer != null) {
            recognizer.close();
        }
    }

    @Override
    public void surfaceCreated(@NonNull SurfaceHolder holder) {
        try {
            // This method can run twice for one surface (once by hand after the
            // permission is granted, once as a callback). Opening a camera that
            // is already open fails, so reuse the existing one.
            if (camera == null) {
                camera = Camera.open(Camera.CameraInfo.CAMERA_FACING_BACK);
            }
            parameters = camera.getParameters();
            // The display is rotated by 90 degrees, so height and width are swapped.
            parameters.setPictureSize(HEIGHT, WIDTH);
            // The overlay mapping assumes preview frames of HEIGHT x WIDTH pixels,
            // so request that preview size when the device offers it.
            if (supportsPreviewSize(parameters, HEIGHT, WIDTH)) {
                parameters.setPreviewSize(HEIGHT, WIDTH);
            } else {
                Log.w(TAG, "Preview size " + HEIGHT + "x" + WIDTH
                        + " is not supported; overlay positions may be off");
            }
            parameters.setFocusMode(Camera.Parameters.FOCUS_MODE_CONTINUOUS_PICTURE);
            parameters.setPictureFormat(ImageFormat.NV21);
            camera.setPreviewDisplay(holder);
            camera.setDisplayOrientation(ORIENTATION);
            camera.setParameters(parameters);
        } catch (Exception exception) {
            // Pass the exception itself: its message may be null, and Log rejects
            // a null message.
            Log.e(TAG, "Failed to open or configure the camera", exception);
        }
    }

    /** Returns whether the camera lists the given size among its supported preview sizes. */
    private static boolean supportsPreviewSize(Camera.Parameters parameters, int width, int height) {
        for (Camera.Size size : parameters.getSupportedPreviewSizes()) {
            if (size.width == width && size.height == height) {
                return true;
            }
        }
        return false;
    }

    @Override
    public void surfaceChanged(@NonNull SurfaceHolder holder, int format, int width, int height) {
        if (camera == null) {
            return;
        }
        camera.stopPreview();
        camera.setPreviewCallback(null);
        camera.startPreview();
        camera.setPreviewCallback(this);
        ocrArea.clear();
        // Force the "stopped" state and click the button, so the click handler
        // flips it back to "running" and updates the button label.
        stopRecognizer = true;
        ocrSwitch.performClick();
    }

    @Override
    public void surfaceDestroyed(@NonNull SurfaceHolder holder) {
        if (camera == null) {
            return;
        }
        camera.stopPreview();
        camera.setPreviewCallback(null);
        camera.release();
        // Drop the reference so nothing calls into the released camera later.
        camera = null;
    }
}
参考文章
- 文字识别 v2