当前位置: 首页 > article >正文

安卓OCR使用(Google ML Kit)

OCR是一个很常用的功能,Google ML Kit提供了OCR能力,用起来也很简单,本文介绍一下使用方法。

1. 相关概念

| 名词 | 概念 | 解释 |
| --- | --- | --- |
| TextBlock | 块 | 一个段落 |
| Line | 行 | 一行文本 |
| Element | 元素 | 单词;对汉字来说,类似"开头(分隔符)中间(分隔符)结尾"这样含有明显分隔符的,才会有多个字在同一个Element中,否则就是单个字 |
| Symbol | 字符 | 字母;对汉字来说就是单个字 |

2. 代码实现

在build.gradle中添加相关依赖:

// To recognize Chinese script
implementation 'com.google.mlkit:text-recognition-chinese:16.0.1'

添加布局文件activity_ocr.xml:

<?xml version="1.0" encoding="utf-8"?>
<!-- Camera preview with an OCR overlay stacked on top of it, plus a start/stop button. -->
<LinearLayout xmlns:android="http://schemas.android.com/apk/res/android"
              android:layout_width="match_parent"
              android:layout_height="match_parent"
              android:orientation="vertical">

    <!-- FrameLayout stacks the overlay above the preview; both are resized in code. -->
    <FrameLayout
            android:layout_width="wrap_content"
            android:layout_height="wrap_content">

        <SurfaceView
                android:id="@+id/camera_preview"
                android:layout_width="wrap_content"
                android:layout_height="wrap_content" />

        <!-- Must be OCRDrawView: the activity assigns this view to an OCRDrawView field. -->
        <com.example.study.views.OCRDrawView
                android:id="@+id/ocr_area"
                android:layout_width="wrap_content"
                android:layout_height="wrap_content" />
    </FrameLayout>

    <Button
            android:id="@+id/ocr_switch"
            android:layout_width="match_parent"
            android:layout_height="match_parent"
            android:layout_gravity="center_horizontal|bottom"
            android:layout_marginBottom="80dp"
            android:background="@color/夏云灰"
            android:text="stop" />
</LinearLayout>

绘制文字的OCRDrawView.java:

package com.example.study.views;

import android.content.Context;
import android.graphics.Canvas;
import android.graphics.Color;
import android.graphics.Paint;
import android.graphics.Path;
import android.graphics.Point;
import android.util.AttributeSet;
import android.view.View;

import androidx.annotation.Nullable;

import java.util.ArrayList;
import java.util.List;

/**
 * Overlay view that paints recognized text over the quadrilateral it was detected in.
 *
 * <p>Each shape is a four-corner polygon plus its text. The polygon is filled white and the text
 * is drawn in blue along the edge running from corner 3 to corner 2. The shape list is guarded by
 * a lock, so {@link #add} and {@link #clear} may be called while the view is drawing.
 */
public class OCRDrawView extends View {

    /** Number of corner points a shape needs; {@code onDraw} reads indices 0, 2 and 3. */
    private static final int CORNER_COUNT = 4;

    private final Object lock = new Object();

    protected Paint paint = new Paint();

    protected Path path = new Path();

    private final List<ShapeInfo> cornerPointsList = new ArrayList<>();

    public OCRDrawView(Context context) {
        super(context);
    }

    public OCRDrawView(Context context, @Nullable AttributeSet attrs) {
        super(context, attrs);
    }

    /** Removes every shape and schedules a redraw. */
    public void clear() {
        synchronized (lock) {
            cornerPointsList.clear();
        }
        postInvalidate();
    }

    /**
     * Queues a shape for drawing. Does not trigger a redraw; the caller is expected to invalidate
     * the view once it has added everything.
     *
     * <p>Shapes that cannot be drawn (missing text, missing or fewer than four corner points, or
     * a null corner) are ignored instead of crashing the next draw pass.
     *
     * @param cornerPoints the four corners of the text area, in view coordinates
     * @param text the text to draw inside the area
     */
    public void add(Point[] cornerPoints, String text) {
        if (text == null || cornerPoints == null || cornerPoints.length < CORNER_COUNT) {
            return;
        }
        for (Point corner : cornerPoints) {
            if (corner == null) {
                return;
            }
        }
        synchronized (lock) {
            cornerPointsList.add(new ShapeInfo(cornerPoints, text));
        }
    }

    @Override
    protected void onDraw(Canvas canvas) {
        super.onDraw(canvas);

        synchronized (lock) {
            for (ShapeInfo shapeInfo : cornerPointsList) {
                drawBackground(shapeInfo, canvas);
                drawText(shapeInfo, canvas);
            }
        }
    }

    /** Draws the shape's text along the edge from corner 3 to corner 2. */
    private void drawText(ShapeInfo shapeInfo, Canvas canvas) {
        String text = shapeInfo.text;
        if (text.isEmpty()) {
            // Nothing to draw; also avoids dividing the width by zero below.
            return;
        }
        Point[] points = shapeInfo.points;
        // Size the text from the area: no taller than the box, and narrow enough that
        // text.length() glyphs of that size fit across the box width.
        double height = calDistance(points[0], points[3]);
        double width = calDistance(points[2], points[3]);
        float textSize = (float) Math.min(height, width / text.length());
        paint.setColor(Color.BLUE);
        paint.setTextSize(textSize);

        path.reset();
        path.moveTo(points[3].x, points[3].y);
        path.lineTo(points[2].x, points[2].y);

        canvas.drawTextOnPath(text, path, 0, 0, paint);
    }

    /** Returns the Euclidean distance between two points. */
    private double calDistance(Point start, Point end) {
        return Math.hypot(start.x - end.x, start.y - end.y);
    }

    /** Fills the shape's polygon with white so the text stays readable over the preview. */
    private void drawBackground(ShapeInfo shapeInfo, Canvas canvas) {
        Point[] shape = shapeInfo.points;
        path.reset();
        // Start at the last corner so the first lineTo already draws a polygon edge.
        Point last = shape[shape.length - 1];
        path.moveTo(last.x, last.y);
        for (Point corner : shape) {
            path.lineTo(corner.x, corner.y);
        }
        path.close();

        paint.setColor(Color.WHITE);
        canvas.drawPath(path, paint);
    }

    /** A polygon and the text to draw inside it. */
    static class ShapeInfo {
        final Point[] points;
        final String text;

        public ShapeInfo(Point[] shape, String text) {
            this.points = shape;
            this.text = text;
        }
    }
}

activity类:

package com.example.study.activities;

import android.Manifest;
import android.content.pm.PackageManager;
import android.graphics.Bitmap;
import android.graphics.BitmapFactory;
import android.graphics.ImageFormat;
import android.graphics.Matrix;
import android.graphics.Point;
import android.graphics.Rect;
import android.graphics.YuvImage;
import android.hardware.Camera;
import android.os.Bundle;
import android.util.Log;
import android.view.SurfaceHolder;
import android.view.SurfaceView;
import android.view.ViewGroup;
import android.widget.Button;
import android.widget.FrameLayout;
import android.widget.Toast;

import androidx.activity.ComponentActivity;
import androidx.annotation.NonNull;
import androidx.annotation.Nullable;

import com.example.study.R;
import com.example.study.views.OCRDrawView;
import com.google.mlkit.vision.text.Text;
import com.google.mlkit.vision.text.TextRecognition;
import com.google.mlkit.vision.text.TextRecognizer;
import com.google.mlkit.vision.text.chinese.ChineseTextRecognizerOptions;

import java.io.ByteArrayOutputStream;

/**
 * Demo activity that feeds camera preview frames to ML Kit's Chinese text recognizer and draws
 * every recognized line on an {@link OCRDrawView}.
 *
 * <p>The preview is displayed rotated by {@link #ORIENTATION} degrees, so camera frames are
 * treated as {@code HEIGHT x WIDTH} while the preview and overlay views are sized
 * {@code WIDTH x HEIGHT}.
 */
public class OCRActivity extends ComponentActivity implements Camera.PreviewCallback, SurfaceHolder.Callback {
    private static final String TAG = "CameraDemoActivity";
    private static final int REQUEST_CAMERA = 1000;
    /** Height, in pixels, given to the preview and overlay views. */
    private static final int HEIGHT = 1920;
    /** Width, in pixels, given to the preview and overlay views. */
    private static final int WIDTH = 1080;
    /** Display rotation applied to the preview, and passed to the recognizer, in degrees. */
    private static final int ORIENTATION = 90;
    private SurfaceView preview;
    private OCRDrawView ocrArea;
    private Button ocrSwitch;
    /** The open camera, or {@code null} while no camera is held. */
    private Camera camera;

    private TextRecognizer recognizer;
    /** Maps corner points from frame coordinates into the rotated view, centred on the origin. */
    private Matrix matrix;

    /** True while a frame is being processed; further frames are dropped until it finishes. */
    private boolean recognizing = false;
    /** True while the user has paused recognition with the switch button. */
    private boolean stopRecognizer = false;


    @Override
    protected void onCreate(@Nullable Bundle savedInstanceState) {
        super.onCreate(savedInstanceState);
        this.setContentView(R.layout.activity_ocr);
        initView();
        initVar();

        // Ask for the camera permission first; the surface callback is only registered once
        // the permission is available.
        if (checkSelfPermission(Manifest.permission.CAMERA) != PackageManager.PERMISSION_GRANTED) {
            requestPermissions(new String[]{Manifest.permission.CAMERA}, REQUEST_CAMERA);
        } else {
            preview.getHolder().addCallback(this);
        }
    }

    /** Creates the recognizer and the matrix that rotates corner points into view space. */
    private void initVar() {
        recognizer = TextRecognition.getClient(new ChineseTextRecognizerOptions.Builder().build());
        matrix = new Matrix();
        matrix.setRotate(ORIENTATION);
        // Corner points are reported in the unrotated frame, whose width/height are swapped
        // relative to the views, so the frame centre is (HEIGHT / 2, WIDTH / 2).
        matrix.preTranslate(-HEIGHT >> 1, -WIDTH >> 1);
    }

    /** Looks up the views, wires the start/stop button and fixes the preview/overlay size. */
    private void initView() {
        preview = findViewById(R.id.camera_preview);
        ocrArea = findViewById(R.id.ocr_area);
        ocrSwitch = findViewById(R.id.ocr_switch);
        ocrSwitch.setOnClickListener(view -> {
            stopRecognizer = !stopRecognizer;
            // The label names the action the next click will perform.
            ocrSwitch.setText(stopRecognizer ? "start" : "stop");
            if (camera == null) {
                return;
            }
            if (stopRecognizer) {
                camera.stopPreview();
            } else {
                camera.startPreview();
            }
        });
        adjustSurface(preview, ocrArea);
    }

    /** Gives the preview and the overlay the same fixed {@code WIDTH x HEIGHT} size. */
    private void adjustSurface(SurfaceView cameraPreview, OCRDrawView ocrArea) {
        ViewGroup.LayoutParams cameraPreviewParams = cameraPreview.getLayoutParams();
        cameraPreviewParams.width = WIDTH;
        cameraPreviewParams.height = HEIGHT;
        // Re-apply the params so the change is picked up even if a layout pass already ran.
        cameraPreview.setLayoutParams(cameraPreviewParams);

        ViewGroup.LayoutParams ocrAreaParams = ocrArea.getLayoutParams();
        ocrAreaParams.width = WIDTH;
        ocrAreaParams.height = HEIGHT;
        ocrArea.setLayoutParams(ocrAreaParams);
    }

    @Override
    public void onRequestPermissionsResult(int requestCode, @NonNull String[] permissions, @NonNull int[] grantResults) {
        super.onRequestPermissionsResult(requestCode, permissions, grantResults);
        if (requestCode != REQUEST_CAMERA || grantResults.length == 0) {
            return;
        }
        if (grantResults[0] != PackageManager.PERMISSION_GRANTED) {
            finish();
            return;
        }
        SurfaceHolder holder = preview.getHolder();
        holder.addCallback(this);
        // The callback was registered late, so open the camera by hand.
        surfaceCreated(holder);
        if (camera == null) {
            // Opening the camera failed; surfaceCreated has already logged the reason.
            return;
        }
        camera.setPreviewCallback(this);
        camera.startPreview();
    }

    /**
     * Receives a preview frame and hands it to the recognizer. Frames are dropped while a
     * previous frame is still being processed or while recognition is paused.
     */
    @Override
    public void onPreviewFrame(byte[] data, Camera camera) {
        if (recognizing || stopRecognizer) {
            return;
        }
        Bitmap bitmap = convertToBitmap(camera, data);
        if (bitmap == null) {
            // Undecodable frame: skip it without marking a recognition as in flight.
            return;
        }
        recognizing = true;
        recognizer.process(bitmap, ORIENTATION).addOnSuccessListener(text -> {
            parseOCRResult(text);
        }).addOnFailureListener(exception -> {
            Toast.makeText(this, "Failure", Toast.LENGTH_SHORT).show();
            recognizing = false;
        }).addOnCompleteListener(task -> {
            recognizing = false;
        }).addOnCanceledListener(() -> {
            Toast.makeText(this, "Canceled", Toast.LENGTH_SHORT).show();
            recognizing = false;
        });
    }

    /**
     * Replaces the overlay content with the lines of a recognition result. An empty result
     * leaves the previous overlay untouched.
     */
    private void parseOCRResult(Text text) {
        // Full recognized content; the nested objects below expose getText() the same way.
        String textContent = text.getText();
        if (textContent == null || textContent.trim().length() == 0) {
            return;
        }
        ocrArea.clear();
        // Block: a paragraph.
        for (Text.TextBlock textBlock : text.getTextBlocks()) {
            // Line: one line of text.
            for (Text.Line line : textBlock.getLines()) {
                drawResult(line);
                // Element: a word. Chinese text only yields multi-character elements when it
                // contains strong separators, e.g. "start (sep) middle (sep) end".
                for (Text.Element element : line.getElements()) {
                    // Symbol: a single character / letter.
                    for (Text.Symbol symbol : element.getSymbols()) {
                        // Shown for reference only; the overlay draws whole lines.
                        symbol.getText();
                    }
                }
            }
        }
    }

    /** Maps one recognized line into view coordinates and queues it on the overlay. */
    private void drawResult(Text.Line line) {
        // The next three values are not used by the overlay; they illustrate the Line API.
        // Rotation of the line in degrees (clockwise positive, range [-180, 180]).
        float angle = line.getAngle() + ORIENTATION;
        // Axis-aligned bounding rectangle of the detected text.
        Rect boundingBox = line.getBoundingBox();
        // Predominant language of the text, if any.
        String recognizedLanguage = line.getRecognizedLanguage();

        // Drop low-confidence lines.
        float confidence = line.getConfidence();
        if (confidence < 0.3f) {
            return;
        }
        // Four corners, clockwise from top-left, in unrotated frame coordinates. Because the
        // preview is rotated with camera.setDisplayOrientation, they must be rotated as well.
        Point[] cornerPoints = line.getCornerPoints();
        if (cornerPoints == null) {
            // No quadrilateral available for this line; nothing to draw.
            return;
        }
        for (Point cornerPoint : cornerPoints) {
            float[] floats = {cornerPoint.x, cornerPoint.y};
            matrix.mapPoints(floats);
            // The matrix rotates around the origin; shift back to the view's centre.
            cornerPoint.x = (int) floats[0] + (WIDTH >> 1);
            cornerPoint.y = (int) floats[1] + (HEIGHT >> 1);
        }
        ocrArea.add(cornerPoints, line.getText());
        ocrArea.postInvalidate();
    }

    /**
     * Converts an NV21 preview frame into a bitmap by way of an in-memory JPEG.
     *
     * @return the decoded bitmap, or {@code null} if the frame is missing or cannot be decoded
     */
    private Bitmap convertToBitmap(Camera camera, byte[] data) {
        if (data == null) {
            return null;
        }
        // Query the parameters once and encode/copy the JPEG once.
        Camera.Size previewSize = camera.getParameters().getPreviewSize();
        int width = previewSize.width;
        int height = previewSize.height;
        YuvImage yuv = new YuvImage(data, ImageFormat.NV21, width, height, null);
        ByteArrayOutputStream stream = new ByteArrayOutputStream();
        if (!yuv.compressToJpeg(new Rect(0, 0, width, height), 100, stream)) {
            return null;
        }
        byte[] jpeg = stream.toByteArray();
        return BitmapFactory.decodeByteArray(jpeg, 0, jpeg.length);
    }

    @Override
    protected void onDestroy() {
        super.onDestroy();
        if (recognizer != null) {
            recognizer.close();
        }
    }

    /**
     * Opens and configures the back camera for the given surface. Failures are logged and leave
     * whatever state was reached; callers must check {@code camera} for {@code null}.
     */
    @Override
    public void surfaceCreated(@NonNull SurfaceHolder holder) {
        // This method can run twice (manual call after the permission grant plus the system
        // callback); release any camera we still hold so the second open cannot fail.
        releaseCamera();
        try {
            camera = Camera.open(Camera.CameraInfo.CAMERA_FACING_BACK);
            Camera.Parameters parameters = camera.getParameters();
            // Frames are consumed from the preview callback as NV21, so the preview (not the
            // still-picture) format and size are what matter here.
            parameters.setPreviewFormat(ImageFormat.NV21);
            // The display is rotated by 90 degrees, so height and width are swapped.
            if (isPreviewSizeSupported(parameters, HEIGHT, WIDTH)) {
                parameters.setPreviewSize(HEIGHT, WIDTH);
            } else {
                Log.w(TAG, "Preview size " + HEIGHT + "x" + WIDTH
                        + " is not supported; the overlay may be misaligned");
            }
            // An unsupported focus mode makes setParameters reject the whole configuration.
            if (parameters.getSupportedFocusModes().contains(Camera.Parameters.FOCUS_MODE_CONTINUOUS_PICTURE)) {
                parameters.setFocusMode(Camera.Parameters.FOCUS_MODE_CONTINUOUS_PICTURE);
            }
            camera.setPreviewDisplay(holder);
            camera.setDisplayOrientation(ORIENTATION);
            camera.setParameters(parameters);
        } catch (Exception exception) {
            // Log with the throwable: getMessage() may be null, which Log.i(tag, null) rejects.
            Log.e(TAG, "Failed to open or configure the camera", exception);
        }
    }

    /** Returns whether the camera lists exactly {@code width x height} as a preview size. */
    private boolean isPreviewSizeSupported(Camera.Parameters parameters, int width, int height) {
        for (Camera.Size size : parameters.getSupportedPreviewSizes()) {
            if (size.width == width && size.height == height) {
                return true;
            }
        }
        return false;
    }

    /** Restarts the preview, clears the overlay and resets the switch to the running state. */
    @Override
    public void surfaceChanged(@NonNull SurfaceHolder holder, int format, int width, int height) {
        if (camera != null) {
            camera.stopPreview();
            camera.setPreviewCallback(null);
            camera.startPreview();
            camera.setPreviewCallback(this);
            ocrArea.clear();
            // Force the "paused" state, then click so the listener flips it back to running
            // and updates the button label.
            stopRecognizer = true;
            ocrSwitch.performClick();
        }
    }

    @Override
    public void surfaceDestroyed(@NonNull SurfaceHolder holder) {
        releaseCamera();
    }

    /** Stops the preview and releases the camera, if one is held, then forgets it. */
    private void releaseCamera() {
        if (camera == null) {
            return;
        }
        camera.stopPreview();
        camera.setPreviewCallback(null);
        camera.release();
        // Drop the reference so later clicks do not call into a released camera.
        camera = null;
    }
}

参考文章

  1. 文字识别 v2

http://www.kler.cn/a/471017.html

相关文章:

  • 加速物联网HMI革命,基于TouchGFX的高效GUI显示方案
  • 在macOS上安装MySQL
  • asio中strand用于串行执行task
  • 解锁编程智慧:23种设计模式案例分享
  • OpenCV轮廓相关操作API (C++)
  • Qt天气预报系统设计界面布局第四部分右边
  • H7-TOOL固件2.27发布,新增加40多款芯片脱机烧录,含多款车轨芯片,发布LUA API手册,CAN助手增加负载率,错误状态信息检测
  • Zookeeper是如何解决脑裂问题的?
  • 【首发 1day】WordPress Crypto 插件存在前台任意用户登录漏洞(CVE-2024-9989)
  • Pytest 变量渲染
  • Unity2D初级背包设计前篇 理论分析
  • 一文讲清计算机中的镜像,以及其在计算机中的作用
  • ARM发布Armv9.5架构:迈向更强性能与灵活性的新时代
  • YOLOv11改进 | 注意力篇 | YOLOv11引入24年空间和通道协同注意模块(SCSA),并构建C2PSA_SCSA
  • 在Spring Boot项目中使用Zookeeper和Curator实现高效、可靠的分布式锁
  • redis查看锁是否存在
  • 【数据库系统概论】数据库完整性与触发器--复习
  • Go Ebiten游戏库入门教程
  • 【NLP高频面题 - Transformer篇】什么是缩放点积注意力,为什么要除以根号d?
  • 开源人工智能模型框架:探索与实践
  • Leetcode打卡:不含特殊楼层的最大连续楼层数
  • 一文讲清楚PostgreSQL分区表
  • [openGauss 学废系列]-用户和模式的关系以及访问方式
  • Scala语言的语法
  • java实验6 J.U.C并发编程
  • jEasyUI 创建页脚摘要