当前位置：首页 > article >正文

基于Spring Boot的文字识别系统

article 2025/2/22 2:17:36

前端使用 HTML+CSS+JS，后端使用 Spring Boot，数据库使用 MySQL。识别算法有两个：一个是调用百度 OCR 接口，另一个是自己用 Python 编写的识别模型，并用 Flask 封装成 HTTP 服务。
其中百度OCR接口可以去免费申请，然后把appid、apikey、secretKey填入application.properties即可

在这里插入图片描述

application.properties

# Application name
spring.application.name=demo

# Baidu OCR credentials (replace the "your" placeholders with the values issued by Baidu)
baidu.ocr.appid=your
baidu.ocr.apiKey=your
baidu.ocr.secretKey=your

# URL of the self-hosted OCR model (replace the placeholder with the Flask service address)
myself.model.url=your flask

# Disable the Thymeleaf template cache
spring.thymeleaf.cache= false

# HTTP port of the embedded web server
server.port=8080
# Database (MySQL) connection
spring.datasource.url=jdbc:mysql://localhost:3306/test?characterEncoding=utf-8
spring.datasource.username=root
spring.datasource.password=123456
spring.datasource.driver-class-name=com.mysql.cj.jdbc.Driver

# JPA / Hibernate
spring.jpa.show-sql=true
spring.jpa.hibernate.ddl-auto=update
spring.jpa.properties.hibernate.dialect=org.hibernate.dialect.MySQL8Dialect

# Classpath locations from which static resources are served
spring.web.resources.static-locations=classpath:/static/,classpath:/demo/static/

数据库

在这里插入图片描述

0. pom.xml

<?xml version="1.0" encoding="UTF-8"?>
<project xmlns="http://maven.apache.org/POM/4.0.0" xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"
		 xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 https://maven.apache.org/xsd/maven-4.0.0.xsd">
	<modelVersion>4.0.0</modelVersion>
	<parent>
		<groupId>org.springframework.boot</groupId>
		<artifactId>spring-boot-starter-parent</artifactId>
		<version>3.3.2</version>
		<relativePath/> <!-- lookup parent from repository -->
	</parent>
	<groupId>com.example</groupId>
	<artifactId>demo</artifactId>
	<version>0.0.1-SNAPSHOT</version>
	<name>demo</name>
	<description>demo</description>
	<url/>
	<licenses>
		<license/>
	</licenses>
	<developers>
		<developer/>
	</developers>
	<scm>
		<connection/>
		<developerConnection/>
		<tag/>
		<url/>
	</scm>
	<properties>
		<java.version>17</java.version>
	</properties>

	<dependencies>
		<!-- Spring MVC and the embedded web server -->
		<dependency>
			<groupId>org.springframework.boot</groupId>
			<artifactId>spring-boot-starter-web</artifactId>
		</dependency>

		<!-- Test support (test scope only) -->
		<dependency>
			<groupId>org.springframework.boot</groupId>
			<artifactId>spring-boot-starter-test</artifactId>
			<scope>test</scope>
		</dependency>

		<!-- OkHttp HTTP client -->
		<dependency>
			<groupId>com.squareup.okhttp3</groupId>
			<artifactId>okhttp</artifactId>
			<version>4.12.0</version>
		</dependency>

		<!-- Lombok (compile-time code generation) -->
		<dependency>
			<groupId>org.projectlombok</groupId>
			<artifactId>lombok</artifactId>
			<optional>true</optional>
		</dependency>

		<!-- Baidu AI Java SDK -->
		<!-- https://mvnrepository.com/artifact/com.baidu.aip/java-sdk -->
		<dependency>
			<groupId>com.baidu.aip</groupId>
			<artifactId>java-sdk</artifactId>
			<version>4.11.3</version>
		</dependency>

		<!-- Thymeleaf template engine -->
		<dependency>
			<groupId>org.springframework.boot</groupId>
			<artifactId>spring-boot-starter-thymeleaf</artifactId>
		</dependency>

		<!-- MySQL JDBC driver (needed at runtime only) -->
		<dependency>
			<groupId>com.mysql</groupId>
			<artifactId>mysql-connector-j</artifactId>
			<scope>runtime</scope>
		</dependency>

		<!-- Spring Data JPA -->
		<dependency>
			<groupId>org.springframework.boot</groupId>
			<artifactId>spring-boot-starter-data-jpa</artifactId>
		</dependency>

	</dependencies>

	<build>
		<plugins>
			<plugin>
				<groupId>org.springframework.boot</groupId>
				<artifactId>spring-boot-maven-plugin</artifactId>
			</plugin>
		</plugins>
	</build>

</project>

1. 项目结构

在这里插入图片描述

config为一些配置设置
controller为控制层
dao为数据库实体层
service为服务层
css保存样式文件
js保存js文件
ocrImg是本地保存图片的位置，数据库里只存放图片在本地的地址

2. css代码

2.1 base.css

/* Reset: remove default margin and padding on every element and size boxes by their border */
* {
  margin: 0;
  padding: 0;
  box-sizing: border-box;
}

/* Page-wide font size, line height, font family, text color and background */
body {
  font: 16px/1.5  "Microsoft Yahei",
    "Hiragino Sans GB", "Heiti SC", "WenQuanYi Micro Hei", sans-serif;
  color: #333;
  background-color: #f5f7f8;
}

/* Remove the default list markers */
ul,
ol {
  list-style: none;
}

/* Remove the default italic rendering */
em,
i {
  font-style: normal;
}

/* Remove the default link underline and set the default link color */
a {
  text-decoration: none;
  color: #333;
}

/* Make images fill their container; middle alignment removes the default gap below inline images */
img {
  width: 100%;
  height: 100%;
  vertical-align: middle;
}

/* Remove the default input border and focus outline */
input {
  border: none;
  outline: none;
  color: #333;
}

/* Headings use normal weight unless a rule opts back into bold */
h1,
h2,
h3,
h4,
h5,
h6 {
  font-weight: 400;
}

2.2 index.css

/* Page wrapper: 10rem wide (flexible.js sets 1rem to one tenth of the viewport width) */
.container {
  width: 10rem;
  height: auto;
}

/* Top bar */
.header {
  display: flex;
  padding: 0 .1563rem;
  width: 10rem;
  height: .3385rem;
  align-items: center;
  background-color: #fff;
}
/* Logo: fixed pixel size, overriding the 100% size from the base img rule */
.header img {
  width: 30px;
  height: 30px;
}
/* Title text next to the logo */
.header span {
  margin-left: .026rem;
  font-size: .1042rem;
}
/* Main content area */
.main {
  width: 10rem;
  height: auto;
  padding: 0 .1563rem;
}
/* Row holding the "upload" and "recognize" actions */
.operation {
  display: flex;
  margin: .1563rem 0;
}
/* The native file input and button are hidden; their <label> elements act as the visible controls */
.operation input {
  display: none;
}
.operation button {
  display: none;
}
/* Visible action button (a label bound to the hidden control) */
.operation label {
  display: flex;
  margin-right: .1042rem;
  width: .7813rem;
  height: .2604rem;
  border: .0052rem solid #cbc2c2;
  border-radius: .0417rem;
  align-items: center;
  cursor: pointer;
}
.operation label:hover {
  background-color: #d1f6ff;
}
/* Icon column of an action button */
.operation label .left {
  margin-right: .0417rem;
  padding: .0781rem 0;
  width: .2083rem;
  height: .2604rem;
  text-align: center;
}
.operation label .left svg {
  width: .1042rem;
  height: .1042rem;
}
/* Action title */
.operation label .right h2 {
  font-size: .0833rem;
  font-weight: bold;
  color: #1296db;
}
/* Action subtitle (e.g. the accepted file types) */
.operation label .right h3 {
  font-size: .0625rem;
  color: #cbc2c2;
}
/* Main panel: source image on the left, recognition result on the right */
.function {
  width: 9.70rem;
  height: 3.125rem;
  border-radius: .0938rem;
  background-color: #fff;
  overflow: hidden;
  border: .0052rem solid #cbc2c2;
}
/* Title bar of the panel */
.function .title {
  display: flex;
  width: 100%;
  height: .2604rem;
  border-bottom: .0052rem solid #cbc2c2;
  align-items: center;
}
/* Title text takes the left half so the model selector starts at the panel's midpoint */
.function .title h2 {
  width: 50%;
  padding-left: .1042rem;
  font-size: .0833rem;
  font-weight: bold;
  color: #1f4d77;
}
/* Body of the panel: two equal columns */
.function .action {
  display: flex;
  width: 100%;
  height: 2.8646rem;
}
/* Left column: uploaded image preview */
.function .action .source {
  width: 50%;
  height: 2.8646rem;
  border-right: .0052rem solid #cbc2c2;
}
/* Scale the preview to fit the column while keeping its aspect ratio */
.function .action .source img {
  width: 100%;
  height: 100%;
  object-fit: contain;
  margin: auto;
}
/* Right column: recognition result */
.function .action .result {
  width: 50%;
  height: 2.8646rem;
}
.function .action .result textarea{
  width: 100%;
  height: 100%;
  font-size: .0833rem;
  border: none;
}

/* History button: a round outlined button that opens the history page */
/* NOTE(review): top, bottom and height are all set on this absolutely positioned box;
   in that over-constrained case the bottom value is presumably ignored - confirm and drop it. */
.circle-button {
  position: absolute;
  top : 75.5%;
  right: 0.18rem;
  bottom: 0.1064rem;
  width: 0.1583rem;
  height: 0.1583rem;
  border: 1px solid black;
  border-radius: 50%;
  display: flex;
  align-items: center;
  justify-content: center;
  cursor: pointer;
  background: transparent;
}

/* Inner ring drawn with a pseudo-element */
.circle-button::before {
  content: "";
  display: block;
  width: 0.1063rem; /* diameter of the inner circle */
  height: 0.1063rem;
  border: 1px solid black; /* thin black border */
  border-radius: 50%; /* makes the box a circle */
  background: transparent; /* transparent background */
}

.circle-button:hover {
  border-color: #000; /* keep the black border on hover */
}

/* OCR model selector, styled to look like the panel title text */
#modelSelect {
  background: transparent; /* transparent background (a color could be set here instead) */
  border: none; /* remove the border */
  padding: 0; /* remove the padding */
  margin: 0; /* remove the margin */
  font-size: .0833rem;
  font-weight: bold;
  color: #1f4d77;
  cursor: pointer; /* pointer cursor */
  position: relative; /* intended as an anchor for a pseudo-element; none is defined in this stylesheet */
  display: inline-block; /* stay on the same line as neighbouring elements */
  /* wide enough for the option text plus the drop-down arrow */
  width: 100px; /* adjust as preferred */
  /* explicit height so the arrow is vertically centred */
  height: 2em; /* adjust as preferred */
  line-height: 2em; /* vertically centre the text */
}

/* Loading spinner: hidden by default; index.js toggles its display while a request is in flight */
.loading {
  border: 4px solid rgba(0, 0, 0, 0.1);
  border-top-color: #1296db;
  border-radius: 50%;
  width: 24px;
  height: 24px;
  animation: spin 1s linear infinite;
  position: absolute;
  top: 50%;
  left: 74%;
  transform: translate(-50%, -50%);
  display: none;
}

/* One full rotation, referenced by the .loading animation */
@keyframes spin {
  0% { transform: rotate(0deg); }
  100% { transform: rotate(360deg); }
}

2.3 re.css

/* Full-viewport page wrapper laid out as a column: header on top, main area below */
.container {
  display: flex;
  flex-direction: column;
  width: 100vw;
  height: 100vh;
}

/* Top bar */
.header {
  display: flex;
  padding: 0 1.563vw;
  width: 100vw;
  height: 3.385vw;
  align-items: center;
  background-color: #fff;
}
/* Logo: fixed pixel size, overriding the 100% size from the base img rule */
.header img {
  width: 30px;
  height: 30px;
}
/* Title text next to the logo */
.header span {
  margin-left: .26vw;
  font-size: 1.042vw;
}
/* Main area: fills the height left under the header */
.main {
  display: flex;
  flex: 1;
  width: 100vw;
}
/* Left sidebar */
.main .left {
  width: 10%;
  background-color: #fff;
}
/* Sidebar entry */
.main .left .logs {
  width: 100%;
  height: 3vw;
  line-height: 3vw;
  text-align: center;
  background-color: #bdd7ee;
}
.main .left .logs span {
  font-weight: bold;
  color: #2e75b6;
}
/* Right pane: scrolls vertically and holds the history table */
.main .right {
  width: 90%;
  height: 100%;
  overflow-y: scroll;
}
.right .table {
  width: 100%;
  height: auto;
}
/* Collapse adjacent cell borders into single lines */
table
{
    border-collapse:collapse;
}
table, th, td
{
  border: 1px solid black;
}
/* Header row */
.table .thead {
  width: 100%;
  height: 3vw;
  text-align: center;
  line-height: 3vw;
}
/* Cells are positioning contexts for the absolutely positioned .image-container */
.table td {
  position: relative;
  text-align: center;
}
/* Data rows */
.table tbody tr {
  width: 100%;
  height: 10vw;
}
/* Odd data rows get a white background */
.table tbody tr:nth-child(2n-1){
  background-color: #fff;
}
/* Column widths, in column order (see the header cells in re.html) */
.table tbody td:nth-child(1) {
  width: 5%;
}
.table tbody tr td:nth-child(2){
  width: 40%;
}
.table tbody tr td:nth-child(3){
  width: 30%;
}
.table tbody tr td:nth-child(4){
  width: 15%;
}
.table tbody tr td:nth-child(5){
  width: 10%;
}




/* Fixed-size box centred inside its table cell that holds the image thumbnail */
.image-container {
  position: absolute;
  top: 50%;
  left: 50%;
  transform: translate(-50%, -50%); /* move the container to the centre of the td */
  width: 470px; /* fixed width (adjust as needed) */
  height: 130px; /* fixed height (adjust as needed) */
  overflow: hidden;
  display: flex;
  align-items: center;
  justify-content: center;
}

.image-container img {
  max-width: 100%; /* never wider than the container */
  max-height: 100%; /* never taller than the container */
  object-fit: contain; /* keep the image's aspect ratio */
}

3. JS 代码

3.1 doData.js

/**
 * Fills the history table once the DOM is ready.
 *
 * Fetches every OCR record from the backend, orders the records by id in
 * descending order (newest first) and renders one table row per record with the
 * columns: id, image, recognized text, recognition time, model name.
 */
document.addEventListener('DOMContentLoaded', function() {

    const tableBody = document.getElementById('table-body');
    if (tableBody === null) {
        console.error('The element with id "table-body" is not found.');
        return;
    }

    // Creates a <td> whose content is assigned through textContent, so text
    // coming from the server is never interpreted as HTML.
    function createTextCell(value) {
        const cell = document.createElement('td');
        cell.textContent = value;
        return cell;
    }

    // Creates the image <td>: the <img> is wrapped in a div.image-container
    // so that re.css can centre and size the thumbnail.
    function createImageCell(src) {
        const imgElement = document.createElement('img');
        imgElement.src = src;
        imgElement.alt = "Image";

        const imageContainer = document.createElement('div');
        imageContainer.className = 'image-container';
        imageContainer.appendChild(imgElement);

        const cell = document.createElement('td');
        cell.appendChild(imageContainer);
        return cell;
    }

    fetch('http://localhost:8080/history')
        .then(response => {
            // fetch() only rejects on network failure; an HTTP error status
            // must be detected explicitly before the body is parsed.
            if (!response.ok) {
                throw new Error('History request failed with status ' + response.status);
            }
            return response.json();
        })
        .then(data => {
            if (!Array.isArray(data)) {
                throw new Error('Unexpected history payload: expected an array');
            }

            // Build all rows off-document and attach them in a single operation.
            const fragment = document.createDocumentFragment();

            // Newest record first.
            [...data].sort((a, b) => b.id - a.id).forEach(item => {
                const row = document.createElement('tr');
                row.appendChild(createTextCell(item.id));
                row.appendChild(createImageCell(item.imagedata));
                row.appendChild(createTextCell(item.text));
                row.appendChild(createTextCell(item.datetime));
                row.appendChild(createTextCell(item.modelname));
                fragment.appendChild(row);
            });

            tableBody.appendChild(fragment);
        })
        .catch(error => console.error('Error fetching data:', error));
});

3.2 flexible.js

/**
 * Responsive rem setup.
 *
 * - Sets the body font size to 12px multiplied by the device pixel ratio.
 * - Sets the root font size so that 1rem equals one tenth of the viewport
 *   width, and keeps it up to date on resize and on back/forward-cache restores.
 * - On screens with a device pixel ratio of 2 or more, adds the "hairlines"
 *   class to <html> when a 0.5px border renders with an offset height of 1.
 */
(function flexible (window, document) {
  var rootElement = document.documentElement;
  var pixelRatio = window.devicePixelRatio || 1;

  // Apply the body font size now, or as soon as the body exists.
  function setBodyFontSize () {
    if (!document.body) {
      document.addEventListener('DOMContentLoaded', setBodyFontSize);
      return;
    }
    document.body.style.fontSize = (12 * pixelRatio) + 'px';
  }

  // 1rem = viewport width / 10
  function setRemUnit () {
    var oneTenthOfWidth = rootElement.clientWidth / 10;
    rootElement.style.fontSize = oneTenthOfWidth + 'px';
  }

  setBodyFontSize();
  setRemUnit();

  // Recompute the rem unit whenever the viewport changes size...
  window.addEventListener('resize', setRemUnit);
  // ...and when the page is restored from the back/forward cache.
  window.addEventListener('pageshow', function (event) {
    if (event.persisted) {
      setRemUnit();
    }
  });

  // Probe 0.5px border support with a temporary body element.
  if (pixelRatio >= 2) {
    var probeBody = document.createElement('body');
    var probe = document.createElement('div');
    probe.style.border = '.5px solid transparent';
    probeBody.appendChild(probe);
    rootElement.appendChild(probeBody);
    if (probe.offsetHeight === 1) {
      rootElement.classList.add('hairlines');
    }
    rootElement.removeChild(probeBody);
  }
}(window, document))

3.3 index.js

// Preview the selected image: read it as a data URL and, once it has decoded
// successfully, show it in the #uploadedImage element.
document.getElementById('imageInput').addEventListener('change', (changeEvent) => {
  const selectedFile = changeEvent.target.files[0];
  if (!selectedFile) {
    return;
  }

  const fileReader = new FileReader();
  fileReader.onload = (loadEvent) => {
    const decoded = new Image();
    decoded.src = loadEvent.target.result;
    decoded.onload = () => {
      const preview = document.getElementById('uploadedImage');
      preview.src = decoded.src;
      preview.style.display = 'block';
    };
  };
  fileReader.readAsDataURL(selectedFile);
});


// Recognize button: upload the selected image to the OCR endpoint of the
// model chosen in the #modelSelect drop-down.
document.getElementById('sumbutt').addEventListener('click', () => {
  const chosenFiles = document.getElementById('imageInput').files;

  // Nothing selected yet: tell the user and stop.
  if (!(chosenFiles.length > 0)) {
    alert('请选择一个图片文件！');
    return;
  }

  const payload = new FormData();
  payload.append('file', chosenFiles[0]);

  // OCR model picked by the user
  const chosenModel = document.getElementById('modelSelect').value;

  showLoadingAnimation();
  // Send the file to the server for recognition
  sendFileToServer(payload, chosenModel);
});


/**
 * Sends the image to the server for OCR and shows the recognized text.
 *
 * The loading animation is hidden again on every path (success, failure and
 * unknown model).
 *
 * @param {FormData} formData form data carrying the image under the "file" key
 * @param {string} model selected model: 'baidu' or 'myModel'
 */
function sendFileToServer(formData, model) {
  const endpoints = {
    baidu: 'http://localhost:8080/baiduOcr',
    myModel: 'http://localhost:8080/myModel'
  };

  // An unknown model used to leave the URL undefined, which made fetch()
  // request the literal relative path "undefined". Fail fast instead.
  if (!Object.prototype.hasOwnProperty.call(endpoints, model)) {
    hideLoadingAnimation();
    alert('识别失败，请稍后重试');
    console.error('Unknown OCR model:', model);
    return;
  }

  fetch(endpoints[model], {
    method: 'POST',
    body: formData
  })
      .then(function (response) {
        // fetch() resolves for HTTP error statuses too; turn them into failures.
        if (!response.ok) {
          throw new Error('Network response was not ok');
        }
        return response.text();
      })
      .then(function (text) {
        hideLoadingAnimation();
        console.log('OCR Result:', text);
        displayOcrResult(splitAndProcessText(text));
      })
      .catch(function (error) {
        hideLoadingAnimation();
        alert('识别失败，请稍后重试');
        console.error('There has been a problem with your fetch operation:', error);
      });
}


/**
 * Shows the OCR result on the page.
 *
 * @param {string[]} ocrData recognized lines; written to #resultMsg one per line
 */
function displayOcrResult(ocrData) {
  const joinedLines = ocrData.join('\n');
  document.getElementById('resultMsg').value = joinedLines;
}


/**
 * Splits the text received from the backend into an array of lines.
 *
 * @param {string} text raw response text
 * @returns {string[]} the text split on LF or CRLF line breaks
 */
function splitAndProcessText(text) {
  const lineBreak = /\r?\n/;
  const lines = text.split(lineBreak);
  return lines;
}


/** Shows the loading animation by making the #loading element visible. */
function showLoadingAnimation() {
  document.getElementById('loading').style.display = 'block';
}


/** Hides the loading animation by removing the #loading element from display. */
function hideLoadingAnimation() {
  document.getElementById('loading').style.display = 'none';
}


// History button: open the history page (re.html) in a new tab.
let circleButton = document.querySelector('.circle-button');
circleButton.addEventListener('click', () => {
  const historyPage = 're.html';
  window.open(historyPage, '_blank');
});

4. 前端页面

4.1识别页面

在这里插入图片描述

4.1.1 index.html

<!DOCTYPE html>
<!-- Recognition page: upload an image, pick an OCR model and view the recognized text. -->
<html lang="zh-CN">

<head>
  <meta charset="UTF-8">
  <meta name="viewport" content="width=device-width, initial-scale=1.0">
  <link rel="stylesheet" href="../static/css/base.css">
  <link rel="stylesheet" href="../static/css/index.css">
  <title>识别页面</title>
</head>

<body>
  <div class="container">
    <div class="header">
      <a href="index.html"><img src="../static/ocrImg/logo.jpg" alt="">
        <span>OCR文字识别</span>
      </a>
    </div>
    <div class="main">
      <!-- Actions: the labels are the visible buttons; the real input and button are hidden by index.css -->
      <div class="operation">
        <label for="imageInput">
          <div class="left">
            <svg t="1722243087697" class="icon" viewBox="0 0 1024 1024" version="1.1" xmlns="http://www.w3.org/2000/svg"
              p-id="2417" width="200" height="200">
              <path
                d="M839.647708 1016.974416H184.352292a98.358181 98.358181 0 0 1-97.719492-98.358182V107.480583A98.358181 98.358181 0 0 1 184.352292 9.122402h383.213694a97.080802 97.080802 0 0 1 63.868948 24.90889l275.27517 243.979385a95.803423 95.803423 0 0 1 32.573164 72.810602v567.794955a98.358181 98.358181 0 0 1-99.63556 98.358182zM184.352292 74.907419a32.573164 32.573164 0 0 0-32.573164 32.573164v811.135651a32.573164 32.573164 0 0 0 32.573164 32.573164h655.295416a32.573164 32.573164 0 0 0 32.573164-32.573164V350.821279a30.657095 30.657095 0 0 0-10.857722-23.631512l-274.63648-243.979384a35.127922 35.127922 0 0 0-21.715442-8.302964z"
                fill="#1296db" p-id="2418"></path>
              <path
                d="M448.131051 354.653416H288.458679a33.211853 33.211853 0 0 1 0-63.868949H448.131051a33.211853 33.211853 0 1 1 0 63.868949zM667.840235 547.537641H288.458679a32.573164 32.573164 0 0 1 0-63.868949h379.381556a32.573164 32.573164 0 0 1 0 63.868949zM667.840235 741.060556H288.458679a33.211853 33.211853 0 0 1 0-63.868949h379.381556a33.211853 33.211853 0 0 1 0 63.868949zM883.078593 359.124242h-319.344744a32.573164 32.573164 0 0 1-33.211854-32.573164V42.334255a33.211853 33.211853 0 1 1 63.868949 0v251.643659h285.494202a32.573164 32.573164 0 1 1 0 63.868949z"
                fill="#1296db" p-id="2419"></path>
            </svg>
          </div>
          <div class="right">
            <h2>上传图片</h2>
            <h3>JPG,PNG,JPEG</h3>
          </div>
        </label>
        <!-- The change handler is attached in index.js; the former inline handler called an undefined function -->
        <input type="file" id="imageInput" accept="image/png,image/jpeg,image/jpg">
        <label for="sumbutt">
          <div class="left">
            <svg t="1722300478940" class="icon" viewBox="0 0 1024 1024" version="1.1" xmlns="http://www.w3.org/2000/svg"
              p-id="2849" width="200" height="200">
              <path
                d="M974.72 224L798.08 47.36a52.928 52.928 0 0 0-75.52 0l-603.52 603.52c-6.4 6.4-10.88 14.72-13.44 23.04L35.84 924.16c-5.12 18.56 0 39.04 14.08 52.48 10.24 10.24 23.68 15.36 37.12 15.36 5.12 0 10.24-0.64 15.36-1.92l246.4-73.6c8.32-2.56 16-7.04 22.4-13.44l603.52-603.52c21.12-20.48 21.12-54.4 0-75.52z m-113.28 37.76l-34.56 34.56-101.12-101.12 34.56-34.56 101.12 101.12z m-517.76 518.4L242.56 678.4l407.68-407.68 101.12 101.12-407.68 408.32z m-155.52-5.12l60.16 60.16-83.84 24.96 23.68-85.12z"
                fill="#1296db" p-id="2850"></path>
            </svg>
          </div>
          <div class="right">
            <h2>开始识别</h2>
          </div>
        </label>
        <!-- Not inside a form, so this is a plain button; its click handler lives in index.js -->
        <button id="sumbutt" type="button">识别</button>
      </div>
      <!-- Main panel -->
      <div class="function">
        <div class="title">
          <h2>识别图片</h2>
          <select id="modelSelect">
            <option value="baidu">百度OCR模型</option>
            <option value="myModel">自建OCR模型</option>
          </select>
        </div>
        <div class="action">
          <!-- Preview of the uploaded image -->
          <div class="source">
            <img id="uploadedImage">
          </div>
          <!-- Recognition result -->
          <div class="result">
            <textarea name="" id="resultMsg" cols="30" rows="10"></textarea>
            <div class="circle-button"></div>
            <div class="loading" id="loading"></div>
          </div>
        </div>
      </div>
    </div>
  </div>

  <!-- Scripts are the last children of body: every element they look up already exists when they run -->
  <script src="../static/js/index.js"></script>
  <script src="../static/js/flexible.js"></script>
</body>

</html>

4.2 历史记录页面

在这里插入图片描述

4.2.1 re.html

<!DOCTYPE html>
<!-- History page: lists earlier OCR results; the rows are rendered by doData.js. -->
<html lang="zh-CN">
<head>
    <meta charset="UTF-8">
    <meta name="viewport" content="width=device-width, initial-scale=1.0">
    <title>历史记录</title>
    <link rel="stylesheet" href="../static/css/base.css">
    <link rel="stylesheet" href="../static/css/re.css">
    <!-- Loaded in the head; the script waits for DOMContentLoaded before touching the table -->
    <script src="../static/js/doData.js"></script>
</head>
<body>
<div class="container">
    <div class="header">
        <a href="index.html">
            <img src="../static/ocrImg/logo.jpg" alt="">
            <span>OCR文字识别</span>
        </a>
    </div>
    <div class="main">
        <div class="left">
            <div class="logs">
                <span>识别记录</span>
            </div>
        </div>
        <div class="right">
            <table class="table">
                <thead class="thead">
                <tr>
                    <th>序号</th>
                    <th>图片</th>
                    <th>识别结果</th>
                    <th>识别时间</th>
                    <th>识别模型</th>
                </tr>
                </thead>
                <!-- Filled by doData.js with one row per history record -->
                <tbody id="table-body">
                </tbody>
            </table>
        </div>
    </div>
</div>

</body>
</html>

5. config文件夹

在这里插入图片描述

5.1 baiduOcrProperties

package com.example.demo.config;

import lombok.Data;
import org.springframework.boot.context.properties.ConfigurationProperties;
import org.springframework.context.annotation.Configuration;

/**
 * Baidu OCR credentials bound from configuration properties with the
 * {@code baidu.ocr} prefix.
 *
 * <p>Accessors are generated by Lombok's {@code @Data}.
 */
@Data
@Configuration
@ConfigurationProperties(prefix = "baidu.ocr")
public class baiduOcrProperties {
    /** Baidu OCR App ID. */
    private String appId;
    /** Baidu OCR API key. */
    private String apiKey;
    /** Baidu OCR secret key. */
    private String secretKey;
}

5.2 CorsConfiguration

package com.example.demo.config;

import org.springframework.context.annotation.Configuration;
import org.springframework.web.servlet.config.annotation.CorsRegistry;
import org.springframework.web.servlet.config.annotation.WebMvcConfigurer;

/**
 * Cross-origin (CORS) configuration for all endpoints.
 *
 * <p>NOTE(review): every origin pattern is accepted and credentials are allowed;
 * restrict the allowed origins before exposing this beyond local development.
 */
@Configuration
public class CorsConfiguration implements WebMvcConfigurer {
    /**
     * Registers one CORS mapping covering every path: any origin pattern,
     * credentials allowed, GET/POST/DELETE/PUT, with a max age of 3600 seconds.
     *
     * @param registry the registry the mapping is added to
     */
    @Override
    public void addCorsMappings(CorsRegistry registry) {
        var everyPath = registry.addMapping("/**");
        everyPath.allowedOriginPatterns("*");
        everyPath.allowCredentials(true);
        everyPath.allowedMethods("GET", "POST", "DELETE", "PUT");
        everyPath.maxAge(3600);
    }
}

5.3 myselfModelProperties

package com.example.demo.config;

import lombok.Data;
import org.springframework.boot.context.properties.ConfigurationProperties;
import org.springframework.context.annotation.Configuration;

/**
 * Settings for the self-hosted OCR model, bound from configuration properties
 * with the {@code myself.model} prefix.
 *
 * <p>Accessors are generated by Lombok's {@code @Data}.
 */
@Data
@Configuration
@ConfigurationProperties(prefix = "myself.model")
public class myselfModelProperties {
    /** Address (URL) of the recognition model service. */
    private String url;
}

6.controller文件夹

在这里插入图片描述

6.1 ocrController

package com.example.demo.controller;

import com.example.demo.dao.imageTable;
import com.example.demo.service.imageService;
import org.springframework.beans.factory.annotation.Autowired;
import org.springframework.http.ResponseEntity;
import org.springframework.web.bind.annotation.GetMapping;

import org.springframework.web.bind.annotation.PostMapping;
import org.springframework.web.bind.annotation.RestController;
import org.springframework.web.multipart.MultipartFile;

import java.io.IOException;
import java.util.List;

@RestController
public class ocrController {

    private final imageService imageService;

    @Autowired
    public ocrController(imageService imageService) {
        this.imageService = imageService;
    }


    /**
     * 获取所有OCR识别记录。
     *
     * @return 所有OCR识别记录。
     */
    @GetMapping("/history")
    public List<imageTable> getAllImages() {
        return imageService.getAllImages();
    }


    /**
     * 使用自建OCR模型识别图片
     *
     * @param file 用户上传的文件
     * @return OCR识别结果。
     * @throws IOException 读取文件时发生错误
     */
    @PostMapping("/myModel")
    public ResponseEntity<String> myModel(MultipartFile file) throws IOException {
        String saveImgName= imageService.loadAndSaveImage(file);
        try {
            List<String> ocrResult = imageService.performMyselfOcrModel(file);//执行自建OCR识别
            // 将 List<String> 转换为一个单一的字符串
            StringBuilder resultString = new StringBuilder();
            for (String line : ocrResult) {
                resultString.append(line).append("\n");
            }
            String modelName="OcrNetV1";
            imageService.saveOcrData(modelName,saveImgName, resultString.toString());
            return ResponseEntity.ok(resultString.toString());
        } catch (Exception e) {
            e.printStackTrace();
            return ResponseEntity.status(500).body("Doing ocr is error：" + e.getMessage());// 返回一个带有错误信息的失败响应
        }
    }


    /**
     * 使用百度ocr处理图片文字识别请求。
     *
     * @param file  用户上传的文件
     * @return 识别结果。
     * @throws IOException 如果读取文件时发生错误。
     */
    @PostMapping(value = "/baiduOcr")
    public ResponseEntity<String> ocr(MultipartFile file) throws IOException {
        String saveImgName=imageService.loadAndSaveImage(file);

        try {
            List<String> ocrResult = imageService.performBaiduOcr(file);//执行百度OCR识别
            // 将 List<String> 转换为一个单一的字符串
            StringBuilder resultString = new StringBuilder();
            for (String line : ocrResult) {
                resultString.append(line).append("\n");
            }
            System.out.println(saveImgName);
            String modelName="BaiDuOcr";
            imageService.saveOcrData(modelName,saveImgName, resultString.toString());

            return ResponseEntity.ok(resultString.toString());
        } catch (Exception e) {
            e.printStackTrace();
            return ResponseEntity.status(500).body("Doing ocr is error：" + e.getMessage());// 返回一个带有错误信息的失败响应
        }

    }
}

7. dao文件夹

在这里插入图片描述

7.1 imageTable

package com.example.demo.dao;

import jakarta.persistence.*;
import lombok.Getter;
import lombok.Setter;

import static jakarta.persistence.GenerationType.IDENTITY;

/**
 * JPA entity mapped to the {@code OCRHISTORY} table; one row per OCR recognition.
 *
 * <p>Getters and setters are generated by Lombok.
 */
@Getter
@Setter
@Entity
@Table(name = "OCRHISTORY")
public class imageTable {
    /** Primary key, generated by the database (identity strategy). */
    @Id
    @GeneratedValue(strategy = IDENTITY)
    @Column(name = "id", nullable = false)
    private Integer id;

    /** Name of the OCR model that produced the result. */
    @Column(name = "modelname")
    private String modelname;

    /** Image reference; presumably the path or file name of the saved image - confirm against the service. */
    @Column(name = "imagedata")
    private String imagedata;

    /** Recognition time, stored as text; the format is defined by whoever writes the row. */
    @Column(name = "datetime")
    private String datetime;

    /** Recognized text. */
    @Column(name = "text")
    private String text;

}

7.2 imageTableRepository

package com.example.demo.dao;

import org.springframework.data.jpa.repository.JpaRepository;
import org.springframework.stereotype.Repository;

/**
 * Spring Data JPA repository for {@link imageTable} entities with {@code Integer} ids.
 * It declares no methods of its own; everything is inherited from {@link JpaRepository}.
 */
@Repository
public interface imageTableRepository extends JpaRepository<imageTable, Integer> {

}

8.service文件夹

在这里插入图片描述

8.1 imageService

package com.example.demo.service;

import com.example.demo.dao.imageTable;
import org.springframework.web.multipart.MultipartFile;

import java.io.IOException;
import java.util.List;


/**
 * Service operations for storing uploaded images, running OCR on them and
 * reading back the recognition history.
 */
public interface imageService {

    /** Returns all stored image records. */
    List<imageTable> getAllImages();

    /** Saves the recognition result of an image together with the model name and the image path. */
    void saveOcrData(String modelName,String imagePath, String ocrResult);

    /**
     * Converts a file to its Base64 encoding.
     * NOTE(review): urlEncode presumably requests URL-encoding of the Base64 text - confirm in the implementation.
     */
    String getFileContentAsBase64(MultipartFile file, boolean urlEncode) throws IOException;

    /** Runs Baidu OCR on the file and returns the recognized lines. */
    List<String> performBaiduOcr(MultipartFile file) throws Exception;

    /** Runs the self-hosted OCR model on the file and returns the recognized lines. */
    List<String> performMyselfOcrModel(MultipartFile file) throws Exception;

    /** Loads the uploaded image and saves it; returns a string identifying the saved image. */
    String loadAndSaveImage(MultipartFile file) throws IOException;

}

8.2 imageServiceImpl

package com.example.demo.service;

import com.example.demo.dao.imageTableRepository;
import com.example.demo.dao.imageTable;
import com.example.demo.config.baiduOcrProperties;
import com.example.demo.config.myselfModelProperties;
import com.fasterxml.jackson.databind.JsonNode;
import com.fasterxml.jackson.databind.ObjectMapper;
import okhttp3.*;
import org.json.JSONObject;
import org.springframework.beans.factory.annotation.Autowired;
import org.springframework.http.HttpEntity;
import org.springframework.http.HttpHeaders;
import org.springframework.http.ResponseEntity;
import org.springframework.stereotype.Service;
import org.springframework.web.client.RestTemplate;
import org.springframework.web.multipart.MultipartFile;

import java.io.IOException;
import java.net.URLEncoder;
import java.nio.charset.StandardCharsets;
import java.nio.file.Files;
import java.nio.file.Path;
import java.nio.file.Paths;
import java.text.SimpleDateFormat;
import java.util.ArrayList;
import java.util.Base64;
import java.util.Date;
import java.util.List;

/**
 * {@link imageService} implementation backed by the Baidu OCR HTTP API, a
 * self-hosted OCR model reachable over HTTP, and a JPA repository for the
 * recognition history.
 */
@Service
public class imageServiceImpl implements imageService {

    /** OkHttp client shared by every Baidu API call. */
    static final OkHttpClient HTTP_CLIENT = new OkHttpClient().newBuilder().build();

    /** Reused JSON mapper; building one per call is needlessly expensive. */
    private static final ObjectMapper OBJECT_MAPPER = new ObjectMapper();

    /** Timestamp layout stored with every recognition record. */
    private static final DateTimeFormatter TIMESTAMP_FORMAT =
            DateTimeFormatter.ofPattern("yyyy-MM-dd HH:mm:ss");

    /** Media type of the form bodies sent to Baidu (okhttp3.MediaType). */
    private static final MediaType FORM_URLENCODED =
            MediaType.parse("application/x-www-form-urlencoded");

    private static final String TOKEN_URL = "https://aip.baidubce.com/oauth/2.0/token";
    private static final String OCR_URL =
            "https://aip.baidubce.com/rest/2.0/ocr/v1/accurate_basic?access_token=";

    // NOTE(review): absolute developer-machine path; should come from configuration.
    private static final String UPLOAD_DIR = "D:/code/java/demo/src/main/resources/static/ocrImg";

    /** Prefix of the value returned to callers for a stored image. */
    private static final String PUBLIC_IMAGE_PREFIX = "/demo/static/ocrImg/";

    /** Extension used when the upload's content type yields no usable one. */
    private static final String FALLBACK_EXTENSION = "bin";

    private final baiduOcrProperties baiduOcrProperties;
    private final imageTableRepository imageTableRepository;
    private final myselfModelProperties myselfModelProperties;
    private final RestTemplate restTemplate = new RestTemplate();

    /**
     * Creates the service with its injected collaborators.
     *
     * @param baiduOcrProperties    Baidu API credentials
     * @param imageTableRepository  repository holding the recognition history
     * @param myselfModelProperties location of the self-hosted OCR model
     */
    @Autowired
    public imageServiceImpl(baiduOcrProperties baiduOcrProperties,
                            imageTableRepository imageTableRepository,
                            myselfModelProperties myselfModelProperties) {
        this.baiduOcrProperties = baiduOcrProperties;
        this.imageTableRepository = imageTableRepository;
        this.myselfModelProperties = myselfModelProperties;
    }

    /**
     * Stores the uploaded image in the local upload directory.
     *
     * <p>The file is named {@code image-<currentTimeMillis>.<ext>}, where the
     * extension is derived from the upload's content type and restricted to
     * safe characters; {@code bin} is used when none can be derived.
     *
     * @param file the file uploaded by the user
     * @return the stored image's name prefixed with {@code /demo/static/ocrImg/}
     * @throws IOException if the directory cannot be created or the file cannot be written
     */
    @Override
    public String loadAndSaveImage(MultipartFile file) throws IOException {
        System.out.println("<-------------->");
        System.out.println("Image upload ok!");
        System.out.println("<-------------->");
        System.out.println(" ");

        String contentType = file.getContentType(); // e.g. image/png
        if (contentType == null) {
            System.out.println("<-------------->");
            System.out.println("Parts is null!");
            System.out.println("<-------------->");
            System.out.println(" ");
        }
        String fileExtension = extensionFor(contentType);

        Path uploadDir = Paths.get(UPLOAD_DIR);
        Files.createDirectories(uploadDir);
        String fileName = "image-" + System.currentTimeMillis() + '.' + fileExtension;
        Path imagePath = uploadDir.resolve(fileName);

        // Close the upload stream deterministically once it has been copied.
        try (InputStream in = file.getInputStream()) {
            Files.copy(in, imagePath);
        }

        // Sanity checks only: a problem is reported but does not fail the upload.
        if (!Files.exists(imagePath)) {
            System.err.println("The file was not saved.");
        } else if (Files.size(imagePath) == 0) {
            System.err.println("The saved file is empty.");
        }
        return PUBLIC_IMAGE_PREFIX + fileName;
    }

    /**
     * Derives a file extension from a MIME content type.
     *
     * <p>Only the leading run of {@code [a-z0-9-]} of the subtype is kept, so
     * parameters ({@code ; charset=...}), structured suffixes ({@code +xml})
     * and path separators supplied by the client never reach the file name.
     *
     * @param contentType the content type reported for the upload, may be {@code null}
     * @return a non-empty extension without the leading dot
     */
    private static String extensionFor(String contentType) {
        if (contentType == null) {
            return FALLBACK_EXTENSION;
        }
        String mime = contentType;
        int paramStart = mime.indexOf(';');
        if (paramStart >= 0) {
            mime = mime.substring(0, paramStart);
        }
        String subtype = mime.substring(mime.lastIndexOf('/') + 1).trim().toLowerCase(Locale.ROOT);

        StringBuilder extension = new StringBuilder();
        for (int i = 0; i < subtype.length(); i++) {
            char c = subtype.charAt(i);
            boolean safe = (c >= 'a' && c <= 'z') || (c >= '0' && c <= '9') || c == '-';
            if (!safe) {
                break;
            }
            extension.append(c);
        }
        return extension.length() == 0 ? FALLBACK_EXTENSION : extension.toString();
    }

    /**
     * Returns every stored recognition record.
     *
     * @return all records from the repository
     */
    @Override
    public List<imageTable> getAllImages() {
        return imageTableRepository.findAll();
    }

    /**
     * Persists one recognition record stamped with the current local time
     * formatted as {@code yyyy-MM-dd HH:mm:ss}.
     *
     * @param modelName name of the model that produced the result
     * @param imagePath stored path of the recognised image
     * @param ocrResult recognised text
     */
    @Override
    public void saveOcrData(String modelName, String imagePath, String ocrResult) {
        imageTable ocrhistory = new imageTable();
        ocrhistory.setModelname(modelName);
        ocrhistory.setDatetime(LocalDateTime.now().format(TIMESTAMP_FORMAT));
        ocrhistory.setImagedata(imagePath);
        ocrhistory.setText(ocrResult);

        imageTableRepository.save(ocrhistory);
    }

    /**
     * Encodes the file content as Base64 (no data-URI header).
     *
     * @param file      the file to encode; {@code null} yields an empty string
     * @param urlEncode pass {@code true} when the result is sent as
     *                  {@code application/x-www-form-urlencoded}
     * @return the Base64 text, URL-encoded when requested
     * @throws IOException if the file content cannot be read
     */
    @Override
    public String getFileContentAsBase64(MultipartFile file, boolean urlEncode) throws IOException {
        if (file == null) {
            return "";
        }
        String base64 = Base64.getEncoder().encodeToString(file.getBytes());
        return urlEncode ? URLEncoder.encode(base64, StandardCharsets.UTF_8) : base64;
    }

    /**
     * Posts the image to the self-hosted OCR model as a JSON document with a
     * single {@code image} property.
     *
     * @param fileBase64 Base64 image content without a data-URI header
     * @return the raw response body, or an empty string when there is none
     */
    public String MyselfOcrModel(String fileBase64) {
        HttpHeaders headers = new HttpHeaders();
        headers.setContentType(org.springframework.http.MediaType.APPLICATION_JSON);

        // Let Jackson serialise the payload so the value is always escaped correctly.
        String requestBody = OBJECT_MAPPER.createObjectNode().put("image", fileBase64).toString();
        HttpEntity<String> entity = new HttpEntity<>(requestBody, headers);

        ResponseEntity<String> response =
                restTemplate.postForEntity(myselfModelProperties.getUrl(), entity, String.class);
        String body = response.getBody();
        return body != null ? body : "";
    }

    /**
     * Recognises the image with the self-hosted OCR model.
     *
     * @param file the image to recognise
     * @return the recognised text entries, or a single {@code "File is NULL"}
     *         entry when the file yields no content
     * @throws Exception if reading the file or calling the model fails
     */
    @Override
    public List<String> performMyselfOcrModel(MultipartFile file) throws Exception {
        List<String> wordsList = new ArrayList<>();
        String fileBase64 = getFileContentAsBase64(file, false);
        if (fileBase64.isEmpty()) {
            wordsList.add("File is NULL");
            return wordsList;
        }
        return JsonToWordsList(wordsList, MyselfOcrModel(fileBase64));
    }

    /**
     * Requests an access token from the Baidu OAuth endpoint using the
     * configured API key and secret key.
     *
     * @return the access token
     * @throws IOException if the request fails, the response is not valid JSON,
     *                     or it carries no {@code access_token}
     */
    public String getAccessToken() throws IOException {
        String form = "grant_type=client_credentials&client_id=" + baiduOcrProperties.getApiKey()
                + "&client_secret=" + baiduOcrProperties.getSecretKey();
        Request request = new Request.Builder()
                .url(TOKEN_URL)
                .method("POST", RequestBody.create(FORM_URLENCODED, form))
                .addHeader("Content-Type", "application/x-www-form-urlencoded")
                .build();

        // try-with-resources guarantees the connection is released on every path.
        try (Response response = HTTP_CLIENT.newCall(request).execute()) {
            ResponseBody payload = response.body();
            String json = payload != null ? payload.string() : "";
            if (json.isBlank()) {
                throw new IOException("Baidu OAuth returned an empty body (HTTP " + response.code() + ")");
            }
            JsonNode root = OBJECT_MAPPER.readTree(json);
            String token = root.path("access_token").asText("");
            if (token.isEmpty()) {
                throw new IOException("Baidu OAuth returned no access_token (HTTP " + response.code()
                        + "): " + root.path("error_description").asText(""));
            }
            return token;
        }
    }

    /**
     * Sends the image to the Baidu {@code accurate_basic} OCR endpoint.
     *
     * @param fileBase64 URL-encoded Base64 image content without a data-URI header
     * @return the raw response body, or an empty string when there is none
     * @throws IOException if obtaining the token or calling the endpoint fails
     */
    public String baiduOcr(String fileBase64) throws IOException {
        // The payload is deliberately not logged: it is the whole image.
        String form = "image=" + fileBase64 + "&detect_direction=false&paragraph=false&probability=false";
        Request request = new Request.Builder()
                .url(OCR_URL + getAccessToken())
                .method("POST", RequestBody.create(FORM_URLENCODED, form))
                .addHeader("Content-Type", "application/x-www-form-urlencoded")
                .addHeader("Accept", "application/json")
                .build();

        try (Response response = HTTP_CLIENT.newCall(request).execute()) {
            ResponseBody payload = response.body();
            return payload != null ? payload.string() : "";
        }
    }

    /**
     * Recognises the image with Baidu OCR.
     *
     * @param file the image to recognise
     * @return the recognised text entries, or a single {@code "File is NULL"}
     *         entry when the file yields no content
     * @throws Exception if reading the file or calling Baidu fails
     */
    @Override
    public List<String> performBaiduOcr(MultipartFile file) throws Exception {
        List<String> wordsList = new ArrayList<>();
        String fileBase64 = getFileContentAsBase64(file, true);
        if (fileBase64.isEmpty()) {
            wordsList.add("File is NULL");
            return wordsList;
        }
        return JsonToWordsList(wordsList, baiduOcr(fileBase64));
    }

    /**
     * Appends the {@code words} value of every {@code words_result} entry in
     * the response to the given list and echoes the list to stdout.
     *
     * <p>Unparseable JSON is reported on stderr and leaves the list untouched.
     * Entries without a {@code words} property are skipped.
     *
     * @param wordsList list receiving the recognised text entries
     * @param res_json  JSON returned by the recognition model
     * @return {@code wordsList}
     * @throws IllegalStateException if the response has no {@code words_result}
     *                               property (e.g. an error response)
     */
    private List<String> JsonToWordsList(List<String> wordsList, String res_json) {
        JsonNode rootNode;
        try {
            rootNode = OBJECT_MAPPER.readTree(res_json);
        } catch (IOException e) {
            // Best effort, as before: report the parse failure and return what we have.
            e.printStackTrace();
            return wordsList;
        }

        JsonNode wordsResultNode = rootNode == null ? null : rootNode.get("words_result");
        if (wordsResultNode == null) {
            // Fail with a descriptive error instead of a bare NullPointerException.
            String detail = rootNode == null ? "" : rootNode.path("error_msg").asText("");
            throw new IllegalStateException(
                    "OCR response contains no words_result" + (detail.isEmpty() ? "" : ": " + detail));
        }

        for (JsonNode wordNode : wordsResultNode) {
            JsonNode words = wordNode.path("words");
            if (!words.isMissingNode()) {
                wordsList.add(words.asText());
            }
        }

        // Echo the result
        System.out.println("------wordsList------");
        for (String word : wordsList) {
            System.out.println(word);
        }
        System.out.println("------wordsList------");

        return wordsList;
    }
}