Springboot集成ElasticSearch实现minio文件内容全文检索
一、docker安装Elasticsearch
(1)Spring Boot 与 Elasticsearch 的版本对应关系如下:
注意:请安装与所用 Spring Boot(Spring Data Elasticsearch)版本相匹配的 Elasticsearch,否则可能会出现客户端与服务端不兼容等难以排查的错误。
(2)拉取镜像
docker pull elasticsearch:7.17.6
(3)运行容器
docker run -it -d --name elasticsearch -e "discovery.type=single-node" -e "ES_JAVA_OPTS=-Xms512m -Xmx1024m" -p 9200:9200 -p 9300:9300 elasticsearch:7.17.6
访问http://localhost:9200/,出现如下内容表示安装成功。
(4)安装中文分词器
进入容器:
docker exec -it elasticsearch bash
然后在容器内的bin目录下执行以下命令,下载并安装ik分词器(插件版本必须与Elasticsearch版本一致,这里均为7.17.6):
elasticsearch-plugin install https://github.com/medcl/elasticsearch-analysis-ik/releases/download/v7.17.6/elasticsearch-analysis-ik-7.17.6.zip
退出bash并重启容器:
docker restart elasticsearch
二、安装kibana
Kibana 是为 Elasticsearch 设计的开源分析和可视化平台。你可以使用 Kibana 搜索、查看存储在 Elasticsearch 索引中的数据并与之交互,也可以很容易地实现高级的数据分析,并将结果以图表的形式展现出来。
(1)拉取镜像
docker pull kibana:7.17.6
(2)运行容器
# The Kibana image only maps upper-case, underscore-separated environment
# variables to settings, so elasticsearch.hosts must be passed as ELASTICSEARCH_HOSTS.
docker run --name kibana -p 5601:5601 --link elasticsearch:es -e "ELASTICSEARCH_HOSTS=http://es:9200" -d kibana:7.17.6
--link elasticsearch:es 表示容器互联:kibana 容器可以通过别名 es 访问名为 elasticsearch 的容器,因此上面的地址写作 http://es:9200。
(3)使用kibana dev_tools发送http请求操作Elasticsearch
三、后端代码
(1)引入maven依赖
<!-- Spring Data Elasticsearch starter. No explicit version is given here;
     presumably it is inherited from the project's Spring Boot dependency management. -->
<dependency>
    <groupId>org.springframework.boot</groupId>
    <artifactId>spring-boot-starter-data-elasticsearch</artifactId>
</dependency>
(2)application.yml配置
# Elasticsearch connection settings; the keys must be nested for the
# property spring.elasticsearch.uris to be recognised.
spring:
  elasticsearch:
    # Address of the single-node Elasticsearch container started above.
    uris: http://localhost:9200
(3)实体类
import lombok.AllArgsConstructor;
import lombok.Data;
import lombok.NoArgsConstructor;
import org.springframework.data.annotation.Id;
import org.springframework.data.elasticsearch.annotations.Document;
import org.springframework.data.elasticsearch.annotations.Field;
import org.springframework.data.elasticsearch.annotations.FieldType;
import java.util.Date;
/**
 * Elasticsearch document describing one stored file and its extracted text.
 *
 * <p>Instances are persisted in the {@code file} index. {@code fileName} and
 * {@code fileContent} are analyzed with the IK {@code ik_max_word} analyzer so they
 * can be searched by keyword; {@code fileCategory} is an exact-match keyword. The
 * remaining fields are stored with {@code index = false}, i.e. they are returned
 * with a hit but are not searchable themselves.
 *
 * <p>NOTE(review): {@code fileSize}, {@code createTime} and {@code updateTime} were
 * previously mapped as {@code keyword}. They are now mapped to the native
 * {@code long} / {@code date} types matching their Java types. An index that was
 * already created with the old mapping has to be recreated or reindexed.
 *
 * @author yangfeng
 */
@Data
@NoArgsConstructor
@AllArgsConstructor
@Document(indexName = "file")
public class File {

    /** Document id. */
    @Id
    private String id;

    /** File name; analyzed for full-text search. */
    @Field(type = FieldType.Text, analyzer = "ik_max_word")
    private String fileName;

    /** File category; exact-match keyword. */
    @Field(type = FieldType.Keyword)
    private String fileCategory;

    /** Text extracted from the file; analyzed for full-text search. */
    @Field(type = FieldType.Text, analyzer = "ik_max_word")
    private String fileContent;

    /** Storage path of the file; not searchable. */
    @Field(type = FieldType.Keyword, index = false)
    private String filePath;

    /** File size; numeric, not searchable. */
    @Field(type = FieldType.Long, index = false)
    private Long fileSize;

    /** File type (the file name extension); not searchable. */
    @Field(type = FieldType.Keyword, index = false)
    private String fileType;

    /** User name of the creator; not searchable. */
    @Field(type = FieldType.Keyword, index = false)
    private String createBy;

    /** Creation time; mapped as a date, used as the sort key by the search queries. */
    @Field(type = FieldType.Date, index = false)
    private Date createTime;

    /** User name of the last updater; not searchable. */
    @Field(type = FieldType.Keyword, index = false)
    private String updateBy;

    /** Last update time; mapped as a date. */
    @Field(type = FieldType.Date, index = false)
    private Date updateTime;
}
(4)repository接口,继承ElasticsearchRepository
import org.springframework.data.domain.Page;
import org.springframework.data.domain.Pageable;
import org.springframework.data.elasticsearch.annotations.Highlight;
import org.springframework.data.elasticsearch.annotations.HighlightField;
import org.springframework.data.elasticsearch.annotations.HighlightParameters;
import org.springframework.data.elasticsearch.core.SearchHit;
import org.springframework.data.elasticsearch.repository.ElasticsearchRepository;
import org.springframework.stereotype.Repository;
import java.util.List;
/**
 * Spring Data Elasticsearch repository for {@link File} documents.
 *
 * @author yangfeng
 * @date: 2024-11-09 15:29
 */
@Repository
public interface FileRepository extends ElasticsearchRepository<File, String> {

    /**
     * Keyword search on file name or file content with highlighting.
     *
     * <p>Matches in {@code fileName} and {@code fileContent} are wrapped in a red
     * {@code <span>}. {@code numberOfFragments = 0} makes Elasticsearch return the
     * whole field value instead of fragments. {@code encoder = "html"} HTML-escapes
     * the field text before the highlight tags are inserted, so that content
     * extracted from uploaded files cannot inject markup when the highlighted
     * result is rendered as HTML.
     *
     * @param fileName    keyword matched against the file name
     * @param fileContent keyword matched against the file content
     * @param pageable    paging and sorting information
     * @return the hits of the requested page, including their highlight fields
     */
    @Highlight(
            fields = {
                    @HighlightField(name = "fileName"),
                    @HighlightField(name = "fileContent")
            },
            parameters = @HighlightParameters(
                    preTags = {"<span style='color:red'>"},
                    postTags = {"</span>"},
                    numberOfFragments = 0,
                    encoder = "html"))
    List<SearchHit<File>> findByFileNameOrFileContent(String fileName, String fileContent, Pageable pageable);
}
(5)service接口
import org.springframework.data.elasticsearch.core.SearchHit;
import org.springframework.data.elasticsearch.core.SearchHits;
import java.util.List;
/**
 * Service for indexing files into Elasticsearch and searching them by keyword.
 *
 * @author yangfeng
 * @version V1.0
 * @date 2023-02-21
 */
public interface IFileService {

    /**
     * Saves (indexes) a file.
     *
     * @param filePath     storage path of the file
     * @param fileCategory category to store with the file
     * @throws Exception if the file cannot be saved
     */
    void saveFile(String filePath, String fileCategory) throws Exception;

    /**
     * Keyword search returning the hits as a list.
     *
     * @param dto keyword and paging parameters
     * @return the matching hits
     */
    List<SearchHit<File>> search(FileDTO dto);

    /**
     * Keyword search returning the full {@link SearchHits} result.
     *
     * @param dto keyword and paging parameters
     * @return the search result
     */
    SearchHits<File> searchPage(FileDTO dto);
}
(6)service实现类
import cn.hutool.core.util.IdUtil;
import lombok.extern.slf4j.Slf4j;
import org.apache.commons.lang3.StringUtils;
import org.apache.shiro.SecurityUtils;
import org.elasticsearch.index.query.QueryBuilders;
import org.elasticsearch.search.fetch.subphase.highlight.HighlightBuilder;
import org.elasticsearch.search.sort.SortBuilders;
import org.elasticsearch.search.sort.SortOrder;
import org.jeecg.common.exception.JeecgBootException;
import org.jeecg.common.system.vo.LoginUser;
import org.jeecg.common.util.CommonUtils;
import org.jeecg.common.util.MinioUtil;
import org.springframework.beans.factory.annotation.Autowired;
import org.springframework.data.domain.PageRequest;
import org.springframework.data.domain.Pageable;
import org.springframework.data.domain.Sort;
import org.springframework.data.elasticsearch.core.ElasticsearchRestTemplate;
import org.springframework.data.elasticsearch.core.SearchHit;
import org.springframework.data.elasticsearch.core.SearchHits;
import org.springframework.data.elasticsearch.core.query.NativeSearchQuery;
import org.springframework.data.elasticsearch.core.query.NativeSearchQueryBuilder;
import org.springframework.stereotype.Service;
import java.io.InputStream;
import java.util.Date;
import java.util.List;
import java.util.Objects;
/**
 * Elasticsearch file service: extracts the text of a file stored in MinIO, indexes
 * it, and offers keyword search over file name and file content.
 *
 * @author yangfeng
 * @version V1.0
 * @date 2023-02-21
 */
@Slf4j
@Service
public class FileServiceImpl implements IFileService {

    /** Page size used when the request carries none or an invalid one. */
    private static final int DEFAULT_PAGE_SIZE = 10;

    /** Fields that are searched and highlighted by {@link #searchPage(FileDTO)}. */
    private static final String[] SEARCH_FIELDS = {"fileName", "fileContent"};

    @Autowired
    private FileRepository fileRepository;

    @Autowired
    private ElasticsearchRestTemplate elasticsearchRestTemplate;

    /**
     * Reads the file at {@code filePath} from MinIO, extracts its text and indexes it.
     *
     * <p>The document gets a fresh snowflake id, the current time as creation time and,
     * when a user is logged in, that user's name as creator. {@code fileSize} is not
     * populated here.
     *
     * @param filePath     storage path of the file; must not be blank
     * @param fileCategory category to store with the file
     * @throws JeecgBootException if {@code filePath} is blank or no stream is returned for it
     * @throws Exception          if reading the file content fails
     */
    @Override
    public void saveFile(String filePath, String fileCategory) throws Exception {
        if (StringUtils.isBlank(filePath)) {
            throw new JeecgBootException("文件不存在");
        }
        LoginUser user = (LoginUser) SecurityUtils.getSubject().getPrincipal();
        String fileName = CommonUtils.getFileNameByUrl(filePath);
        String fileType = extractExtension(fileName);

        // Read the file content so it can be indexed for later retrieval. The stream is
        // closed in every case; try-with-resources tolerates a null resource.
        String fileContent;
        try (InputStream inputStream = MinioUtil.getMinioFile(filePath)) {
            if (inputStream == null) {
                throw new JeecgBootException("文件不存在");
            }
            fileContent = FileUtils.readFileContent(inputStream, fileType);
        }

        File file = new File();
        file.setId(IdUtil.getSnowflake(1, 1).nextIdStr());
        file.setFileContent(fileContent);
        file.setFileName(fileName);
        file.setFilePath(filePath);
        file.setFileType(fileType);
        file.setFileCategory(fileCategory);
        // The principal is null when no user is logged in; do not fail after the
        // content has already been extracted.
        file.setCreateBy(user != null ? user.getUsername() : null);
        file.setCreateTime(new Date());
        fileRepository.save(file);
    }

    /**
     * Keyword search through the repository's derived query, newest files first.
     *
     * @param dto keyword and paging parameters; missing or invalid paging values
     *            fall back to the first page and {@value #DEFAULT_PAGE_SIZE} entries
     * @return the hits of the requested page
     */
    @Override
    public List<SearchHit<File>> search(FileDTO dto) {
        Pageable pageable = PageRequest.of(pageIndex(dto), pageSize(dto), Sort.Direction.DESC, "createTime");
        return fileRepository.findByFileNameOrFileContent(dto.getKeyword(), dto.getKeyword(), pageable);
    }

    /**
     * Keyword search through a native multi-match query with highlighting, newest
     * files first.
     *
     * @param dto keyword and paging parameters; missing or invalid paging values
     *            fall back to the first page and {@value #DEFAULT_PAGE_SIZE} entries
     * @return the search result including total hit count and highlight fields
     */
    @Override
    public SearchHits<File> searchPage(FileDTO dto) {
        NativeSearchQueryBuilder queryBuilder = new NativeSearchQueryBuilder();
        queryBuilder.withQuery(QueryBuilders.multiMatchQuery(dto.getKeyword(), SEARCH_FIELDS));

        // Highlighting: wrap matches in a red span. The html encoder escapes the field
        // text before the tags are inserted, because the client renders the result as HTML.
        HighlightBuilder highlightBuilder = new HighlightBuilder();
        for (String fieldName : SEARCH_FIELDS) {
            highlightBuilder.field(fieldName);
        }
        highlightBuilder.preTags("<span style='color:red'>");
        highlightBuilder.postTags("</span>");
        highlightBuilder.encoder("html");
        queryBuilder.withHighlightBuilder(highlightBuilder);

        // Sorting and paging.
        queryBuilder.withSorts(SortBuilders.fieldSort("createTime").order(SortOrder.DESC))
                .withPageable(PageRequest.of(pageIndex(dto), pageSize(dto)));

        NativeSearchQuery nativeSearchQuery = queryBuilder.build();
        return elasticsearchRestTemplate.search(nativeSearchQuery, File.class);
    }

    /** Converts the 1-based page number of the request into a 0-based page index. */
    private static int pageIndex(FileDTO dto) {
        Integer pageNo = dto.getPageNo();
        return (pageNo == null || pageNo < 1) ? 0 : pageNo - 1;
    }

    /** Returns the requested page size, or the default when it is missing or below 1. */
    private static int pageSize(FileDTO dto) {
        Integer pageSize = dto.getPageSize();
        return (pageSize == null || pageSize < 1) ? DEFAULT_PAGE_SIZE : pageSize;
    }

    /** Returns the text after the last dot of the file name, or null when there is none. */
    private static String extractExtension(String fileName) {
        if (StringUtils.isBlank(fileName)) {
            return null;
        }
        int dot = fileName.lastIndexOf('.');
        if (dot < 0 || dot == fileName.length() - 1) {
            return null;
        }
        return fileName.substring(dot + 1);
    }
}
(7)controller
import lombok.extern.slf4j.Slf4j;
import org.jeecg.common.api.vo.Result;
import org.springframework.beans.factory.annotation.Autowired;
import org.springframework.web.bind.annotation.PostMapping;
import org.springframework.web.bind.annotation.RequestBody;
import org.springframework.web.bind.annotation.RequestMapping;
import org.springframework.web.bind.annotation.RestController;
/**
 * REST endpoints for the Elasticsearch file operations.
 *
 * @author yangfeng
 * @since 2024-11-09
 */
@Slf4j
@RestController
@RequestMapping("/elasticsearch/file")
public class FileController {

    @Autowired
    private IFileService fileService;

    /**
     * Saves a file; only the file path and the file category of the request body are used.
     *
     * @param file request body
     * @return an empty OK result
     * @throws Exception if the service fails to save the file
     */
    @PostMapping("/saveFile")
    public Result<?> saveFile(@RequestBody File file) throws Exception {
        String path = file.getFilePath();
        String category = file.getFileCategory();
        fileService.saveFile(path, category);
        return Result.OK();
    }

    /**
     * Keyword search backed by the repository query.
     *
     * @param dto keyword and paging parameters
     * @return an OK result wrapping the hits
     */
    @PostMapping("/search")
    public Result<?> search(@RequestBody FileDTO dto) {
        return Result.OK(fileService.search(dto));
    }

    /**
     * Keyword search backed by the native query.
     *
     * @param dto keyword and paging parameters
     * @return an OK result wrapping the search result
     */
    @PostMapping("/searchPage")
    public Result<?> searchPage(@RequestBody FileDTO dto) {
        return Result.OK(fileService.searchPage(dto));
    }
}
(8)工具类
import lombok.extern.slf4j.Slf4j;
import org.apache.pdfbox.pdmodel.PDDocument;
import org.apache.pdfbox.text.PDFTextStripper;
import org.apache.poi.xwpf.extractor.XWPFWordExtractor;
import org.apache.poi.xwpf.usermodel.XWPFDocument;
import java.io.IOException;
import java.io.InputStream;
import java.io.InputStreamReader;
import java.nio.charset.StandardCharsets;
import java.util.Arrays;
import java.util.List;
@Slf4j
public class FileUtils {
private static final List<String> FILE_TYPE;
static {
FILE_TYPE = Arrays.asList("pdf", "doc", "docx", "text");
}
public static String readFileContent(InputStream inputStream, String fileType) throws Exception{
if (!FILE_TYPE.contains(fileType)) {
return null;
}
// 使用PdfBox读取pdf文件内容
if ("pdf".equalsIgnoreCase(fileType)) {
return readPdfContent(inputStream);
} else if ("doc".equalsIgnoreCase(fileType) || "docx".equalsIgnoreCase(fileType)) {
return readDocOrDocxContent(inputStream);
} else if ("text".equalsIgnoreCase(fileType)) {
return readTextContent(inputStream);
}
return null;
}
private static String readPdfContent(InputStream inputStream) throws Exception {
// 加载PDF文档
PDDocument pdDocument = PDDocument.load(inputStream);
// 创建PDFTextStripper对象, 提取文本
PDFTextStripper textStripper = new PDFTextStripper();
// 提取文本
String content = textStripper.getText(pdDocument);
// 关闭PDF文档
pdDocument.close();
return content;
}
private static String readDocOrDocxContent(InputStream inputStream) {
try {
// 加载DOC文档
XWPFDocument document = new XWPFDocument(inputStream);
// 2. 提取文本内容
XWPFWordExtractor extractor = new XWPFWordExtractor(document);
return extractor.getText();
} catch (IOException e) {
e.printStackTrace();
return null;
}
}
private static String readTextContent(InputStream inputStream) {
StringBuilder content = new StringBuilder();
try (InputStreamReader isr = new InputStreamReader(inputStream, StandardCharsets.UTF_8)) {
int ch;
while ((ch = isr.read()) != -1) {
content.append((char) ch);
}
} catch (IOException e) {
e.printStackTrace();
return null;
}
return content.toString();
}
}
(9)dto
import lombok.Data;
/**
 * Request parameters of the keyword search endpoints.
 */
@Data
public class FileDTO {

    /** Search keyword. */
    private String keyword;

    /** Page number; the service subtracts one to obtain the page index. */
    private Integer pageNo;

    /** Number of entries per page. */
    private Integer pageSize;
}
四、前端代码
(1)查询组件封装
<template>
  <!-- Keyword box: triggering a search opens the result dialog. -->
  <a-input-search
    v-model:value="pageInfo.keyword"
    placeholder="全文检索"
    @search="handleSearch"
    style="width: 220px;margin-left:30px"
  />
  <a-modal
    v-model:visible="showSearch"
    title="全文检索"
    width="900px"
    :footer="null"
    destroy-on-close
  >
    <SearchContent :items="searchItems" :loading="loading"/>
    <div style="padding: 10px;display: flex;justify-content: flex-end">
      <Pagination
        v-if="pageInfo.total"
        :pageSize="pageInfo.pageSize"
        :pageNo="pageInfo.pageNo"
        :total="pageInfo.total"
        @pageChange="changePage"
        :show-total="total => `共 ${total} 条`"
      />
    </div>
  </a-modal>
</template>
<script lang="ts" setup>
  import {ref} from 'vue'
  import {Pagination} from "ant-design-vue";
  import SearchContent from "@/components/ElasticSearch/SearchContent.vue"
  import {searchPage} from "@/api/sys/elasticsearch"

  // True while a search request is in flight.
  const loading = ref<boolean>(false)
  // Controls the visibility of the result dialog.
  const showSearch = ref<boolean>(false)
  // Hits of the current page; always an array so the result list can render safely.
  const searchItems = ref<any[]>([]);
  const pageInfo = ref<{
    pageNo: number;
    pageSize: number;
    keyword: string;
    total: number;
  }>({
    // Current page number
    pageNo: 1,
    // Number of entries shown per page
    pageSize: 10,
    keyword: '',
    total: 0,
  });

  // Starts a new search from the first page; does nothing for an empty keyword.
  async function handleSearch() {
    if (!pageInfo.value.keyword) {
      return;
    }
    pageInfo.value.pageNo = 1
    showSearch.value = true
    await getSearchItems();
  }

  // Loads the page selected in the pagination control.
  function changePage(pageNo: number) {
    pageInfo.value.pageNo = pageNo
    getSearchItems();
  }

  // Requests the current page and updates the hit list and the total count.
  async function getSearchItems() {
    loading.value = true
    try {
      const res: any = await searchPage(pageInfo.value);
      // Fall back to empty values so a missing response never leaves undefined in state.
      searchItems.value = res?.searchHits ?? [];
      pageInfo.value.total = res?.totalHits ?? 0
    } finally {
      loading.value = false
    }
  }
</script>
<style scoped></style>
(2)接口elasticsearch.ts
import {defHttp} from '/@/utils/http/axios';
/** Endpoints of the Elasticsearch file controller. */
enum Api {
  saveFile = '/elasticsearch/file/saveFile',
  searchPage = '/elasticsearch/file/searchPage',
}

/**
 * Save a file to Elasticsearch.
 * @param params request parameters
 */
export const saveFile = (params) => {
  return defHttp.post({url: Api.saveFile, params});
};

/**
 * Keyword search (native query endpoint).
 * @param params request parameters
 */
export const searchPage = (params) => {
  return defHttp.post({url: Api.searchPage, params});
};
(3)搜索内容组件SearchContent.vue
<template>
  <a-spin :spinning="loading">
    <div class="searchContent">
      <!-- v-if lives on a wrapping template so it is not mixed with v-for on one element. -->
      <template v-if="items && items.length > 0">
        <div v-for="(item, index) in items" :key="index">
          <a-card class="contentCard">
            <template #title>
              <a @click="detailSearch(item?.content)">
                <div class="flex" style="align-items: center">
                  <div>
                    <img src="../../assets/images/pdf.png" v-if="fileKind(item?.content?.fileType) === 'pdf'" style="width: 20px"/>
                    <img src="../../assets/images/word.png" v-if="fileKind(item?.content?.fileType) === 'word'" style="width: 20px"/>
                    <img src="../../assets/images/excel.png" v-if="fileKind(item?.content?.fileType) === 'excel'" style="width: 20px"/>
                  </div>
                  <div style="margin-left:10px">
                    <!-- v-html renders the highlight markup; the backend must HTML-escape the field text. -->
                    <article class="article" v-html="highlightOf(item, 'fileName')"
                             v-if="highlightOf(item, 'fileName')"></article>
                    <span v-else>{{ item?.content?.fileName }}</span>
                  </div>
                </div>
              </a>
            </template>
            <div class="item">
              <!-- v-html renders the highlight markup; the backend must HTML-escape the field text. -->
              <article class="article" v-html="highlightOf(item, 'fileContent')"
                       v-if="highlightOf(item, 'fileContent')"></article>
              <span v-else>{{ previewOf(item) }}</span>
            </div>
          </a-card>
        </div>
      </template>
      <EmptyData v-else/>
    </div>
  </a-spin>
</template>
<script lang="ts" setup>
  import {useGlobSetting} from "@/hooks/setting";
  import EmptyData from "/@/components/ElasticSearch/EmptyData.vue";

  const glob = useGlobSetting();

  // Maximum number of characters shown when a hit has no highlighted content.
  const PREVIEW_LENGTH = 150;

  defineProps({
    loading: {
      type: Boolean,
      default: false
    },
    items: {
      type: Array,
      // Array defaults must come from a factory so instances do not share one array.
      default: () => []
    },
  })

  // Maps the stored file type to an icon group. The backend stores the file name
  // extension; the legacy values 'word' and 'excel' are still accepted.
  function fileKind(fileType?: string): 'pdf' | 'word' | 'excel' | '' {
    const type = (fileType ?? '').toLowerCase();
    if (type === 'pdf') {
      return 'pdf';
    }
    if (['doc', 'docx', 'word'].includes(type)) {
      return 'word';
    }
    if (['xls', 'xlsx', 'excel'].includes(type)) {
      return 'excel';
    }
    return '';
  }

  // Returns the highlight markup of a field as one string ('' when there is none).
  // A list of fragments is joined explicitly instead of relying on Array#toString.
  function highlightOf(item, field: string): string {
    const value = item?.highlightFields?.[field];
    if (Array.isArray(value)) {
      return value.join('......');
    }
    return value ?? '';
  }

  // Plain-text fallback: the first PREVIEW_LENGTH characters of the file content.
  function previewOf(item): string {
    const text = item?.content?.fileContent ?? '';
    return text.length > PREVIEW_LENGTH ? text.substring(0, PREVIEW_LENGTH) + '......' : text;
  }

  // Opens the file preview in a new tab; ignored when the hit carries no file path.
  function detailSearch(searchItem) {
    if (!searchItem?.filePath) {
      return;
    }
    const url = `${glob.domainUrl}/sys/common/pdf/preview/`;
    window.open(url + searchItem.filePath + '#scrollbars=0&toolbar=0&statusbar=0', '_blank');
  }
</script>
<style lang="less" scoped>
  .searchContent {
    min-height: 500px;
    overflow-y: auto;
  }

  .contentCard {
    margin: 10px 20px;
  }

  a {
    color: black;
  }

  a:hover {
    color: #3370ff;
  }

  :deep(.ant-card-body) {
    padding: 13px;
  }
</style>
五、效果展示