当前位置: 首页 > article >正文

es简单实现文章检索功能

使用的api是:Elasticsearch Java API client 8.0

官网:Package structure and namespace clients | Elasticsearch Java API Client [8.15] | Elastic

 

1.建立索引库

用于实现搜索功能的字段:

  1. title:文章标题
  2. content:文章内容
  3. category:文章分类
PUT /search_test
{
  "mappings": {
    "properties": {
      "title":{
        "type": "text",
        "analyzer": "ik_max_word",
        "index": true
      },
      "content":{
        "type": "text",
        "analyzer": "ik_max_word",
         "index": true
      },
      "category":{
        "type": "keyword",
        "index": true
      }
    }
  }
}

2.代码实现

返回实体:

/**
 * View object for one article search hit.
 *
 * <p>Getters and setters are generated by Lombok's {@code @Data}.
 */
@Data
public class ContentSearchVO {

    /** Article title. */
    @Schema(description = "标题")
    private String title;

    /** Highlighted part of the content, intended for display on the home page. */
    @Schema(description = "高亮内容部分,用于首页展示")
    private String highLightContent;

    /** Article content. */
    @Schema(description = "内容")
    private String content;

}

请求实体:

/**
 * Request object for the article search endpoint.
 *
 * <p>Getters and setters are generated by Lombok's {@code @Data}. The paging fields carry
 * defaults so that a request which omits them does not hand {@code null} to the search code.
 */
@Data
public class SearchDTO {
    /** Search text; must not be blank. */
    @Schema(description="搜索词")
    @NotBlank(message = "搜索词不能是空")
    private String text;

    /** Optional category filter. */
    @Schema(description="分类")
    private String category;

    /** 1-based page number; defaults to the first page when omitted. */
    @Schema(description="页码,从1开始")
    private Integer pageNo = 1;

    /** Number of hits per page; defaults to 10 when omitted. */
    @Schema(description="每页条数")
    private Integer pageSize = 10;

}

2.1基于模板形式

    /**
     * Searches articles by rendering the JSON query template and sending it to Elasticsearch.
     *
     * <p>User-supplied values are JSON-escaped before being placed into the template, and the
     * 1-based page number is converted to a zero-based {@code from} offset.
     *
     * @param dto search text, optional category and paging parameters
     * @return the hits mapped to {@link ContentSearchVO}, with highlight fragments applied
     */
    @Override
    public List<ContentSearchVO> search(SearchDTO dto)  {
        // Escape user input so quotes/backslashes cannot break (or inject into) the JSON body.
        String text = escapeJson(dto.getText());

        // A missing or blank category means "no category filter".
        String category = dto.getCategory();
        String categoryClause = "";
        if (category != null && !category.trim().isEmpty()) {
            categoryClause = String.format(SearchConstants.CATEGORY, escapeJson(category));
        }

        // Fall back to page 1 / 10 hits when paging values are missing or out of range;
        // a null would otherwise be rendered as the literal text "null" in the query.
        int pageNo = (dto.getPageNo() == null || dto.getPageNo() < 1) ? 1 : dto.getPageNo();
        int pageSize = (dto.getPageSize() == null || dto.getPageSize() < 1) ? 10 : dto.getPageSize();
        // Elasticsearch "from" is a zero-based hit offset, not a page number.
        int from = (pageNo - 1) * pageSize;

        String query = String.format(SearchConstants.SEARCH_MAPPING, text, text, categoryClause, from, pageSize);
        StringReader stringReader = new StringReader(query);
        SearchRequest searchRequest = SearchRequest.of(builder -> builder.index(SearchConstants.INDEX_NAME)
                .withJson(stringReader)
        );
        SearchResponse<Object> response = EsSearchUtil.search(searchRequest);
        return EsSearchUtil.buildSearchResult(response);
    }

    /**
     * Escapes a value for embedding inside a double-quoted JSON string.
     *
     * @param value raw text, may be {@code null}
     * @return the escaped text, or an empty string when {@code value} is {@code null}
     */
    private static String escapeJson(String value) {
        if (value == null) {
            return "";
        }
        StringBuilder escaped = new StringBuilder(value.length() + 16);
        for (int i = 0; i < value.length(); i++) {
            char c = value.charAt(i);
            switch (c) {
                case '"':
                    escaped.append("\\\"");
                    break;
                case '\\':
                    escaped.append("\\\\");
                    break;
                case '\n':
                    escaped.append("\\n");
                    break;
                case '\r':
                    escaped.append("\\r");
                    break;
                case '\t':
                    escaped.append("\\t");
                    break;
                case '\b':
                    escaped.append("\\b");
                    break;
                case '\f':
                    escaped.append("\\f");
                    break;
                default:
                    if (c < 0x20) {
                        // Remaining control characters must be written as \\uXXXX in JSON.
                        escaped.append(String.format("\\u%04x", (int) c));
                    } else {
                        escaped.append(c);
                    }
            }
        }
        return escaped.toString();
    }

模板常量

/**
 * Constants for the template-based article search: index name and JSON query templates.
 */
public class SearchConstants {
    /** Name of the index that is searched. */
    public static final String INDEX_NAME = "search_test";

    /**
     * Optional category clause; {@code %s} is the category value.
     *
     * <p>It is appended after the last element of the {@code must} array in
     * {@link #SEARCH_MAPPING}, so it has to be a complete JSON object with a leading comma
     * and no trailing comma.
     */
    public static final String CATEGORY =   "        ,{\n" +
                                            "          \"term\": {\n" +
                                            "            \"category\": \"%s\"\n" +
                                            "          }\n" +
                                            "        }\n";

    /**
     * Search request template. Placeholders, in order: title text, content text,
     * category clause (rendered {@link #CATEGORY} or an empty string), {@code from}, {@code size}.
     */
    public static final String SEARCH_MAPPING = "{" +
            "  \"query\": {\n" +
            "    \"bool\": {\n" +
            "      \"must\":[ " +
            "        {\n" +
            "          \"match\": {\n" +
            "            \"title\": \"%s\"\n" +
            "            }\n" +
            "        },\n" +
            "        {\n" +
            "          \"match\": {\n" +
            "             \"content\": \"%s\"\n" +
            "             }\n" +
            "        }\n" +
                     "%s"+
            "     ]}\n" +
            "    },\n" +
            "    \"from\": %s,\n" +
            "    \"size\": %s,\n" +
            "  \"highlight\": {\n" +
            "    \"pre_tags\": \"<em>\",\n" +
            "    \"post_tags\": \"</em>\",\n" +
            "    \"fields\": {\n"+
            "      \"title\": {},\n" +
            "      \"content\": {}\n" +
            "      }\n" +
            "  }\n" +
            "}";

}

EsSearchUtil



@Component
@Slf4j
public class EsSearchUtil {

    private static ElasticsearchClient client;



    @Autowired
    public EsSearchUtil(ElasticsearchClient client) {
        EsSearchUtil.client = client;
    }

    /**
     * 数据检索
     */
    public static  SearchResponse<Object> search(SearchRequest searchRequest) {
        SearchResponse<Object> response;
        try {
            response = client.search(searchRequest, Object.class);
            log.debug("搜索返回结果:" + response.toString());
        } catch (IOException e) {
            log.error(e.toString());
            throw new RuntimeException("搜索服务出了点小差,请稍后再试", e);
        }
        return response;
    }

    /**
     * 构建搜索结果
     */
    public static <T> List<T> buildSearchResult(SearchResponse<Object> resp, Class<T> clazz) {
        if (resp.hits() == null || resp.hits().hits() == null) {
            return Collections.emptyList();
        }
        List<Hit<Object>> hits = resp.hits().hits();

        List<T> list = new ArrayList<>();
        for(Hit<Object> hit:hits) {
            //格式转换
            T t = ConvertUtil.objToObj(hit.source(), clazz);
            list.add(t);
        }
        return list;
    }


    public  static  List<ContentSearchVO> buildSearchResult(SearchResponse<Object> resp){
        List<Hit<Object>> hits = resp.hits().hits();
        List<ContentSearchVO> list = new ArrayList<>();
        for(Hit<Object> hit:hits){
            ContentSearchVO contentSearchVO = ConvertUtil.objToObj(hit.source(), ContentSearchVO.class);
            Map<String, List<String>> highlight = hit.highlight();

            if (!CollectionUtils.isEmpty(highlight)) {
                //标题
                List<String> highLightTitle = highlight.get("title");
                //内容
                List<String> highLightContent = highlight.get("content");
                //无高亮,就用内容替代
                if (!CollectionUtils.isEmpty(highLightTitle)){
                    StringBuilder highLightTitleStringBuilder = new StringBuilder();
                    for (String titleSegment : highLightTitle) {
                        highLightTitleStringBuilder.append(titleSegment);
                    }
                    contentSearchVO .setTitle(highLightTitleStringBuilder.toString());
                }
                if (!CollectionUtils.isEmpty(highLightContent)){
                    StringBuilder highLightContentStringBuilder = new StringBuilder();
                    for (String contentSegment : highLightContent) {
                        highLightContentStringBuilder.append(contentSegment);
                    }
                    contentSearchVO .setHighLightContent(highLightContentStringBuilder.toString());
                }else {
                    contentSearchVO .setHighLightContent(contentSearchVO.getContent());
                }
            }
            list.add(contentSearchVO);
        }
        return list;
    }


    /**
     *  分词解析
     * @param text 搜索词
     * @param index 索引库
     * @return 分词结果
     */
    public static List<String> analyze (String text,String index) {
        AnalyzeRequest analyzeRequest = new AnalyzeRequest.Builder()
                .index(index)
                .text(text)
                .analyzer("ik_max_word")  // ik_smart
                .build();
        AnalyzeResponse analyzeResponse;
        try {
            analyzeResponse = client.indices().analyze(analyzeRequest);
        } catch (IOException e) {
            throw new RuntimeException(e);
        }

        List<AnalyzeToken> tokens = analyzeResponse.tokens();
        List<String> result = new ArrayList<>();
        // 分词结果
        for (AnalyzeToken token : tokens) {
            result.add(token.token());
        }
        return result;
    }
}

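2.2 基于搜索 API 实现

注:本节代码中用到的 SearchDocument、SearchVO,以及 SearchConstants 中的 FILE_NAME、CONVERTED_TEXT、TAGS、PRE_TAGS、POST_TAGS、LINE 等常量在文中没有给出,需要根据实际的索引字段自行定义。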




/**
 * Article search built with the typed Elasticsearch query builders (bool query plus highlight).
 *
 * <p>NOTE(review): several collaborators used here are not visible in this file
 * ({@code SearchVO}, {@code SearchDocument}, {@code sortSearchResult}, and the
 * {@code SearchConstants} members {@code FILE_NAME}, {@code CONVERTED_TEXT}, {@code TAGS},
 * {@code PRE_TAGS}, {@code POST_TAGS}, {@code LINE}) — confirm they exist in the project.
 */
@Service
@Slf4j
@RequiredArgsConstructor
public class ContentSearchServiceImpl implements ContentSearchService {

    /** Page number used when the request does not carry a valid one. */
    private static final int DEFAULT_PAGE_NO = 1;

    /** Page size used when the request does not carry a valid one. */
    private static final int DEFAULT_PAGE_SIZE = 10;

    private final SearchHotWordsService  searchHotWordsService;

    /** Highlight fragment size in characters. */
    @Value("${search.highlightWords:500}")
    private Integer highlightWords;

    /** Maximum number of characters of plain content returned when there is no highlight. */
    @Value("${search.words:500}")
    private Integer words;

    @Value("${search.searchWords:200}")
    private Integer searchWords;

    /**
     * Runs the search described by {@code dto} and returns the mapped, sorted hits.
     *
     * @param dto search text, optional tag filter and paging parameters
     * @return result wrapper holding the list of hits
     */
    @Override
    public SearchVO search(SearchDTO dto)  {
        SearchVO result = new SearchVO();

        BoolQuery boolQuery =  this.buildBoolQuery(dto);
        Highlight highlight = this.buildHighlight();

        // Guard against missing or non-positive paging values: unboxing null would throw,
        // and a negative "from" is rejected by Elasticsearch.
        final int pageNo = (dto.getPageNo() == null || dto.getPageNo() < 1)
                ? DEFAULT_PAGE_NO : dto.getPageNo();
        final int pageSize = (dto.getPageSize() == null || dto.getPageSize() < 1)
                ? DEFAULT_PAGE_SIZE : dto.getPageSize();
        final int from = (pageNo - 1) * pageSize;

        SearchRequest searchRequest = SearchRequest.of(s -> s.index(SearchConstants.INDEX_NAME)
                .query(q -> q
                        .bool(boolQuery)
                )
                .from(from)
                .size(pageSize)
                .highlight(highlight)
        );
        SearchResponse<SearchDocument> response = EsSearchUtil.search(searchRequest,SearchDocument.class);
        List<ContentSearchVO> searchResult = this.buildSearchResult(response);
        if (!CollectionUtils.isEmpty(searchResult)){
            searchResult = this.sortSearchResult(searchResult);
        }
        result.setList(searchResult);
        return result;
    }

    /**
     * Builds the bool query: a required multi-match on the text fields plus an optional
     * tag filter.
     *
     * <p>The text clause is a {@code must} clause. As a {@code should} clause it would stop
     * being required as soon as the tag clause is added, so every document carrying the tag
     * would match regardless of the search text.
     *
     * @param dto query parameters
     * @return the bool query
     */
    private BoolQuery buildBoolQuery(SearchDTO dto) {
        // NOTE(review): the SearchDTO shown alongside this class exposes "category", not
        // "tags" — confirm which accessor is the right one.
        String tags = dto.getTags();
        List<String> queryFields = List.of(SearchConstants.FILE_NAME, SearchConstants.CONVERTED_TEXT);

        BoolQuery.Builder boolBuilder = new BoolQuery.Builder()
                .must(m -> m.multiMatch(mu -> mu.fields(queryFields).query(dto.getText())));

        if (tags != null){
            // Comma-separated tag list; blank entries are ignored.
            List<FieldValue> tagValues = new ArrayList<>();
            for (String tag : tags.split(",")) {
                String trimmed = tag.trim();
                if (!trimmed.isEmpty()) {
                    tagValues.add(FieldValue.of(trimmed));
                }
            }
            if (!tagValues.isEmpty()) {
                TermsQuery termsQuery = TermsQuery.of(t -> t.field(SearchConstants.TAGS).terms(tm -> tm.value(tagValues)));
                // Exact-match restriction only; it does not need to contribute to the score.
                boolBuilder.filter(f -> f.terms(termsQuery));
            }
        }
        return boolBuilder.build();
    }

    /**
     * Builds the highlight configuration for the title and content fields.
     *
     * @return Highlight
     */
    private Highlight buildHighlight() {
        Map<String, HighlightField> map = new HashMap<>();
        map.put(SearchConstants.FILE_NAME, HighlightField.of(hf -> hf.preTags(SearchConstants.PRE_TAGS).postTags(SearchConstants.POST_TAGS)));
        // Content: at most one fragment, highlightWords characters long.
        map.put(SearchConstants.CONVERTED_TEXT, HighlightField.of(hf -> hf.preTags(SearchConstants.PRE_TAGS).postTags(SearchConstants.POST_TAGS)
                .numberOfFragments(1).fragmentSize(highlightWords)
        ));

        return Highlight.of(
                h -> h.type(HighlighterType.Unified)
                        .order(HighlighterOrder.Score)
                        .fields(map)
        );

    }

    /**
     * Maps hits to {@link ContentSearchVO}. A highlighted title replaces the plain title;
     * the content becomes the highlighted fragment when there is one, otherwise a cleaned,
     * truncated prefix of the plain text.
     *
     * @param resp search response
     * @return mapped hits, never {@code null}
     */
    private List<ContentSearchVO> buildSearchResult(SearchResponse<SearchDocument> resp) {
        List<ContentSearchVO> list = new ArrayList<>();
        if (resp.hits() == null || resp.hits().hits() == null) {
            return list;
        }
        for (Hit<SearchDocument> hit : resp.hits().hits()) {
            // The response is already typed, so the source needs no further conversion.
            SearchDocument searchDocument = hit.source();
            if (searchDocument == null) {
                continue;
            }
            ContentSearchVO contentSearchVO = this.searchAdapter(searchDocument);
            Map<String, List<String>> highlight = hit.highlight();
            List<String> titleFragments = highlight == null ? null : highlight.get(SearchConstants.FILE_NAME);
            List<String> contentFragments = highlight == null ? null : highlight.get(SearchConstants.CONVERTED_TEXT);

            if (!CollectionUtils.isEmpty(titleFragments)) {
                contentSearchVO.setTitle(String.join("", titleFragments));
            }
            if (!CollectionUtils.isEmpty(contentFragments)) {
                contentSearchVO.setContent(String.join("", contentFragments));
            } else {
                // No highlighted content: take a prefix of the plain text instead.
                contentSearchVO.setContent(this.plainSnippet(contentSearchVO.getContent()));
            }
            list.add(contentSearchVO);
        }
        return list;
    }

    /**
     * Cleans the plain content and truncates it to at most {@code words} characters.
     *
     * @param content plain document text, may be {@code null}
     * @return cleaned prefix, or an empty string when {@code content} is {@code null}
     */
    private String plainSnippet(String content) {
        if (content == null) {
            return "";
        }
        String cleaned = this.replaceSymbol(content).replace(SearchConstants.LINE, "");
        return cleaned.length() > words ? cleaned.substring(0, words) : cleaned;
    }

    /** Copies the document fields onto a new {@link ContentSearchVO}. */
    private ContentSearchVO searchAdapter(SearchDocument searchDocument) {
        ContentSearchVO contentSearchVO = new ContentSearchVO();
        contentSearchVO.setTitle(searchDocument.getFileName());
        contentSearchVO.setContent(searchDocument.getDocText());
        contentSearchVO.setFileId(searchDocument.getFileId());
        contentSearchVO.setTags(searchDocument.getTags());
        contentSearchVO.setTimestamp(searchDocument.getTimestamp());
        return contentSearchVO;
    }

    /**
     * Normalizes whitespace in extracted text and strips page-number markers.
     *
     * @param replaceStr raw text
     * @return text with tabs removed, whitespace runs collapsed to one space and
     *         {@code -<digits>-} markers removed
     */
    private String replaceSymbol(String replaceStr) {
        return replaceStr.replaceAll("\t","")
                // NOTE(review): the original replaced two space-like literals that render as
                // plain spaces; presumably a no-break space and an ideographic space — confirm.
                // They are spelled as escapes here because \s does not match them.
                .replaceAll("[\\u00A0\\u3000]"," ")
                .replaceAll("(\\n\\s*)+", " ") // collapse runs of line breaks
                .replaceAll("\\s+"," ")
                .replaceAll("-\\d+-", "") // drop page-number markers
                ;
    }

}

http://www.kler.cn/a/353358.html

相关文章:

  • XML通过HTTP POST 请求发送到指定的 API 地址,进行数据回传
  • 【YOLOv5】源码(train.py)
  • [石榴翻译] 维吾尔语音识别 + TTS语音合成
  • 使用Docker部署nextjs应用
  • 【JAVA毕业设计】基于Vue和SpringBoot的渔具租赁系统
  • Spring Boot在医疗B2B平台中的病历数据安全
  • 【游戏模组】极品飞车12无间风云冬季mod,冬天版本的无间风云你体验过吗
  • llama大模型中,为什么推理部分使用kv cache,而训练部分不使用kv cache
  • 网络资源模板--Android Studio 实现简易计算器App
  • DS树与二叉树(8)
  • Java语法糖
  • Linux性能调优,还可以从这些方面入手
  • Linux虚拟机安装
  • pytorch与卷积神经网络实战笔记
  • Centos7 搭建单机elasticsearch
  • 【重学 MySQL】六十四、主键约束的使用
  • STM32嵌入式移植GmSSL库
  • 利用Spring Boot构建大创项目资源规划平台
  • 医药追溯码是什么?
  • Java多线程--实现跑马小游戏
  • 《学习方法报》是什么级别的报纸?
  • 鸿蒙网络编程系列4-实现Smtp邮件发送客户端
  • 海康NVR管理平台EasyNVR多品牌NVR管理工具实现智能化视频管理介入现代化工厂