es简单实现文章检索功能
使用的api是:Elasticsearch Java API client 8.0
官方文档:Elastic 官网《Package structure and namespace clients》(Elasticsearch Java API Client 8.15)
1.建立索引库
用于实现搜索功能的字段:
- title:文章标题
- content:文章内容
- category:文章分类
PUT /search_test
{
  "mappings": {
    "properties": {
      "title": {
        "type": "text",
        "analyzer": "ik_max_word",
        "index": true
      },
      "content": {
        "type": "text",
        "analyzer": "ik_max_word",
        "index": true
      },
      "category": {
        "type": "keyword",
        "index": true
      }
    }
  }
}
2.代码实现
返回实体:
/**
 * View object returned to the caller for one article found by a search.
 */
@Data
public class ContentSearchVO {

    /** Article title. */
    @Schema(description = "标题")
    private String title;

    /** Highlighted excerpt of the content, intended for display on the home page. */
    @Schema(description = "高亮内容部分,用于首页展示")
    private String highLightContent;

    /** Article content. */
    @Schema(description = "内容")
    private String content;
}
请求实体:
/**
 * Request object carrying the search text, an optional category filter and
 * the paging parameters of an article search.
 */
@Data
public class SearchDTO {

    /** Text to search for; must not be blank. */
    @Schema(description = "搜索词")
    @NotBlank(message = "搜索词不能是空")
    private String text;

    /** Optional category used to narrow the search. */
    @Schema(description = "分类")
    private String category;

    /** Page number; the API-based search computes the offset as {@code (pageNo - 1) * pageSize}. */
    @Schema(description = "页码")
    private Integer pageNo;

    /** Number of results per page. */
    @Schema(description = "每页条数")
    private Integer pageSize;
}
2.1基于模板形式
/**
 * Searches articles by rendering the JSON query template in {@link SearchConstants}.
 *
 * <p>The search text and the category come from the request, so both are
 * JSON-escaped before being spliced into the template; otherwise a quote or
 * backslash in the input would break (or alter) the generated query.
 *
 * <p>Paging: {@code pageNo} is treated as a 1-based page number and converted to
 * an offset with {@code (pageNo - 1) * pageSize}. A missing or non-positive
 * {@code pageNo} falls back to 1, a missing or non-positive {@code pageSize} to 10.
 *
 * @param dto search request (text, optional category, paging)
 * @return the matching articles with highlighted fields applied
 */
@Override
public List<ContentSearchVO> search(SearchDTO dto) {
    String text = escapeJson(dto.getText());

    // A blank category means "no category filter" rather than a filter on "".
    String category = dto.getCategory();
    String categoryClause = (category == null || category.trim().isEmpty())
            ? ""
            : String.format(SearchConstants.CATEGORY, escapeJson(category));

    int pageNo = (dto.getPageNo() == null || dto.getPageNo() < 1) ? 1 : dto.getPageNo();
    int pageSize = (dto.getPageSize() == null || dto.getPageSize() < 1) ? 10 : dto.getPageSize();
    int from = (pageNo - 1) * pageSize;

    // Render the template and let the client parse it as the request body.
    String query = String.format(SearchConstants.SEARCH_MAPPING, text, text, categoryClause, from, pageSize);
    SearchRequest searchRequest = SearchRequest.of(builder -> builder
            .index(SearchConstants.INDEX_NAME)
            .withJson(new StringReader(query)));

    SearchResponse<Object> response = EsSearchUtil.search(searchRequest);
    return EsSearchUtil.buildSearchResult(response);
}

/**
 * Escapes a value so it can be placed between double quotes in a JSON document.
 *
 * @param raw value to escape; {@code null} is treated as the empty string
 * @return the escaped value, without surrounding quotes
 */
private static String escapeJson(String raw) {
    if (raw == null) {
        return "";
    }
    StringBuilder escaped = new StringBuilder(raw.length() + 16);
    for (int i = 0; i < raw.length(); i++) {
        char c = raw.charAt(i);
        switch (c) {
            case '"':
                escaped.append("\\\"");
                break;
            case '\\':
                escaped.append("\\\\");
                break;
            case '\n':
                escaped.append("\\n");
                break;
            case '\r':
                escaped.append("\\r");
                break;
            case '\t':
                escaped.append("\\t");
                break;
            case '\b':
                escaped.append("\\b");
                break;
            case '\f':
                escaped.append("\\f");
                break;
            default:
                if (c < 0x20) {
                    // Remaining control characters must be written as \\uXXXX in JSON.
                    escaped.append(String.format("\\u%04x", (int) c));
                } else {
                    escaped.append(c);
                }
        }
    }
    return escaped.toString();
}
模板常量
/**
 * Constants for the template-based search: the index name and the JSON
 * templates that are filled in with {@link String#format}.
 */
public class SearchConstants {

    /** Name of the index that is searched. */
    public static final String INDEX_NAME = "search_test";

    /**
     * Optional category clause, spliced into the {@code must} array of
     * {@link #SEARCH_MAPPING} after the two {@code match} clauses.
     *
     * <p>Takes one {@code %s}: the category value. The clause starts with a comma
     * and is a complete JSON object, so the array stays valid JSON whether or not
     * the clause is present.
     */
    public static final String CATEGORY = " ,{\"term\": {\n" +
            " \"category\": \"%s\"\n" +
            " }}\n";

    /**
     * Search request template. Placeholders, in order: title text, content text,
     * optional extra {@code must} clause (see {@link #CATEGORY}, or the empty
     * string), {@code from}, {@code size}.
     */
    public static final String SEARCH_MAPPING = "{" +
            " \"query\": {\n" +
            " \"bool\": {\n" +
            " \"must\":[ " +
            " {\n" +
            " \"match\": {\n" +
            " \"title\": \"%s\"\n" +
            " }\n" +
            " },\n" +
            " {\n" +
            " \"match\": {\n" +
            " \"content\": \"%s\"\n" +
            " }\n" +
            " }\n" +
            "%s"+
            " ]}\n" +
            " },\n" +
            " \"from\": %s,\n" +
            " \"size\": %s,\n" +
            " \"highlight\": {\n" +
            " \"pre_tags\": \"<em>\",\n" +
            " \"post_tags\": \"</em>\",\n" +
            " \"fields\": {\n"+
            " \"title\": {},\n" +
            " \"content\": {}\n" +
            " }\n" +
            " }\n" +
            "}";
}
EsSearchUtil
@Component
@Slf4j
public class EsSearchUtil {
private static ElasticsearchClient client;
@Autowired
public EsSearchUtil(ElasticsearchClient client) {
EsSearchUtil.client = client;
}
/**
* 数据检索
*/
public static SearchResponse<Object> search(SearchRequest searchRequest) {
SearchResponse<Object> response;
try {
response = client.search(searchRequest, Object.class);
log.debug("搜索返回结果:" + response.toString());
} catch (IOException e) {
log.error(e.toString());
throw new RuntimeException("搜索服务出了点小差,请稍后再试", e);
}
return response;
}
/**
* 构建搜索结果
*/
public static <T> List<T> buildSearchResult(SearchResponse<Object> resp, Class<T> clazz) {
if (resp.hits() == null || resp.hits().hits() == null) {
return Collections.emptyList();
}
List<Hit<Object>> hits = resp.hits().hits();
List<T> list = new ArrayList<>();
for(Hit<Object> hit:hits) {
//格式转换
T t = ConvertUtil.objToObj(hit.source(), clazz);
list.add(t);
}
return list;
}
public static List<ContentSearchVO> buildSearchResult(SearchResponse<Object> resp){
List<Hit<Object>> hits = resp.hits().hits();
List<ContentSearchVO> list = new ArrayList<>();
for(Hit<Object> hit:hits){
ContentSearchVO contentSearchVO = ConvertUtil.objToObj(hit.source(), ContentSearchVO.class);
Map<String, List<String>> highlight = hit.highlight();
if (!CollectionUtils.isEmpty(highlight)) {
//标题
List<String> highLightTitle = highlight.get("title");
//内容
List<String> highLightContent = highlight.get("content");
//无高亮,就用内容替代
if (!CollectionUtils.isEmpty(highLightTitle)){
StringBuilder highLightTitleStringBuilder = new StringBuilder();
for (String titleSegment : highLightTitle) {
highLightTitleStringBuilder.append(titleSegment);
}
contentSearchVO .setTitle(highLightTitleStringBuilder.toString());
}
if (!CollectionUtils.isEmpty(highLightContent)){
StringBuilder highLightContentStringBuilder = new StringBuilder();
for (String contentSegment : highLightContent) {
highLightContentStringBuilder.append(contentSegment);
}
contentSearchVO .setHighLightContent(highLightContentStringBuilder.toString());
}else {
contentSearchVO .setHighLightContent(contentSearchVO.getContent());
}
}
list.add(contentSearchVO);
}
return list;
}
/**
* 分词解析
* @param text 搜索词
* @param index 索引库
* @return 分词结果
*/
public static List<String> analyze (String text,String index) {
AnalyzeRequest analyzeRequest = new AnalyzeRequest.Builder()
.index(index)
.text(text)
.analyzer("ik_max_word") // ik_smart
.build();
AnalyzeResponse analyzeResponse;
try {
analyzeResponse = client.indices().analyze(analyzeRequest);
} catch (IOException e) {
throw new RuntimeException(e);
}
List<AnalyzeToken> tokens = analyzeResponse.tokens();
List<String> result = new ArrayList<>();
// 分词结果
for (AnalyzeToken token : tokens) {
result.add(token.token());
}
return result;
}
}
2.2 基于搜索api实现
@Service
@Slf4j
@RequiredArgsConstructor
public class ContentSearchServiceImpl implements ContentSearchService {
private final SearchHotWordsService searchHotWordsService;
@Value("${search.highlightWords:500}")
private Integer highlightWords;
@Value("${search.words:500}")
private Integer words;
@Value("${search.searchWords:200}")
private Integer searchWords;
@Override
public SearchVO search(SearchDTO dto) {
SearchVO result = new SearchVO();
//构建bool
BoolQuery boolQuery = this.buildBoolQuery(dto);
//构建高亮字段
Highlight highlight = this.buildHighlight();
//构建查询
SearchRequest searchRequest = SearchRequest.of(s -> s.index(SearchConstants.INDEX_NAME)
.query(q -> q
.bool(boolQuery)
)
.from((dto.getPageNo() - 1) * dto.getPageSize())
.size(dto.getPageSize())
.highlight(highlight)
);
SearchResponse<SearchDocument> response = EsSearchUtil.search(searchRequest,SearchDocument.class);
List<ContentSearchVO> searchResult = this.buildSearchResult(response);
if (!CollectionUtils.isEmpty(searchResult)){
searchResult = this.sortSearchResult(searchResult);
}
result.setList(searchResult);
return result;
}
/**
* 构建bool
* @param dto 查询参数
* @return BoolQuery
*/
private BoolQuery buildBoolQuery(SearchDTO dto) {
String tags = dto.getTags();
//要查询的字段
List<String> queryFields = List.of(SearchConstants.FILE_NAME, SearchConstants.CONVERTED_TEXT);
//构建bool查询
BoolQuery.Builder boolBuilder = new BoolQuery.Builder().should(s -> s.multiMatch(mu -> mu.fields(queryFields).query(dto.getText())));
if (tags != null){
List<FieldValue> v = new ArrayList<>();
String[] split = tags.split(",");
for (String s : split) {
v.add(FieldValue.of(s));
}
TermsQuery termsQuery = TermsQuery.of(t -> t.field(SearchConstants.TAGS).terms(tm -> tm.value(v)));
boolBuilder.must(m -> m.terms(termsQuery));
}
return boolBuilder.build();
}
/**
* 构建高亮字段
* @return Highlight
*/
private Highlight buildHighlight() {
Map<String, HighlightField> map = new HashMap<>();
map.put(SearchConstants.FILE_NAME, HighlightField.of(hf -> hf.preTags(SearchConstants.PRE_TAGS).postTags(SearchConstants.POST_TAGS)));
map.put(SearchConstants.CONVERTED_TEXT, HighlightField.of(hf -> hf.preTags(SearchConstants.PRE_TAGS).postTags(SearchConstants.POST_TAGS)
.numberOfFragments(1).fragmentSize(highlightWords) //numberOfFragments(1),表示将字段分割为最多1个片段,并设置 fragmentSize(300),表示每个片段的大小为300个字符。
));
return Highlight.of(
h -> h.type(HighlighterType.Unified)
.order(HighlighterOrder.Score)
.fields(map)
);
}
private List<ContentSearchVO> buildSearchResult(SearchResponse<SearchDocument> resp) {
List<Hit<SearchDocument>> hits = resp.hits().hits();
List<ContentSearchVO> list = new ArrayList<>();
for(Hit<SearchDocument> hit:hits){
SearchDocument searchDocument = JsonUtil.objToObj(hit.source(), SearchDocument.class);
ContentSearchVO contentSearchVO = this.searchAdapter(searchDocument);
String content = contentSearchVO.getContent();
Map<String, List<String>> highlight = hit.highlight();
if (!CollectionUtils.isEmpty(highlight)) {
//高亮标题
List<String> highLightTitle = highlight.get(SearchConstants.FILE_NAME);
//高亮内容
List<String> highLightContent = highlight.get(SearchConstants.CONVERTED_TEXT);
//标题
if (!CollectionUtils.isEmpty(highLightTitle)){
StringBuilder highLightTitleStringBuilder = new StringBuilder();
for (String titleSegment : highLightTitle) {
highLightTitleStringBuilder.append(titleSegment);
}
contentSearchVO .setTitle(highLightTitleStringBuilder.toString());
}
//内容
if (!CollectionUtils.isEmpty(highLightContent)){
StringBuilder highLightContentStringBuilder = new StringBuilder();
for (String titleSegment : highLightContent) {
highLightContentStringBuilder.append(titleSegment);
}
contentSearchVO .setContent(highlightContent);
}else {
//无高亮字段---从头开始取一定数量的字
String s = this.replaceSymbol(content).replace(SearchConstants.LINE, "");
if (s.length() >= words){
s = s.substring(0,words);
}
contentSearchVO .setContent(s);
}
}else {
//无高亮字段---从头开始取一定数量的字
String s = this.replaceSymbol(content).replace(SearchConstants.LINE, "");
if (s.length() >= words){
s = s.substring(0,words);
}
contentSearchVO .setContent(s);
}
list.add(contentSearchVO );
}
return list;
}
private ContentSearchVO searchAdapter(SearchDocument searchDocument) {
ContentSearchVO contentSearchVO = new ContentSearchVO();
contentSearchVO .setTitle(searchDocument.getFileName());
contentSearchVO .setContent(searchDocument.getDocText());
contentSearchVO .setFileId(searchDocument.getFileId());
contentSearchVO .setTags(searchDocument.getTags());
contentSearchVO .setTimestamp(searchDocument.getTimestamp());
return contentSearchVO ;
}
private String replaceSymbol(String replaceStr) {
return replaceStr.replaceAll("\t","")
.replaceAll(" "," ")
.replaceAll(" "," ")
.replaceAll("(\\n\\s*)+", " ") //多重换行只保留一个
.replaceAll("\\s+"," ")
.replaceAll("-\\d+-", "") //去掉 页码
;
}
}