Lucene(2):Springboot整合全文检索引擎TermInSetQuery应用实例附源码
前言
本章代码已分享至Gitee: https://gitee.com/lengcz/springbootlucene01
接上文。Lucene(1):Springboot整合全文检索引擎Lucene常规入门附源码
如何在指定范围内查询?从 Lucene 7 开始,Filter 已被弃用,无法再直接用它进行条件过滤。
TermInSetQuery 指定集合条件过滤
如图,想要把 fromType 限定为 CSDN 和小米,不查询其他来源的文章,该怎么办?
前文提到的 TermRangeQuery 属于范围条件(按词项的字典序比较上下界),并不能表达"属于某个集合",这里显然不适用。
// Inclusive range over the terms of the "id" field, compared lexicographically.
// newStringRange encodes both bounds as UTF-8 (the encoding Lucene uses for indexed terms)
// instead of relying on the platform default charset of String.getBytes().
TermRangeQuery query2 = TermRangeQuery.newStringRange("id", "1001", "1005", true, true);
builder.add(query2, BooleanClause.Occur.MUST);
我们需要使用TermInSetQuery
// BytesRef(CharSequence) always encodes as UTF-8, matching how Lucene stores indexed terms.
// String.getBytes() without a charset uses the platform default (e.g. GBK), which would make
// the non-ASCII term "小米" never match.
List<BytesRef> bytesRefList = Arrays.asList(new BytesRef("CSDN"), new BytesRef("小米"));
// Matches documents whose "fromType" term equals any entry of the list.
TermInSetQuery query3 = new TermInSetQuery("fromType", bytesRefList);
builder.add(query3, BooleanClause.Occur.MUST);
多关键词在多字段中搜索
// Outer builder: combines the keyword group with any further conditions.
BooleanQuery.Builder builder = new BooleanQuery.Builder();
// Alternative (one query string parsed across several fields):
// MultiFieldQueryParser parser = new MultiFieldQueryParser(str, new IKAnalyzer());
// Query query = parser.parse(text);
// builder.add(query, BooleanClause.Occur.MUST);

// Important: the keywords get their own builder. It acts like a pair of parentheses around
// the keyword clauses, so their AND/OR relation does not mix with the other conditions.
BooleanQuery.Builder builder2 = new BooleanQuery.Builder();

// Loop-invariant pieces are created once instead of once per keyword.
String[] fields = {"title", "description"}; // search in title and description
BooleanClause.Occur[] flags = {BooleanClause.Occur.SHOULD, BooleanClause.Occur.SHOULD};
IKAnalyzer analyzer = new IKAnalyzer();
// "and" -> every keyword is required; anything else (including null) -> any keyword may match.
BooleanClause.Occur keywordOccur = "and".equals(rule) ? BooleanClause.Occur.MUST : BooleanClause.Occur.SHOULD;

for (String rawKey : text.split(",")) {
    String key = rawKey.trim();
    if (key.isEmpty()) {
        // Skip blanks such as the middle item of "a,,b" rather than handing an empty query to the parser.
        continue;
    }
    // The same keyword is searched in each field; a hit in either field is enough.
    String[] kws = {key, key};
    Query queryKey = MultiFieldQueryParser.parse(kws, fields, flags, analyzer);
    builder2.add(queryKey, keywordOccur);
}
builder.add(builder2.build(), BooleanClause.Occur.MUST);
完整示例
/**
 * Searches the index for one or more keywords in the {@code title} and {@code description}
 * fields, restricted to documents whose {@code fromType} is "CSDN" or "小米", and returns the
 * matching entries with the keywords highlighted.
 *
 * @param text keywords separated by commas; blank keywords are skipped
 * @param rule {@code "and"} requires every keyword to match; any other value (including
 *             {@code null}) means a match on any keyword is enough
 * @return at most 100 matching entries with highlighted title and description; an empty list
 *         when {@code text} contains no usable keyword
 * @throws IOException if the index cannot be opened or read
 * @throws ParseException if a keyword cannot be parsed as a query
 * @throws InvalidTokenOffsetsException if highlighting hits inconsistent token offsets
 */
@GetMapping("/searchTextMoreParam")
public List<BlogTitle> searchTextMoreParam(String text, String rule) throws IOException, ParseException, InvalidTokenOffsetsException {
    List<BlogTitle> list = new ArrayList<>();
    if (text == null || text.trim().isEmpty()) {
        return list;
    }

    final int maxHits = 100;
    String[] fields = {"title", "description"};
    BooleanClause.Occur[] flags = {BooleanClause.Occur.SHOULD, BooleanClause.Occur.SHOULD};
    // "and" -> every keyword is required; anything else -> any keyword may match.
    BooleanClause.Occur keywordOccur = "and".equals(rule) ? BooleanClause.Occur.MUST : BooleanClause.Occur.SHOULD;

    // Directory, reader and analyzer hold resources, so they are closed when the request finishes.
    try (Directory directory = FSDirectory.open(FileSystems.getDefault().getPath("d:\\indexDir"));
         IndexReader reader = DirectoryReader.open(directory);
         IKAnalyzer analyzer = new IKAnalyzer()) {
        IndexSearcher searcher = new IndexSearcher(reader);

        // Condition 1: the keyword group. It is built separately so that it behaves like a
        // parenthesised sub-expression inside the overall query.
        BooleanQuery.Builder keywordBuilder = new BooleanQuery.Builder();
        boolean hasKeyword = false;
        for (String rawKey : text.split(",")) {
            String key = rawKey.trim();
            if (key.isEmpty()) {
                continue;
            }
            // Same keyword against both fields; a hit in either field is enough.
            Query queryKey = MultiFieldQueryParser.parse(new String[]{key, key}, fields, flags, analyzer);
            keywordBuilder.add(queryKey, keywordOccur);
            hasKeyword = true;
        }
        if (!hasKeyword) {
            return list;
        }
        Query keywordQuery = keywordBuilder.build();

        BooleanQuery.Builder builder = new BooleanQuery.Builder();
        builder.add(keywordQuery, BooleanClause.Occur.MUST);

        // Condition 2: restrict the source. TermQuery / TermRangeQuery on a non-tokenized field
        // (for example "id") could be added here the same way.
        // BytesRef(CharSequence) encodes as UTF-8, independent of the platform default charset.
        List<BytesRef> bytesRefList = Arrays.asList(new BytesRef("CSDN"), new BytesRef("小米"));
        TermInSetQuery query3 = new TermInSetQuery("fromType", bytesRefList);
        builder.add(query3, BooleanClause.Occur.MUST);

        // Fetch the top hits (up to maxHits).
        TopDocs topDocs = searcher.search(builder.build(), maxHits);
        log.info("本次搜索共找到" + topDocs.totalHits + "条数据");

        // Highlight only the keywords; the other conditions are filters, not search terms.
        SimpleHTMLFormatter simpleHTMLFormatter = new SimpleHTMLFormatter("<span style='color:red'>", "</span>");
        Highlighter highlighter = new Highlighter(simpleHTMLFormatter, new QueryScorer(keywordQuery));
        // Keep each highlighted fragment to about 100 characters.
        Fragmenter fragmenter = new SimpleFragmenter(100);
        highlighter.setTextFragmenter(fragmenter);

        // Each ScoreDoc carries the document number and its score.
        for (ScoreDoc scoreDoc : topDocs.scoreDocs) {
            Document doc = reader.document(scoreDoc.doc);
            BlogTitle content = selectById(doc.get("id"));
            if (content == null) {
                // NOTE(review): assumes selectById may return null when the index is stale — confirm.
                continue;
            }

            // getBestFragment returns null when the text contains no keyword; fall back to the
            // original value so the field is never wiped out.
            String indexedTitle = doc.get("title");
            String title = indexedTitle == null ? null : highlighter.getBestFragment(analyzer, "title", indexedTitle);
            if (title == null) {
                title = content.getTitle();
            }

            String originalDescription = content.getDescription();
            String description = originalDescription == null ? null : highlighter.getBestFragment(analyzer, "description", originalDescription);
            if (description == null) {
                description = originalDescription;
            }

            content.setDescription(description);
            content.setTitle(title);
            list.add(content);
        }
    }
    return list;
}