spring-data-elasticsearch 3.2.4 实现桶bucket排序去重,实现指定字段的聚合搜索
一、背景
es索引有一个文档CourseIndex,下面是示意:
creatorId | grade | subject | name | no |
---|---|---|---|---|
1002 | 2 | 70 | 英语听力课程一 | N00232DS9 |
1004 | 3 | 80 | 数学口算课程 | N00209DK7 |
1003 | 4 | 80 | 物理竞赛课程 | N00642XS2 |
1002 | 2 | 80 | 英语听力课程二 | N00432WS3 |
1002 | 2 | 90 | 英语听力课程三 | N002312DP5 |
在搜索的时候,搜索条件包括creatorId列表,grade列表,subject列表等,且它们不是固定的字典,而是从文档CourseIndex的已有数据中获取。
以上面的数据为例,各搜索条件的可选值分别是:
creatorId列表
- 1002
- 1003
- 1004
grade列表
- 2
- 3
- 4
subject列表
- 70
- 80
- 90
总结一下需求:对es文档中指定字段的值做terms聚合(即按字段值分桶bucket),每个桶对应一个不重复的取值,以达到去重效果。
下面将介绍如何使用spring-data-elasticsearch 3.2.4实现对指定字段的聚合搜索。
pom.xml引入jar包
<!-- Spring Boot starter for Spring Data Elasticsearch; no version is given here. -->
<dependency>
<groupId>org.springframework.boot</groupId>
<artifactId>spring-boot-starter-data-elasticsearch</artifactId>
</dependency>
<!-- Explicitly pins spring-data-elasticsearch to 3.2.4.RELEASE, the version this article targets. -->
<dependency>
<groupId>org.springframework.data</groupId>
<artifactId>spring-data-elasticsearch</artifactId>
<version>3.2.4.RELEASE</version>
</dependency>
二、CourseIndexAggrService.java
import lombok.RequiredArgsConstructor;
import org.elasticsearch.index.query.BoolQueryBuilder;
import org.elasticsearch.index.query.QueryBuilders;
import org.elasticsearch.search.aggregations.Aggregation;
import org.elasticsearch.search.aggregations.AggregationBuilders;
import org.elasticsearch.search.aggregations.bucket.terms.ParsedLongTerms;
import org.elasticsearch.search.aggregations.bucket.terms.Terms;
import org.elasticsearch.search.aggregations.bucket.terms.TermsAggregationBuilder;
import org.springframework.data.elasticsearch.core.ElasticsearchRestTemplate;
import org.springframework.data.elasticsearch.core.aggregation.AggregatedPage;
import org.springframework.data.elasticsearch.core.query.NativeSearchQueryBuilder;
import org.springframework.stereotype.Component;
import java.util.ArrayList;
import java.util.List;
/**
 * Collects the distinct values of a single field of the course index by
 * running an Elasticsearch terms aggregation and reading back the bucket keys.
 *
 * @author xxx
 */
@Component
@RequiredArgsConstructor
public class CourseIndexAggrService {
    private final ElasticsearchRestTemplate elasticsearchRestTemplate;
    // NOTE(review): injected but not used in this class; kept so the generated
    // constructor signature stays unchanged.
    private final CommonConfig commonConfig;

    /** Name under which the terms aggregation is registered and later looked up. */
    private static final String UNIQUE_FIELD = "unique_field";

    /**
     * Maximum number of buckets requested from the terms aggregation.
     * Without an explicit size Elasticsearch returns only the top 10 terms,
     * which would silently drop distinct values.
     */
    private static final int MAX_BUCKETS = 10000;

    // Fields whose distinct values can be requested
    public static final String CREATOR_ID = "creatorId";
    public static final String GRADE = "grade";
    public static final String SUBJECT = "subject";

    /**
     * Returns the distinct values stored in the given field, one entry per
     * aggregation bucket, in the order Elasticsearch returns the buckets.
     *
     * @param uniqueField name of the indexed field to aggregate on, e.g. {@link #CREATOR_ID}
     * @return the bucket keys rendered as strings (at most {@code MAX_BUCKETS});
     *         an empty list when the response carries no such aggregation
     * @throws IllegalArgumentException if {@code uniqueField} is null or empty
     */
    public List<String> findUniqueField(String uniqueField) {
        if (uniqueField == null || uniqueField.isEmpty()) {
            throw new IllegalArgumentException("uniqueField must not be null or empty");
        }

        NativeSearchQueryBuilder queryBuilder = new NativeSearchQueryBuilder();
        BoolQueryBuilder boolQueryBuilder = QueryBuilders.boolQuery();
        // TODO boolQueryBuilder is an empty query for now (matches every document).
        // Filter out logically deleted records:
        // boolQueryBuilder.filter(QueryBuilders.termQuery("deleted", 0));
        queryBuilder.withQuery(boolQueryBuilder);

        // Explicit size: the default of 10 buckets would truncate the result.
        TermsAggregationBuilder termsAgg = AggregationBuilders.terms(UNIQUE_FIELD)
                .field(uniqueField)
                .size(MAX_BUCKETS);
        queryBuilder.addAggregation(termsAgg);

        // idx_courseIndex is the index name.
        // NOTE(review): CourseIndex resolves its index from "#{commonConfig.courseIdx}";
        // confirm this hard-coded name refers to the same index.
        queryBuilder.withIndices("idx_courseIndex");

        // CourseIndex is the Elasticsearch document class.
        AggregatedPage<CourseIndex> resultPage =
                elasticsearchRestTemplate.queryForPage(queryBuilder.build(), CourseIndex.class);

        List<String> uniqueValues = new ArrayList<>();
        Aggregation aggregation = resultPage.getAggregation(UNIQUE_FIELD);
        if (aggregation == null) {
            return uniqueValues;
        }

        // Cast to the Terms interface rather than ParsedLongTerms so that
        // keyword and floating-point fields work as well as long/integer ones.
        Terms terms = (Terms) aggregation;
        for (Terms.Bucket bucket : terms.getBuckets()) {
            uniqueValues.add(bucket.getKeyAsString());
        }
        return uniqueValues;
    }
}
三、CourseIndex.java
这里略去了与本文无关的字段。
import lombok.Data;
import org.springframework.data.elasticsearch.annotations.Document;
import org.springframework.data.elasticsearch.annotations.Field;
import org.springframework.data.elasticsearch.annotations.FieldType;
import javax.persistence.Id;
import java.io.Serializable;
import java.util.Set;
/**
 * Course index document.
 * <p>
 * Holds the fields used as index search criteria, not every field of a course.
 * </p>
 * <p>
 * NOTE(review): the index name is resolved from the SpEL expression
 * {@code #{commonConfig.courseIdx}}, and {@code refreshInterval} is "-1" —
 * presumably refresh is triggered explicitly elsewhere; confirm.
 * </p>
 *
 * @author xxx
 */
@Data
@Document(indexName = "#{commonConfig.courseIdx}", type = "_doc", shards = 1, refreshInterval = "-1")
public class CourseIndex implements Serializable {
// NOTE(review): the file imports javax.persistence.Id here; Spring Data's own
// annotation is org.springframework.data.annotation.Id — confirm which is intended.
@Id
private String id;
/**
 * Course or lecture number.
 */
@Field(type = FieldType.Keyword)
private String no;
/**
 * Creator ID.
 */
@Field(type = FieldType.Long)
private long creatorId;
/**
 * Course or lecture name.
 */
@Field(type = FieldType.Text)
private String name;
/**
 * Subject.
 */
@Field(type = FieldType.Integer)
private int subject;
/**
 * Grade.
 */
@Field(type = FieldType.Integer)
private int grade;
}
四、使用
// Distinct creator IDs (creatorId)
final List<Long> userIds = courseIndexAggrService.findUniqueField(CREATOR_ID)
        .stream()
        .map(Long::valueOf)
        .collect(Collectors.toList());
// Distinct grades (grade)
final List<Integer> grades = courseIndexAggrService.findUniqueField(GRADE)
        .stream()
        .map(Integer::valueOf)
        .collect(Collectors.toList());
// Distinct subjects (subject)
final List<Integer> subjects = courseIndexAggrService.findUniqueField(SUBJECT)
        .stream()
        .map(Integer::valueOf)
        .collect(Collectors.toList());