Elasticsearch Easy-Es 聚合函数(group by)混合写法求 Top N:词云、分词
1. 将用户访问记录表的数据同步到 ES 并进行分词,获取用户访问次数最多的前十个词语。
Elasticsearch、Easy-es 快速入门 SearchAfterPage分页 若依前后端分离 Ruoyi-Vue SpringBoot
使用结巴分词器
<!-- Tokenizer: jieba word-segmentation library -->
<dependency>
<groupId>com.huaban</groupId>
<artifactId>jieba-analysis</artifactId>
<version>1.0.2</version>
</dependency>
初始化数据
/**
 * Recreates the {@code tz_ly_view_count} and {@code tz_ly_view_count_word} indices and
 * reloads them from the database.
 *
 * <p>View-count rows are read page by page (joined with the viewing user's gender,
 * ethnicity, political status, domain and area code), copied into {@link EsTzLyViewCount}
 * documents and handed to {@link #splitWordAndInsertEs(List)} for indexing.
 *
 * <p>NOTE(review): the query has no explicit ORDER BY, so page boundaries depend on the
 * database's default row order; consider adding a stable sort key if rows may be
 * duplicated or skipped between pages.
 */
public void createAndInitToES() {
    // 1. Initialise: drop and recreate both indices (an index is roughly a MySQL table).
    esTzLyViewCountMapper.deleteIndex("tz_ly_view_count");
    esTzLyViewCountMapper.createIndex();
    esTzLyViewCountWordFrequencyMapper.deleteIndex("tz_ly_view_count_word");
    esTzLyViewCountWordFrequencyMapper.createIndex();

    MPJLambdaWrapper<TzLyViewCount> tzLyViewCountWrapper = JoinWrappers.lambda(TzLyViewCount.class)
            .selectAll(TzLyViewCount.class)
            .selectAs("lyUser", TzLyUser::getGender, TzLyViewCount::getGender)
            .selectAs("lyUser", TzLyUser::getEthnicity, TzLyViewCount::getEthnicity)
            .selectAs("lyUser", TzLyUser::getPoliticalStatus, TzLyViewCount::getPoliticalStatus)
            .selectAs("lyUser", TzLyUser::getDomain, TzLyViewCount::getDomain)
            .selectAs("lyUser", TzLyUser::getAreaCode, TzLyViewCount::getAreaCode)
            .leftJoin(TzLyUser.class, "lyUser", on -> on.eq(TzLyViewCount::getViewUserId, TzLyUser::getId));
    // Limit the amount of data when running locally.
    tzLyViewCountWrapper.between(TzLyViewCount::getCreatedDate, DateUtil.parse("2024-01-01 00:00:00"), DateUtil.parse("2024-02-01 00:00:00"));

    int currentPage = 1;
    int pageSize = 1000;
    while (true) {
        // Presumably applies pagination to the next mapper query (PageHelper style) - confirm.
        PageUtils.startPage(currentPage, pageSize);
        List<TzLyViewCount> list = tzLyViewCountMapper.selectList(tzLyViewCountWrapper);
        // An empty page means every row has been processed.
        if (CollectionUtil.isEmpty(list)) {
            break;
        }
        // Exactly one ES document is produced per row, so size the list accordingly.
        List<EsTzLyViewCount> esTzLyViewCountList = new ArrayList<>(list.size());
        for (TzLyViewCount viewCount : list) {
            EsTzLyViewCount esTzLyViewCount = new EsTzLyViewCount();
            BeanUtils.copyBeanProp(esTzLyViewCount, viewCount);
            esTzLyViewCountList.add(esTzLyViewCount);
        }
        splitWordAndInsertEs(esTzLyViewCountList);
        // A short page is the last page: stop here instead of issuing one more query.
        // This also avoids re-reading the last page forever if the paging helper
        // clamps out-of-range page numbers.
        if (list.size() < pageSize) {
            break;
        }
        // Move on to the next page.
        currentPage++;
    }
}
/**
 * Segments the view title of every record with jieba and writes the results to
 * Elasticsearch.
 *
 * <p>For each segmented word of at least two characters, a {@link EsTzLyViewCountWord}
 * is created as a property copy of the source record with the word stored in
 * {@code viewTitleWord}. Records with a blank title produce no word documents.
 * The word documents are batch-inserted first, then the source records themselves.
 *
 * @param esTzLyViewCountList records to segment and index
 */
public void splitWordAndInsertEs(List<EsTzLyViewCount> esTzLyViewCountList) {
    JiebaSegmenter segmenter = new JiebaSegmenter();
    List<EsTzLyViewCountWord> wordDocs = new ArrayList<>();

    for (EsTzLyViewCount source : esTzLyViewCountList) {
        String title = source.getViewTitle();
        if (!StringUtils.isNotBlank(title)) {
            // Nothing to segment for this record.
            continue;
        }
        for (String token : segmenter.sentenceProcess(title)) {
            if (token.length() < 2) {
                // Skip single-character tokens.
                continue;
            }
            EsTzLyViewCountWord wordDoc = new EsTzLyViewCountWord();
            BeanUtils.copyBeanProp(wordDoc, source);
            wordDoc.setViewTitleWord(token);
            wordDocs.add(wordDoc);
        }
    }

    esTzLyViewCountWordFrequencyMapper.insertBatch(wordDocs);
    esTzLyViewCountMapper.insertBatch(esTzLyViewCountList);
}
easy-es 聚合函数 混合写法
/**
 * Returns the most frequent view-title words (top 10) within a date range.
 *
 * <p>When both {@code bgnTimestamp} and {@code endTimestamp} are present, documents are
 * restricted to that range. Otherwise the range is the whole calendar year (in the JVM
 * default time zone) of {@code queryTimestamp}, or of the current date when
 * {@code queryTimestamp} is {@code null}.
 *
 * @param statisticsDateQueryVo query object carrying the optional date range and query date
 * @return one single-entry map per bucket (word -> document count), in the order the
 *         terms aggregation returns them; empty if the response has no such aggregation
 */
public List<HashMap<String, Long>> accessInformationKeyword(StatisticsDateQueryVo statisticsDateQueryVo) {
    LambdaEsQueryWrapper<EsTzLyViewCountWord> wrapper = new LambdaEsQueryWrapper<>();
    // Only the aggregation is needed, so return no hit documents.
    wrapper.size(0);

    // Restrict the documents that take part in the aggregation.
    if (statisticsDateQueryVo.getBgnTimestamp() != null && statisticsDateQueryVo.getEndTimestamp() != null) {
        Date bgnTimestamp = new Date(statisticsDateQueryVo.getBgnTimestamp().getTime());
        Date endTimestamp = new Date(statisticsDateQueryVo.getEndTimestamp().getTime());
        wrapper.between(EsTzLyViewCountWord::getCreatedDate, bgnTimestamp, endTimestamp);
    } else {
        // Calendar.getInstance() starts at "now"; only override it when a query date is given,
        // so a missing query date falls back to the current year instead of throwing.
        Calendar calendar = Calendar.getInstance();
        Date queryDate = statisticsDateQueryVo.getQueryTimestamp();
        if (queryDate != null) {
            calendar.setTime(queryDate);
        }
        // First instant of the year: January 1st, 00:00:00.000.
        calendar.set(Calendar.MONTH, Calendar.JANUARY);
        calendar.set(Calendar.DAY_OF_MONTH, 1);
        calendar.set(Calendar.HOUR_OF_DAY, 0);
        calendar.set(Calendar.MINUTE, 0);
        calendar.set(Calendar.SECOND, 0);
        calendar.set(Calendar.MILLISECOND, 0);
        Date firstDayOfYear = calendar.getTime();
        // Last instant of the same year: December 31st, 23:59:59.999.
        calendar.set(Calendar.MONTH, Calendar.DECEMBER);
        calendar.set(Calendar.DAY_OF_MONTH, 31);
        calendar.set(Calendar.HOUR_OF_DAY, 23);
        calendar.set(Calendar.MINUTE, 59);
        calendar.set(Calendar.SECOND, 59);
        calendar.set(Calendar.MILLISECOND, 999);
        Date lastDayOfYear = calendar.getTime();
        wrapper.between(EsTzLyViewCountWord::getCreatedDate, firstDayOfYear, lastDayOfYear);
    }

    AggregationBuilder aggregation = AggregationBuilders
            .terms("viewTitleWordAgg") // custom aggregation name
            .field("viewTitleWord") // field to aggregate on
            // no explicit .order(): the terms aggregation default ordering is used
            .size(10); // keep ten buckets

    // Mixed usage: take the builder generated from the wrapper, attach the native
    // aggregation, then hand it back to the wrapper before searching.
    SearchSourceBuilder searchSourceBuilder = esTzLyViewCountWordFrequencyMapper.getSearchSourceBuilder(wrapper);
    searchSourceBuilder.aggregation(aggregation);
    wrapper.setSearchSourceBuilder(searchSourceBuilder);
    SearchResponse searchResponse = esTzLyViewCountWordFrequencyMapper.search(wrapper);

    List<HashMap<String, Long>> list = new ArrayList<>();
    if (searchResponse.getAggregations() == null) {
        return list;
    }
    Aggregation viewTitleWordAgg = searchResponse.getAggregations().getAsMap().get("viewTitleWordAgg");
    if (!(viewTitleWordAgg instanceof Terms)) {
        // Aggregation missing or of an unexpected type: nothing to report.
        return list;
    }
    for (Terms.Bucket bucket : ((Terms) viewTitleWordAgg).getBuckets()) {
        HashMap<String, Long> hashMap = new HashMap<>();
        hashMap.put(bucket.getKeyAsString(), bucket.getDocCount());
        list.add(hashMap);
    }
    return list;
}