当前位置: 首页 > article >正文

ElasticSearch easy-es 聚合函数 group by 混合写法求Top N 词云 分词

1. 将用户访问记录表的数据同步到 ES 并对访问标题进行分词,然后统计出用户访问次数最多的前十个词语。

Elasticsearch、Easy-es 快速入门 SearchAfterPage分页 若依前后端分离 Ruoyi-Vue SpringBoot

使用结巴分词器

        <!-- Jieba Chinese word segmenter (used to split view titles into words) -->
        <dependency>
            <groupId>com.huaban</groupId>
            <artifactId>jieba-analysis</artifactId>
            <version>1.0.2</version>
        </dependency>

初始化数据



    /**
     * Rebuilds the two Elasticsearch indices and fills them from the view-count table.
     *
     * <p>Steps:
     * <ol>
     *   <li>Call {@code deleteIndex} followed by {@code createIndex} for
     *       {@code tz_ly_view_count} and {@code tz_ly_view_count_word}.</li>
     *   <li>Build a join query that selects every {@code TzLyViewCount} column plus the
     *       viewer's gender, ethnicity, political status, domain and area code taken from
     *       {@code TzLyUser} (left join on {@code viewUserId = id}).</li>
     *   <li>Read the result 1000 rows per page until an empty page comes back, copy each
     *       row into an {@code EsTzLyViewCount} and hand the page to
     *       {@link #splitWordAndInsertEs(List)}.</li>
     * </ol>
     *
     * <p>NOTE(review): the query has no explicit ordering; whether page boundaries are
     * stable depends on what {@code PageUtils.startPage} adds — confirm.
     */
    public void createAndInitToES() {
        // Recreate both indices (an index is the ES counterpart of a MySQL table).
        esTzLyViewCountMapper.deleteIndex("tz_ly_view_count");
        esTzLyViewCountMapper.createIndex();
        esTzLyViewCountWordFrequencyMapper.deleteIndex("tz_ly_view_count_word");
        esTzLyViewCountWordFrequencyMapper.createIndex();

        // View-count rows enriched with attributes of the viewing user.
        MPJLambdaWrapper<TzLyViewCount> query = JoinWrappers.lambda(TzLyViewCount.class)
                .selectAll(TzLyViewCount.class)
                .selectAs("lyUser", TzLyUser::getGender, TzLyViewCount::getGender)
                .selectAs("lyUser", TzLyUser::getEthnicity, TzLyViewCount::getEthnicity)
                .selectAs("lyUser", TzLyUser::getPoliticalStatus, TzLyViewCount::getPoliticalStatus)
                .selectAs("lyUser", TzLyUser::getDomain, TzLyViewCount::getDomain)
                .selectAs("lyUser", TzLyUser::getAreaCode, TzLyViewCount::getAreaCode)
                .leftJoin(TzLyUser.class, "lyUser", on -> on.eq(TzLyViewCount::getViewUserId, TzLyUser::getId));

        // Limit the amount of data for local runs: only rows created in this window.
        query.between(
                TzLyViewCount::getCreatedDate,
                DateUtil.parse("2024-01-01 00:00:00"),
                DateUtil.parse("2024-02-01 00:00:00"));

        final int pageSize = 1000;
        for (int pageNo = 1; ; pageNo++) {
            PageUtils.startPage(pageNo, pageSize);
            List<TzLyViewCount> rows = tzLyViewCountMapper.selectList(query);
            if (CollectionUtil.isEmpty(rows)) {
                // An empty page means everything has been read.
                return;
            }

            List<EsTzLyViewCount> documents = new ArrayList<>(rows.size() * 2);
            for (TzLyViewCount row : rows) {
                EsTzLyViewCount document = new EsTzLyViewCount();
                BeanUtils.copyBeanProp(document, row);
                documents.add(document);
            }
            splitWordAndInsertEs(documents);
        }
    }

    /**
     * Segments each record's view title into words and writes both the word documents
     * and the original records to Elasticsearch.
     *
     * <p>For every record with a non-blank view title, the title is split with
     * {@code JiebaSegmenter.sentenceProcess}. Each resulting word of at least two
     * characters becomes one {@code EsTzLyViewCountWord}: a property copy of the source
     * record with {@code viewTitleWord} set to that word. The word documents are
     * batch-inserted first, then the source records.
     *
     * <p>NOTE(review): the property copy may carry the source record's id into every
     * word document derived from it; whether that makes them overwrite each other depends
     * on the entity's id configuration — confirm.
     *
     * @param esTzLyViewCountList records to index; each is also expanded into word documents
     */
    public void splitWordAndInsertEs(List<EsTzLyViewCount> esTzLyViewCountList) {
        JiebaSegmenter segmenter = new JiebaSegmenter();
        List<EsTzLyViewCountWord> wordDocuments = new ArrayList<>();

        for (EsTzLyViewCount source : esTzLyViewCountList) {
            if (!StringUtils.isNotBlank(source.getViewTitle())) {
                // Nothing to segment for this record.
                continue;
            }
            for (String word : segmenter.sentenceProcess(source.getViewTitle())) {
                if (word.length() < 2) {
                    // Single-character tokens are skipped.
                    continue;
                }
                EsTzLyViewCountWord wordDocument = new EsTzLyViewCountWord();
                BeanUtils.copyBeanProp(wordDocument, source);
                wordDocument.setViewTitleWord(word);
                wordDocuments.add(wordDocument);
            }
        }

        esTzLyViewCountWordFrequencyMapper.insertBatch(wordDocuments);
        esTzLyViewCountMapper.insertBatch(esTzLyViewCountList);
    }
easy-es 聚合函数 混合写法
 /**
     * Returns the ten most frequent view-title words within a date range.
     *
     * <p>The range is {@code [bgnTimestamp, endTimestamp]} when both are supplied;
     * otherwise it spans the whole calendar year (Jan 1 00:00:00.000 to
     * Dec 31 23:59:59.999) of {@code queryTimestamp}. A terms aggregation on
     * {@code viewTitleWord} with size 10 is attached to the search built from the wrapper.
     *
     * <p>NOTE(review): when no explicit range is given, {@code queryTimestamp} is passed
     * straight to {@code Calendar.setTime}; a null value would presumably fail there — confirm
     * callers always set it.
     *
     * @param statisticsDateQueryVo query object carrying either an explicit begin/end
     *                              range or a reference timestamp
     * @return one single-entry map per aggregation bucket (word -> document count), in
     *         the order the buckets are returned
     */
    public List<HashMap<String, Long>> accessInformationKeyword(StatisticsDateQueryVo statisticsDateQueryVo) {
        LambdaEsQueryWrapper<EsTzLyViewCountWord> wrapper = new LambdaEsQueryWrapper<>();

        // Only the aggregation is needed, so request no document hits.
        wrapper.size(0);

        // Work out the date window that bounds the aggregation.
        final Date rangeStart;
        final Date rangeEnd;
        boolean explicitRange = ObjectUtils.isNotEmpty(statisticsDateQueryVo.getBgnTimestamp())
                && StringUtils.isNotNull(statisticsDateQueryVo.getEndTimestamp());
        if (explicitRange) {
            rangeStart = new Date(statisticsDateQueryVo.getBgnTimestamp().getTime());
            rangeEnd = new Date(statisticsDateQueryVo.getEndTimestamp().getTime());
        } else {
            // Fall back to the full year containing the query timestamp.
            Calendar cal = Calendar.getInstance();
            cal.setTime(statisticsDateQueryVo.getQueryTimestamp());
            int year = cal.get(Calendar.YEAR);

            // First instant of the year.
            cal.set(year, Calendar.JANUARY, 1, 0, 0, 0);
            cal.set(Calendar.MILLISECOND, 0);
            rangeStart = cal.getTime();

            // Last millisecond of the year.
            cal.set(year, Calendar.DECEMBER, 31, 23, 59, 59);
            cal.set(Calendar.MILLISECOND, 999);
            rangeEnd = cal.getTime();
        }
        wrapper.between(EsTzLyViewCountWord::getCreatedDate, rangeStart, rangeEnd);

        // Terms aggregation: custom name, target field, ten buckets
        // (ordering left at its default).
        AggregationBuilder topWords = AggregationBuilders
                .terms("viewTitleWordAgg")
                .field("viewTitleWord")
                .size(10);

        // Mixed usage: take the search source built from the wrapper, add the native
        // aggregation to it, and hand it back to the wrapper before searching.
        SearchSourceBuilder source = esTzLyViewCountWordFrequencyMapper.getSearchSourceBuilder(wrapper);
        source.aggregation(topWords);
        wrapper.setSearchSourceBuilder(source);
        SearchResponse response = esTzLyViewCountWordFrequencyMapper.search(wrapper);

        Terms terms = (Terms) response.getAggregations().getAsMap().get("viewTitleWordAgg");
        List<HashMap<String, Long>> result = new ArrayList<>();
        for (Terms.Bucket bucket : terms.getBuckets()) {
            HashMap<String, Long> entry = new HashMap<>();
            entry.put(bucket.getKeyAsString(), bucket.getDocCount());
            result.add(entry);
        }
        return result;
    }

http://www.kler.cn/a/422142.html

相关文章:

  • Secured Finance 推出 TVL 激励计划以及基于 FIL 的稳定币
  • FPGA Xilinx维特比译码器实现卷积码译码
  • git查看本地库对应的远端库的地址
  • nginx配置笔记
  • WEB开发: 丢掉包袱,拥抱ASP.NET CORE!
  • 技术创新与人才培养并重 软通动力子公司鸿湖万联亮相OpenHarmony人才生态大会
  • k8s,声明式API对象理解
  • 基于Java Springboot广西文化传承微信小程序
  • 洛谷 B2029:大象喝水 ← 圆柱体体积
  • 《Vue零基础教程》(5)计算属性和侦听器好讲解
  • 【Linux|计算机网络】HTTPS工作原理与安全机制详解
  • 说说Elasticsearch查询语句如何提升权重?
  • Leetcode 303 Range Sum Query - Immutable
  • 靶机dpwwn-01
  • vue3项目最新eslint9+prettier+husky+stylelint+vscode配置
  • Qt 面试题复习10~12_2024-12-2
  • Android Folding
  • kafka 配置消息编码格式、解决消费者中文乱码
  • uniapp动态表单
  • Llama模型分布式训练(微调)
  • 在RK3588/RK3588s中提升yolov8推理速度
  • WPF_2
  • IOS ARKit进行图像识别
  • 05云计算HCIA学习笔记
  • Git操作学习2
  • Simulink的SIL软件在环测试