当前位置: 首页 > article >正文

二百七十六、ClickHouse——Hive和ClickHouse非常不同的DWS指标数据SQL语句

一、目的

在完成数据之后对业务指标进行分析,Hive和ClickHouseSQL真不一样

二、部分业务指标表

2.1 统计数据流量表1天周期

2.1.1 Hive中原有代码

2.1.1.1 Hive中建表语句
--1、统计数据流量表——动态分区——1天周期
create  table  if not exists  hurys_db.dws_statistics_volume_1day(
    device_no        string         comment '设备编号',
    scene_name       string         comment '场景名称',
    lane_no          int            comment '车道编号',
    lane_direction   string         comment '车道流向',
    section_no       int            comment '断面编号',
    device_direction string         comment '雷达朝向',
    sum_volume_day   int            comment '每天总流量',
    week_day         string         comment '周几',
    month            string         comment '月份'
)
comment '统计数据流量表——动态分区——1天周期'
partitioned by (day string)
stored as orc
;
2.1.1.2 Hive中SQL语句
--动态加载数据
insert  overwrite  table  hurys_db.dws_statistics_volume_1day  partition(day)
select
       dwd_st.device_no,
       dwd_sc.scene_name,
       dwd_st.lane_no,
       dwd_rl.lane_direction,
       dwd_st.section_no,
       dwd_rc.device_direction,
       sum(volume_sum) sum_volume_day,
       case when pmod(datediff(create_time,'2023-11-27') + 1,7) = 1 then '周一'
           when pmod(datediff(create_time,'2023-11-27') + 1,7) = 2 then '周二'
           when pmod(datediff(create_time,'2023-11-27') + 1,7) = 3 then '周三'
           when pmod(datediff(create_time,'2023-11-27') + 1,7) = 4 then '周四'
           when pmod(datediff(create_time,'2023-11-27') + 1,7) = 5 then '周五'
           when pmod(datediff(create_time,'2023-11-27') + 1,7) = 6 then '周六'
      else '周日' end as week_day,
       substr(day,1,7) month,
       day
from hurys_db.dwd_statistics as dwd_st
   right join hurys_db.dwd_radar_lane as dwd_rl
             on dwd_rl.device_no=dwd_st.device_no and dwd_rl.lane_no=dwd_st.lane_no
   right join hurys_db.dwd_device_scene as dwd_ds
             on dwd_ds.device_no=dwd_st.device_no
   right join hurys_db.dwd_scene as dwd_sc
             on dwd_sc.scene_id = dwd_ds.scene_id
   right join hurys_db.dwd_radar_config as dwd_rc
             on dwd_rc.device_no=dwd_st.device_no
where dwd_st.create_time is not null   and   dwd_st.day='2024-09-05'
group by dwd_st.device_no, dwd_sc.scene_name, dwd_st.lane_no, dwd_rl.lane_direction, dwd_st.section_no, dwd_rc.device_direction, case when pmod(datediff(create_time,'2023-11-27') + 1,7) = 1 then '周一'
           when pmod(datediff(create_time,'2023-11-27') + 1,7) = 2 then '周二'
           when pmod(datediff(create_time,'2023-11-27') + 1,7) = 3 then '周三'
           when pmod(datediff(create_time,'2023-11-27') + 1,7) = 4 then '周四'
           when pmod(datediff(create_time,'2023-11-27') + 1,7) = 5 then '周五'
           when pmod(datediff(create_time,'2023-11-27') + 1,7) = 6 then '周六'
      else '周日' end, day
;

2.1.2 ClickHouse中现有代码

2.1.2.1 ClickHouse中表结构
--1、统计数据流量表——动态分区——1天周期
create  table  if not exists  hurys_jw.dws_statistics_volume_1day(
    device_no        String                   comment '设备编号',
    scene_name       String                   comment '场景名称',
    lane_no          Nullable(Int32)          comment '车道编号',
    lane_direction   Nullable(String)         comment '车道流向',
    section_no       Nullable(Int32)          comment '断面编号',
    device_direction Nullable(String)         comment '雷达朝向',
    sum_volume_day   Nullable(Int32)          comment '每天总流量',
    week_day         Nullable(String)         comment '周几',
    month            Nullable(String)         comment '月份',
    day              Date                    comment '日期'
)
ENGINE = MergeTree
PARTITION BY day
PRIMARY KEY day
ORDER BY day
SETTINGS index_granularity = 8192;
2.1.2.2 ClickHouse中SQL语句
--动态加载数据
select
       dwd_st.device_no,
       dwd_sc.scene_name,
       dwd_st.lane_no,
       dwd_rl.lane_direction,
       dwd_st.section_no,
       dwd_rc.device_direction,
       sum(volume_sum) sum_volume_day,
       case when toDayOfWeek(create_time) = 1 then '周一'
            when toDayOfWeek(create_time) = 2 then '周二'
            when toDayOfWeek(create_time) = 3 then '周三'
            when toDayOfWeek(create_time) = 4 then '周四'
            when toDayOfWeek(create_time) = 5 then '周五'
            when toDayOfWeek(create_time) = 6 then '周六'
            when toDayOfWeek(create_time) = 7 then '周日'
        end as week_day,
    concat(toString(toYear(dwd_st.day)), '-', lpad(toString(toMonth(dwd_st.day)), 2, '0')) AS month,
    cast(dwd_st.day as String) day
from hurys_jw.dwd_statistics as dwd_st
   right join hurys_jw.dwd_radar_lane as dwd_rl
             on dwd_rl.device_no=dwd_st.device_no and dwd_rl.lane_no=dwd_st.lane_no
   right join hurys_jw.dwd_device_scene as dwd_ds
             on dwd_ds.device_no=dwd_st.device_no
   right join hurys_jw.dwd_scene as dwd_sc
             on dwd_sc.scene_id = dwd_ds.scene_id
   right join hurys_jw.dwd_radar_config as dwd_rc
             on dwd_rc.device_no=dwd_st.device_no
where dwd_st.create_time is not null and dwd_st.lane_no is not null  and   dwd_st.day >= ?
group by  dwd_st.device_no, dwd_sc.scene_name, dwd_st.lane_no, dwd_rl.lane_direction, dwd_st.section_no, dwd_rc.device_direction, case when toDayOfWeek(create_time) = 1 then '周一'
            when toDayOfWeek(create_time) = 2 then '周二'
            when toDayOfWeek(create_time) = 3 then '周三'
            when toDayOfWeek(create_time) = 4 then '周四'
            when toDayOfWeek(create_time) = 5 then '周五'
            when toDayOfWeek(create_time) = 6 then '周六'
            when toDayOfWeek(create_time) = 7 then '周日'
        end, dwd_st.day
;

2.2 统计数据流量表5分钟周期

2.2.1 Hive中原有代码

2.2.1.1 Hive中建表语句
--5、统计数据流量表——动态分区——5分钟周期
create  table  if not exists  hurys_db.dws_statistics_volume_5min(
    device_no        string         comment '设备编号',
    scene_name       string         comment '场景名称',
    lane_no          int            comment '车道编号',
    lane_direction   string         comment '车道流向',
    section_no       int            comment '断面编号',
    device_direction string         comment '雷达朝向',
    sum_volume_5min  int            comment '每5分钟总流量',
    start_time       timestamp      comment '开始时间'
)
comment '统计数据流量表——动态分区——5分钟周期'
partitioned by (day string)
stored as orc
;
2.2.1.2 Hive中SQL语句
--动态加载数据
insert  overwrite  table  hurys_db.dws_statistics_volume_5min  partition(day)
select
       dwd_st.device_no,
       dwd_sc.scene_name,
       dwd_st.lane_no,
       dwd_rl.lane_direction,
       dwd_st.section_no,
       dwd_rc.device_direction,
       sum(volume_sum)   sum_volume_5min,
      case   when  minute(create_time ) < 5 then
        concat(substr(create_time, 1, 14), '00:00')
      when minute(create_time) >=5 and minute(create_time) <10 then
        concat(substr(create_time, 1, 14), '05:00')
      when minute(create_time) >=10 and minute(create_time) <15 then
        concat(substr(create_time, 1, 14), '10:00')
      when minute(create_time) >=15 and minute(create_time) <20 then
        concat(substr(create_time, 1, 14), '15:00')
      when minute(create_time) >=20 and minute(create_time) <25 then
        concat(substr(create_time, 1, 14), '20:00')
      when minute(create_time) >=25 and minute(create_time) <30 then
        concat(substr(create_time, 1, 14), '25:00')
      when minute(create_time) >=30 and minute(create_time) <35 then
        concat(substr(create_time, 1, 14), '30:00')
      when minute(create_time) >=35 and minute(create_time) <40 then
        concat(substr(create_time, 1, 14), '35:00')
      when minute(create_time) >=40 and minute(create_time) <45 then
        concat(substr(create_time, 1, 14), '40:00')
      when minute(create_time) >=45 and minute(create_time) <50 then
        concat(substr(create_time, 1, 14), '45:00')
      when minute(create_time) >=50 and minute(create_time) <55 then
        concat(substr(create_time, 1, 14), '50:00')
     else
        concat(substr(create_time, 1, 14), '55:00') end as start_time,
       day
from hurys_db.dwd_statistics as dwd_st
    right join hurys_db.dwd_radar_lane as dwd_rl
              on dwd_rl.device_no=dwd_st.device_no and dwd_rl.lane_no=dwd_st.lane_no
    right join hurys_db.dwd_device_scene as dwd_ds
              on dwd_ds.device_no=dwd_st.device_no
    right join hurys_db.dwd_scene as dwd_sc
              on dwd_sc.scene_id = dwd_ds.scene_id
    right join hurys_db.dwd_radar_config as dwd_rc
              on dwd_rc.device_no=dwd_st.device_no
where dwd_st.create_time is not null   and   dwd_st.day='2024-09-05'
group by dwd_st.device_no, dwd_sc.scene_name, dwd_st.lane_no, dwd_rl.lane_direction, dwd_st.section_no, dwd_rc.device_direction, case   when  minute(create_time ) < 5 then
        concat(substr(create_time, 1, 14), '00:00')
      when minute(create_time) >=5 and minute(create_time) <10 then
        concat(substr(create_time, 1, 14), '05:00')
      when minute(create_time) >=10 and minute(create_time) <15 then
        concat(substr(create_time, 1, 14), '10:00')
      when minute(create_time) >=15 and minute(create_time) <20 then
        concat(substr(create_time, 1, 14), '15:00')
      when minute(create_time) >=20 and minute(create_time) <25 then
        concat(substr(create_time, 1, 14), '20:00')
      when minute(create_time) >=25 and minute(create_time) <30 then
        concat(substr(create_time, 1, 14), '25:00')
      when minute(create_time) >=30 and minute(create_time) <35 then
        concat(substr(create_time, 1, 14), '30:00')
      when minute(create_time) >=35 and minute(create_time) <40 then
        concat(substr(create_time, 1, 14), '35:00')
      when minute(create_time) >=40 and minute(create_time) <45 then
        concat(substr(create_time, 1, 14), '40:00')
      when minute(create_time) >=45 and minute(create_time) <50 then
        concat(substr(create_time, 1, 14), '45:00')
      when minute(create_time) >=50 and minute(create_time) <55 then
        concat(substr(create_time, 1, 14), '50:00')
     else
        concat(substr(create_time, 1, 14), '55:00') end, day
;

2.2.2 ClickHouse中现有代码

2.2.2.1 ClickHouse中表结构
--5、统计数据流量表——动态分区——5分钟周期
create  table  if not exists  hurys_jw.dws_statistics_volume_5min(
    device_no        String                   comment '设备编号',
    scene_name       String                   comment '场景名称',
    lane_no          Nullable(Int32)          comment '车道编号',
    lane_direction   Nullable(String)         comment '车道流向',
    section_no       Nullable(Int32)          comment '断面编号',
    device_direction Nullable(String)         comment '雷达朝向',
    sum_volume_5min  Nullable(Int32)          comment '每5分钟总流量',
    start_time       DateTime                 comment '开始时间',
    day              Date                    comment '日期'
)
ENGINE = MergeTree
PARTITION BY day
PRIMARY KEY day
ORDER BY day
SETTINGS index_granularity = 8192;
2.2.2.2 ClickHouse中SQL语句
--动态加载数据
select
       dwd_st.device_no,
       dwd_sc.scene_name,
       dwd_st.lane_no,
       dwd_rl.lane_direction,
       dwd_st.section_no,
       dwd_rc.device_direction,
       sum(volume_sum)   sum_volume_5min,
        toDateTime(concat(toString(toDate(create_time)),' ',lpad(toString(extract(hour FROM create_time)), 2, '0'),':',
            CASE
                WHEN extract(minute FROM create_time) < 5 THEN '00'
                WHEN extract(minute FROM create_time) >= 5 AND extract(minute FROM create_time) < 10 THEN '05'
                WHEN extract(minute FROM create_time) >= 10 AND extract(minute FROM create_time) < 15 THEN '10'
                WHEN extract(minute FROM create_time) >= 15 AND extract(minute FROM create_time) < 20 THEN '15'
                WHEN extract(minute FROM create_time) >= 20 AND extract(minute FROM create_time) < 25 THEN '20'
                WHEN extract(minute FROM create_time) >= 25 AND extract(minute FROM create_time) < 30 THEN '25'
                WHEN extract(minute FROM create_time) >= 30 AND extract(minute FROM create_time) < 35 THEN '30'
                WHEN extract(minute FROM create_time) >= 35 AND extract(minute FROM create_time) < 40 THEN '35'
                WHEN extract(minute FROM create_time) >= 40 AND extract(minute FROM create_time) < 45 THEN '40'
                WHEN extract(minute FROM create_time) >= 45 AND extract(minute FROM create_time) < 50 THEN '45'
                WHEN extract(minute FROM create_time) >= 50 AND extract(minute FROM create_time) < 55 THEN '50'
                ELSE '55'
            END,
            ':00'
        ))  as start_time,
       cast(dwd_st.day as String) day
from hurys_jw.dwd_statistics as dwd_st
    right join hurys_jw.dwd_radar_lane as dwd_rl
              on dwd_rl.device_no=dwd_st.device_no and dwd_rl.lane_no=dwd_st.lane_no
    right join hurys_jw.dwd_device_scene as dwd_ds
              on dwd_ds.device_no=dwd_st.device_no
    right join hurys_jw.dwd_scene as dwd_sc
              on dwd_sc.scene_id = dwd_ds.scene_id
    right join hurys_jw.dwd_radar_config as dwd_rc
              on dwd_rc.device_no=dwd_st.device_no
where dwd_st.create_time is not null   and  dwd_st.lane_no is not null   and   dwd_st.day >= ?
group by dwd_st.device_no, dwd_sc.scene_name, dwd_st.lane_no, dwd_rl.lane_direction, dwd_st.section_no, dwd_rc.device_direction, toDateTime(concat(toString(toDate(create_time)),' ',lpad(toString(extract(hour FROM create_time)), 2, '0'),':',
            CASE
                WHEN extract(minute FROM create_time) < 5 THEN '00'
                WHEN extract(minute FROM create_time) >= 5 AND extract(minute FROM create_time) < 10 THEN '05'
                WHEN extract(minute FROM create_time) >= 10 AND extract(minute FROM create_time) < 15 THEN '10'
                WHEN extract(minute FROM create_time) >= 15 AND extract(minute FROM create_time) < 20 THEN '15'
                WHEN extract(minute FROM create_time) >= 20 AND extract(minute FROM create_time) < 25 THEN '20'
                WHEN extract(minute FROM create_time) >= 25 AND extract(minute FROM create_time) < 30 THEN '25'
                WHEN extract(minute FROM create_time) >= 30 AND extract(minute FROM create_time) < 35 THEN '30'
                WHEN extract(minute FROM create_time) >= 35 AND extract(minute FROM create_time) < 40 THEN '35'
                WHEN extract(minute FROM create_time) >= 40 AND extract(minute FROM create_time) < 45 THEN '40'
                WHEN extract(minute FROM create_time) >= 45 AND extract(minute FROM create_time) < 50 THEN '45'
                WHEN extract(minute FROM create_time) >= 50 AND extract(minute FROM create_time) < 55 THEN '50'
                ELSE '55'
            END,
            ':00'
        )), cast(dwd_st.day as String)
;

就先这样,反正ClickHouse和Hive的SQL语句非常非常不一样!!!


http://www.kler.cn/a/374671.html

相关文章:

  • 软件测试基础知识总结
  • JAVA的设计模式都有那些
  • 十个常见的软件测试面试题,拿走不谢
  • centos 选什么Distribution?flash安装
  • 强大的文本编辑器Notepad++8.4.6 最新版
  • 逗号运算符应用举例
  • NPM 包开发与优化全面指南
  • Resnet50进行迁移学习实现图片二分类
  • vue vxeui 上传组件 vxe-upload 全局配置上传方法,显示上传进度,最完美的配置方案
  • 音视频听译:助力多维度沟通与发展的大门
  • 预告帖|在MATLAB/Simulink中调用C语言的几种方法
  • 【neo4j】 neo4j cypher单一语句 optional 可选操作的技巧
  • 【CSS in Depth 2 精译_055】8.3 伪类 :is() 和 :where() 的正确打开方式
  • JS 字符串拼接并去重
  • Java 判断回文数
  • 乐鑫ESP32-S3无线AI语音方案,教育机器人交互应用,启明云端乐鑫代理商
  • Linux补基础之:网络配置
  • 笔试题 求空格分割的英文句子中,最大单词长度。
  • 大语言模型推理代码构建(基于llama3模型)
  • 2001-2023年A股上市公司数字化转型数据(MDA报告词频统计)(三种方法)
  • (51)MATLAB迫零均衡器系统建模与性能仿真
  • python使用pymysql
  • 关于我、重生到500年前凭借C语言改变世界科技vlog.13——深入理解指针(3)
  • Glide 简易教程
  • 【Rust标准库中的convert(AsRef,From,Into,TryFrom,TryInto)】
  • PyQt5信号与槽一