HiveQL如何统计用户近一段时间活跃标签
create table if not exists schema_name.table_name
(
user_id string comment '用户id'
,country string comment '国家'
,magic_ui_version string comment 'magic版本'
,device_type string comment '机型'
,starting_source string comment '启动来源'
,today_active_flg string comment '今日活跃标识(1活跃0:未活跃)'
,first_event_time string comment '用户首次活跃时间'
,last_event_time string comment '用户末次活跃时间'
,user_active_365_flg string comment '用户365活跃标志'
,active_retention_2d_flg string comment '活跃用户次日留存标识'
)comment '用户365活跃日表'
partitioned by (pt_d string comment '天分区')
row format delimited
fields terminated by '\001'
lines terminated by '\n'
stored as orc
;
insert overwrite table schema_name.table_name
partition (pt_d = '${hiveconf:pt_d}')
select
user_id
,country
,magic_ui_version
,device_type
,starting_source
,today_active_flg
,first_event_time
,last_event_time
,user_active_365_flg
,if(substring(user_active_365_flg,2,1)='1',1,0) as active_retention_2d_flg
from
(
select
user_id
,country
,magic_ui_version
,device_type
,starting_source
,max(today_active_flg) as today_active_flg
,min(first_event_time) as first_event_time
,max(last_event_time) as last_event_time
,concat(max(today_active_flg),substr(max(user_active_365_flg),1,364)) as user_active_365_flg
from
(
select
user_id
,country
,magic_ui_version
,device_type
,starting_source
,'1' as today_active_flg
,rpad('',365,'0') as user_active_365_flg
,'${hiveconf:pt_d}' as first_event_time
,'${hiveconf:pt_d}' as last_event_time
from table_name_inc_d--增量表
where pt_d='${hiveconf:pt_d}'
group by
user_id
,country
,magic_ui_version
,device_type
,starting_source
union all
select
user_id
,country
,magic_ui_version
,device_type
,starting_source
,'0' as today_active_flg
,user_active_365_flg
,first_event_time
,last_event_time
from table_name_all_d--全量表
where pt_d='${hiveconf:last_day}'
)t1
group by
user_id
,country
,magic_ui_version
,device_type
,starting_source
)t2
;
通过设计一张全量数据累积模型,给用户打上活跃标签。可基于用户活跃标签数据模型,有效提高用户活跃相关标签计算性能