MySQL和Doris开窗函数LAG执行时的区别
目标表的表结构
-- Target table filled by the INSERT ... SELECT statements in this note.
-- `duration` receives the TIMESTAMPDIFF(SECOND, ...) result, i.e. a number of seconds.
-- The string columns carry no column-level charset clause: they inherit
-- utf8mb4 / utf8mb4_general_ci from the table defaults declared at the bottom.
CREATE TABLE `point_value_status_color_tmp` (
    `id`           BIGINT      NOT NULL,
    `org_id`       VARCHAR(64) DEFAULT NULL,
    `device_id`    VARCHAR(64) DEFAULT NULL,
    `origin_time`  TIMESTAMP   NULL DEFAULT NULL,
    `status_color` VARCHAR(3)  DEFAULT NULL,
    `duration`     INT         DEFAULT NULL,
    PRIMARY KEY (`id`) USING BTREE
)
ENGINE = InnoDB
DEFAULT CHARSET = utf8mb4
COLLATE = utf8mb4_general_ci;
MySQL 8中执行将报错的SQL
-- Reproduction case: MySQL 8 rejects this statement with error 1292
-- (incorrect datetime value: ''), while Doris executes it without error.
-- The '' default passed to LAG(`origin_time`, ...) is what triggers the error
-- and is intentionally left in place here.
INSERT INTO point_value_status_color_tmp
SELECT
    `id`,
    `org_id`,
    `device_id`,
    origin_time,
    `status_color`,
    IFNULL(
        TIMESTAMPDIFF(
            SECOND,
            LAG(`origin_time`, 1, '') OVER (PARTITION BY device_id ORDER BY origin_time ASC),
            origin_time
        ),
        0
    ) AS `duration`
FROM (
    -- Attach the previous status_color of the same device ('' when there is none).
    SELECT
        *,
        LAG(`status_color`, 1, '') OVER (PARTITION BY device_id ORDER BY origin_time ASC) AS c
    FROM point_value_tmp
) AS status_with_prev
-- Keep only the rows whose status_color differs from the previous one.
WHERE `status_color` <> c
报错提示
上述的SQL在MySQL执行时会报SQL错误[1292][22001] incorrect datetime value:'',而在Doris下不会有问题
去掉INSERT INTO部分后执行一切都正常;含INSERT INTO时会触发INSERT语句的合法性校验,主要是LAG(`origin_time`,1,'')部分会导致新增的字段值可能是空字符串,而空字符串不是合法的datetime值。需要注意的是,开窗函数的计算并不会改变origin_time字段本身的值。
解决此问题,可将SQL改为下面两种方式之一
-- MySQL-only fix: LAG falls back to the current row's own origin_time, so the
-- first row of each device yields TIMESTAMPDIFF = 0 instead of hitting an
-- invalid '' datetime. Doris rejects this form because it requires the LAG
-- default argument to be a constant.
-- Column lists are spelled out so the statement does not depend on the column
-- order of the target table or on extra columns in point_value_tmp.
INSERT INTO point_value_status_color_tmp
    (`id`, `org_id`, `device_id`, `origin_time`, `status_color`, `duration`)
SELECT
    `id`,
    `org_id`,
    `device_id`,
    `origin_time`,
    `status_color`,
    IFNULL(
        TIMESTAMPDIFF(
            SECOND,
            LAG(`origin_time`, 1, `origin_time`) OVER (PARTITION BY device_id ORDER BY origin_time ASC),
            `origin_time`
        ),
        0
    ) AS `duration`
FROM (
    -- Attach the previous status_color of the same device ('' when there is none).
    SELECT
        `id`,
        `org_id`,
        `device_id`,
        `origin_time`,
        `status_color`,
        LAG(`status_color`, 1, '') OVER (PARTITION BY device_id ORDER BY origin_time ASC) AS c
    FROM point_value_tmp
) AS t2
-- Keep only the rows whose status_color differs from the previous one.
WHERE `status_color` != c
上面这种方式在Doris下会报错,SQL 错误 [1105] [HY000]: errCode = 2, detailMessage = The default parameter (parameter 2 or parameter 3) of LEAD/LAG must be a constant: lag(`origin_time`, 1, `origin_time`)
或者
-- Fix that uses a constant LAG default: NULL.
-- NULL is used instead of a sentinel timestamp such as '1970-01-01 08:00:00':
-- with a sentinel, the first row of each device would store the number of
-- seconds elapsed since the sentinel as its duration, and IFNULL(..., 0) would
-- never apply. With NULL, TIMESTAMPDIFF returns NULL for that first row and
-- IFNULL turns it into 0. NULL is also a valid value for the datetime
-- expression, so MySQL's INSERT validation does not raise error 1292.
-- NOTE(review): NULL is a constant, so Doris is expected to accept it as the
-- LAG default as well -- confirm on the Doris version in use.
-- Column lists are spelled out so the statement does not depend on the column
-- order of the target table or on extra columns in point_value_tmp.
INSERT INTO point_value_status_color_tmp
    (`id`, `org_id`, `device_id`, `origin_time`, `status_color`, `duration`)
SELECT
    `id`,
    `org_id`,
    `device_id`,
    `origin_time`,
    `status_color`,
    IFNULL(
        TIMESTAMPDIFF(
            SECOND,
            LAG(`origin_time`, 1, NULL) OVER (PARTITION BY device_id ORDER BY origin_time ASC),
            `origin_time`
        ),
        0
    ) AS `duration`
FROM (
    -- Attach the previous status_color of the same device ('' when there is none).
    SELECT
        `id`,
        `org_id`,
        `device_id`,
        `origin_time`,
        `status_color`,
        LAG(`status_color`, 1, '') OVER (PARTITION BY device_id ORDER BY origin_time ASC) AS c
    FROM point_value_tmp
) AS t2
-- Keep only the rows whose status_color differs from the previous one.
WHERE `status_color` != c