MySQL 学习笔记(刷题篇)
SQL进阶挑战
增删改操作
MySQL 插入数据
SQL 110(插入)
date类型的数据插入,也用引号括起来,日期和时间之间用空格隔开
# 表已经建好了,一次插入多个即可
INSERT INTO exam_record values(1 , 1001, 9001, '2021-09-01 22:11:12','2021-09-01 23:01:12', 90), (2, 1002, 9002, '2021-09-04 07:01:02',null, null);
# 看表结构可以发现 id 是一个自增的
# 把id的值设置为NULL或0,这样MySQL会自己处理这个自增的id列
INSERT INTO exam_record values
(null, 1001, 9001, '2021-09-01 22:11:12','2021-09-01 23:01:12', 90),
(null, 1002, 9002, '2021-09-04 07:01:02',null, null);
SQL 111(插入)
新表exam_record_before_2021已创建好;
第一列为自增主键列,不能直接复制过去;
只复制2021年之前的记录;
只复制已完成了的试题作答纪录;
# 把要复制的列枚举出来,这时的自增的id列是null,但是mysql会自动处理
insert into exam_record_before_2021(uid, exam_id, start_time, submit_time, score)
select uid, exam_id, start_time, submit_time, score
from exam_record
where date_format(submit_time, '%Y') < 2021
SQL 112(插入)
这个题直接插入会显示数据重复
replace
如果插入的数据记录存在就先删除再更新,不存在则直接新增记录
replace into examination_info values
(null, 9003, 'SQL', 'hard', 90, '2021-01-01 00:00:00')
SQL 114(更新)
set
多个字段时,逗号连接即可
update exam_record set submit_time = '2099-01-01 00:00:00', score = 0
where score is null
and date_format(start_time, '%Y%m%d') < '20210901'
SQL 115(删除)
delete from exam_record
where timestampdiff(minute, start_time, submit_time) < 5
and score < 60
SQL 116(删除)
delete from exam_record
where score is null
or timestampdiff(minute, start_time, submit_time) < 5
limit 3
SQL 117(删除全部)
这个题提交有bug
删除记录的方式汇总:
根据条件删除:DELETE FROM
tb_name
WHERE options
ORDER BY fields
LIMIT n
全部删除(表清空,包含自增计数器重置):TRUNCATE TABLE tb_name
TRUNCATE TABLE
用于删除表中的所有行,而不记录单个行删除操作;TRUNCATE TABLE
与没有 WHERE
子句的 DELETE
语句类似,但是,TRUNCATE TABLE
速度更快,使用的系统资源和事务日志资源更少
TRUNCATE table exam_record;
表与索引操作
MySQL 表的创建、复制、修改与删除
SQL 118(建表)
commit
添加字段或列的注释
需要修改字符编码
create table user_info_vip(
id int(11) primary key auto_increment comment '自增ID',
uid int(11) not null unique comment '用户ID',
nick_name varchar(64) comment '昵称',
achievement int(11) default 0 comment '成就值',
level int(11) comment '用户等级',
job varchar(32) comment '职业方向',
register_time datetime default current_timestamp comment '注册时间'
) default charset = utf8
# 字符编码默认是utf8mb4,需要将默认编码改成UTF8才能通过
SQL 119(修改表)
alter table user_info add school varchar(15) after level; # 增加列在某列之后
# alter table 表格名 add 字段名 数据类型 位置(after level)
alter table user_info change job profession varchar(10); # 更换列的名称及数据类型
# alter table user_info change 原列名 修改列名 修改数据类型
alter table user_info modify achievement int(11) default 0; # 更改数据类型
# alter table 表名 modify 修改列名称 数据类型 默认值等
SQL120(删除表)
DROP TABLE IF EXISTS exam_record_2011, exam_record_2012, exam_record_2013, exam_record_2014;
一个一个删
drop table exam_record_2011;
drop table exam_record_2012;
drop table exam_record_2013;
drop table exam_record_2014;
SQL 121(创建索引)
create index idx_duration on examination_info(duration);
create unique index uniq_idx_exam_id on examination_info(exam_id);
create fulltext index full_idx_tag on examination_info(tag);
SQL122(删除索引)
# 删除索引时不用指明类型,直接给出索引名和表名即可
drop index uniq_idx_exam_id on examination_info;
drop index full_idx_tag on examination_info;
聚合分组查询
SQL123
select tag, difficulty,
round((sum(score) - max(score) - min(score) ) / (count(score) - 2) ,1)
as clip_avg_score
from examination_info as ei, exam_record as er
where ei.exam_id = er.exam_id
and ei.tag = 'SQL'
and ei.difficulty = 'hard'
and er.score is not null;
SQL124
IF(expr1 , expr2 , expr3)
,expr1的值为TRUE 返回 expr2,否则返回 expr3
使用 distinct 是需要考虑 null 的,它会把 null 也算成一种情况
但是使用 count(字段) 是不用考虑 null 的,它不会计 null 为一种情况
select count(id) as total_pv,
count(submit_time) as complete_pv,
count(distinct if(submit_time is not null, exam_id, null)) as complete_exam_cnt
from exam_record
select count(id) as total_pv,
count(submit_time) as complete_pv,
count(distinct exam_id and score is not null) as complete_exam_cnt
from exam_record
SQL125
# 这样写为什么就错?
select min(score) as min_score_over_avg
from exam_record #这样写没有保证查询的试卷类型是SQL
where score >= (
select avg(score)
from exam_record as er , examination_info as ei
where ei.tag = 'SQL'
and ei.exam_id = er.exam_id
and er.score is not null
);
# correct
select min(score) as min_score_over_avg
from exam_record as er , examination_info as ei
where ei.tag = 'SQL'
and ei.exam_id = er.exam_id
and er.score is not null
and score >= (
select avg(score)
from exam_record as er
where er.exam_id = ei.exam_id
and er.score is not null
);
SQL126
题目:按年月进行分组,统计每组的用户id个数(也就是这个月有多少活跃用户),统计每组的用户活跃天数的平均值(总天数/总人数)
总天数计算方法:
∑
i
=
1
i
=
l
a
s
t
−
u
s
e
r
第
i
个用户一个月内的登录天数之和
\sum_{i=1}^{i=last-user} 第i个用户一个月内的登录天数之和
∑i=1i=last−user第i个用户一个月内的登录天数之和
DATE_FORMAT(date,fmt)
按照字符串 fmt
格式化日期 date
值
YEAR(date) / MONTH(date) / DAY(date)
返回具体的日期值
count( distinct uid, date_format(submit_time, '%y%m%d') )
,这里的知识点:count函数内本只能接收一个参数,distinct 修饰是所有字段的,并不是修饰一个字段
语句含义:去掉一个用户在一天内的多次登录计数的重复计数,保证如果同一用户在同一天进行了多次活动,只有一次会被计数。
select date_format(submit_time, '%Y%m') as month, # %Y四位年份,%m两位数字月份
round( count(distinct uid, date_format(submit_time, '%y%m%d') ) / count(distinct uid) , 2) as avg_active_days,
count(distinct uid) as mau #统计组内不同用户id数量
from exam_record
where submit_time is not null
and year(submit_time) = 2021
group by date_format(submit_time, '%Y%m') # 按照年月分组
SQL127(COALESCE)
select date_format(submit_time, '%Y%m') as submit_month,
count(distinct uid, submit_time) as month_q_cnt,
round(count(distinct uid, submit_time)
/ max(DAY(LAST_DAY(submit_time))) #这里必须用一个聚合函数,由于汇总时的天数按31算,因此用max最为合适,day+lasy_day一起得到当月的天数
, 3) as avg_day_q_cnt
from practice_record
where year(submit_time) = '2021' #过滤字段写到分组前
group by submit_month
union
select '2021汇总' as submit_month,
count(distinct uid, submit_time) as month_q_cnt,
round(count(distinct uid, submit_time) / 31, 3) as avg_day_q_cnt
from practice_record
where year(submit_time) = '2021'
order by submit_month;
COALESCE
是一个函数,coalesce (expression_1, expression_2, …,expression_n) ,依次检验,返回第一个不是 null 的值
只有两个参数的时候用 IFNULL
也可以
MySQL5.7之后,sql_mode中ONLY_FULL_GROUP_BY模式默认设置为打开状态。
ONLY_FULL_GROUP_BY的语义就是确定select target list中的所有列的值都是明确语义,因此这里的coalesce是不好使的,可以通过any_value()函数来抑制ONLY_FULL_GROUP_BY值被拒绝,any_value()会选择被分到同一组的数据里第一条数据的指定列值作为返回数据
GROUP BY中使用WITH ROLLUP
WITH ROLLUP
,使用 WITH ROLLUP
关键字之后,在所有查询出的分组记录之后增加一条记录
,该记录
计算查询出的所有记录的总和
注意:当使用ROLLUP时,不能同时使用ORDER BY子句进行结果排序,即ROLLUP和ORDER BY是互相排斥的。
SELECT
any_value(coalesce(DATE_FORMAT(submit_time,"%Y%m"),'2021汇总')) as submit_month,
count(submit_time) as month_q_cnt,
# 因为汇总除的数也是31,因此这里取max聚合
round(count(submit_time) / max(day(last_day(submit_time))),3) as avg_day_q_cnt
FROM practice_record
WHERE year(submit_time) = '2021'
GROUP BY date_format(submit_time,"%Y%m") with rollup;
SQL 128
使用 count()
函数实现条件统计的基础是:对于值为NULL的记录不计数,利用这个性质我们可以轻松统计出值不为 NULL 的记录,再统计总记录,即可得到值为 NULL 的记录。
# 统计num大于200的记录
select count(num > 200 or null) from a;
# or null 作用就是当条件不满足时,函数变成了count(null)不会统计数量
# 但是 num > 200 这个条件不成立时的 false 是会被统计到的
GROUP_CONCAT()
函数是mysql中非常实用的聚合函数,将给分组内的值连接为一个字符串。其完整语法:
GROUP_CONCAT([DISTINCT] 要连接的字段 [ORDER BY 排序字段 ASC/DESC] [SEPARATOR ‘分隔符’])
select uid,
count(uid) - count(submit_time) as incomplete_cnt,
count(submit_time) as complete_cnt,
group_concat(distinct date_format(start_time, '%Y-%m-%d'), ':', tag
Order BY start_time ASC #排序字段
SEPARATOR ';') as detail
from exam_record as er
inner join examination_info as ei
on er.exam_id = ei.exam_id
where year(start_time) = '2021' #过滤字段写到分组前
group by uid
having incomplete_cnt < 5 and incomplete_cnt > 1
and complete_cnt >= 1
order by incomplete_cnt desc;
多表查询
SQL 129
先考虑简单的,找出 “当月均完成试卷数”不小于3的用户们
,然后按 tag 分组统计存在 start_time 的作答记录个数即可
select tag, count(start_time) as tag_cnt
from examination_info as ei
inner join exam_record as er
on ei.exam_id = er.exam_id
where uid in (
select uid
from exam_record as er
inner join examination_info as ei
on er.exam_id = ei.exam_id
group by uid, date_format(start_time, '%Y%m')
having count(date_format(submit_time, '%Y%m')) >= 3
)
group by tag
order by tag_cnt desc;
SQL 130
select ei.exam_id as exam_id,
count(distinct uid) as uv,
# round(avg(score) ,1) as avg_score
round(sum(score) / count(score) , 1) as avg_score
from examination_info as ei
inner join exam_record as er
on ei.exam_id = er.exam_id
where date_format(start_time, '%Y%m%d') in ( # 时间
select date_format(release_time, '%Y%m%d') # 先弄出SQL试卷的发出的时间字段
from examination_info
where tag = 'SQL'
)
and uid in ( # 用户
select uid # 再弄出等级大于5的用户的uid
from user_info
where level > 5
)
and tag = 'SQL' # SQL试卷
group by ei.exam_id #所有的SQL试卷按exam_id分组
order by uv desc, avg_score;
SQL 131
select level, count(level) as level_cnt
from user_info as ui, (
select uid
from exam_record as er
inner join examination_info as ei
on er.exam_id = ei.exam_id
where tag = 'SQL' && score > 80
) as tmp
where ui.uid = tmp.uid
group by level
order by level_cnt desc;
SQL 132
再套一个 select 来使得子查询的排序独立
select * from (
select exam_id as tid,
count(distinct uid) as uv,
count(start_time) as pv
from exam_record
group by exam_id
order by uv desc, pv desc
) as t1
union
select * from (
select question_id as tid,
count(distinct uid) as uv,
count(submit_time) as pv
from practice_record
group by question_id
order by uv desc, pv desc
) as t2
SQL 133
TIME_TO_SEC()
将时间差转换为秒
select uid, 'activity1' as activity
from exam_record
group by uid
having min(score) >= 85
union
select uid, 'activity2' as activity
from examination_info as ei
inner join exam_record as er
on er.exam_id = ei.exam_id
where score > 80
and difficulty = 'hard'
and TIME_TO_SEC(timediff(submit_time, start_time)) < duration * 30
order by uid;
其他操作
SQL 146(IFNULL)
select uid,
floor(avg(any_value(ifnull(score, 0)))) as avg_score,
round(avg(
if(submit_time is not null, timestampdiff(minute, start_time, submit_time), duration)
), 1) as avg_time_took
from examination_info as ei
inner join exam_record as er
on ei.exam_id = er.exam_id
where difficulty = 'hard'
and uid in (
select uid
from user_info
where level = 0
)
group by uid
SQL 147
select uid, nick_name, achievement
from user_info
where nick_name like '牛客%'
and nick_name like '%号'
and achievement between 1200 and 2500
and uid in (
select uid
from exam_record
group by uid
having max(date_format(start_time, '%Y%m')) = '202109'
union
select uid
from practice_record
group by uid
having max(date_format(submit_time, '%Y%m')) = '202109'
)
select uid, nick_name, achievement
from user_info
where nick_name like '牛客%'
and nick_name like '%号'
and achievement between 1200 and 2500
and (
uid in(
select uid
from exam_record
group by uid
having max(date_format(start_time, '%Y%m')) = '202109'
)
or uid in(
select uid
from practice_record
group by uid
having max(date_format(submit_time, '%Y%m')) = '202109'
)
)
SQL 148(正则表达式)
用正则表达式匹配纯数字或者中间纯数字
select uid, er.exam_id,
round(avg(score) ,0) as avg_score
from examination_info as ei
inner join exam_record as er
on ei.exam_id = er.exam_id
where uid in (
select uid
from user_info
where nick_name regexp '^牛客[0-9]+号$'
or nick_name regexp '^[0-9]+$'
)
and ei.exam_id in (
select exam_id
from examination_info
where tag regexp '^[Cc]'
)
and score is not null
group by uid, exam_id
order by uid, avg_score
SQL 149(WITH AS)
比较复杂的一个题,需要用 WITH AS 存一下查询
with t as (
select ui.uid as uid,
count(start_time) - count(submit_time) as incomplete_cnt,
round(if(count(start_time) - count(submit_time) > 0,
(count(start_time) - count(submit_time)) / count(start_time),
0
)
,3) as incomplete_rate,
level,
count(start_time) as total_cnt # 作答个数
from user_info as ui
left join exam_record as er
on ui.uid = er.uid
group by uid
)
select uid, incomplete_cnt, incomplete_rate
from t
where exists(
select uid from t where level = 0 and incomplete_cnt > 2
)
and level = 0
union
select uid, incomplete_cnt, incomplete_rate
from t
where not exists (
select uid from t where level = 0 and incomplete_cnt > 2
)
and total_cnt > 0 # 有作答记录的用户
order by incomplete_rate
SQL150(CASE WHEN THEN)
很烂但有用的代码
select ui.level,
case
when score >= 90 then '优'
when score >= 75 then '良'
when score >= 60 then '中'
when score >= 0 then '差' end as score_grade,
round(count(
case
when score >= 90 then '优'
when score >= 75 then '良'
when score >= 60 then '中'
when score >= 0 then '差' end) / num
, 3) as ratio
from exam_record as er, user_info as ui, (
select level, count(level) as num
from exam_record as er
inner join user_info as ui
on er.uid = ui.uid
where score is not null
group by level
order by level desc
) as tmp
where er.uid = ui.uid
and tmp.level = ui.level
and score is not null
group by level, score_grade
order by level desc, ratio desc
SQL 152
select er.uid, level, register_time, score as max_score
from exam_record as er
inner join user_info as ui
on er.uid = ui.uid
where exam_id in ( # 把exam_record筛的只剩下job为算法的人做的算法试卷记录
select exam_id
from examination_info
where tag = '算法'
)
and er.uid in (
select uid
from user_info
where job = '算法'
)
and score is not null # 还得做完
order by score desc
limit 6, 3;
SQL 153(substring_index)
substring_index(str,delim,count)
,str:要处理的字符串,delm:分隔符
SELECT exam_id,
substring_index(tag, ',', 1) AS tag,
substring_index(substring_index(tag, ',', 2), ',', -1) AS difficulty,
substring_index(tag, ',', -1) AS duration
FROM examination_info
WHERE tag LIKE '%,%';
SQL 154(IF)
简单的 IF
应用
select uid, (
if(char_length(nick_name) > 13,
concat(substring(nick_name, 1, 10), '...'),
nick_name
)
) as nick_name
from user_info
where char_length(nick_name) > 10;
SQL 155
这个题写的我脑子有点乱
select t1.tag, t2.total_num
from (
select tag, num # 查询试卷作答数小于3的exam_id对应的tag和个数
from examination_info as ei, (
select exam_id, count(exam_id) as num #按exam_id分组,并统计个数
from exam_record
group by exam_id
) as tmp
where ei.exam_id = tmp.exam_id # 多表查询
and num < 3
) as t1, (
select tag, sum(num) as total_num #按tag分类,把大写的tag聚合起来统计个数
from examination_info as ei, (
select exam_id, count(exam_id) as num
from exam_record
group by exam_id
) as tmp
where ei.exam_id = tmp.exam_id
group by tag
) as t2
where upper(t1.tag) = t2.tag # 小写的t1.tag匹配大写的t2.tag
and t1.tag != t2.tag