当前位置: 首页 > article >正文

SQL数据库刷题sql_day34(移动平均值、累计求和)

描述 移动平均值

1.求不同产品 每个月以及截至当前月最近3个月的平均销售额

2.求不同产品截至当前月份的累计销售额

数据准备

mysql

CREATE TABLE sales_monthly (
    product VARCHAR(20),
    ym VARCHAR(10),
    amount DECIMAL(10,2)
);

-- 插入测试数据
INSERT INTO sales_monthly (product, ym, amount) VALUES ('苹果', '201801', 10159.00);
INSERT INTO sales_monthly (product, ym, amount) VALUES ('苹果', '201802', 10211.00);
INSERT INTO sales_monthly (product, ym, amount) VALUES ('苹果', '201803', 10247.00);
INSERT INTO sales_monthly (product, ym, amount) VALUES ('苹果', '201804', 10376.00);
INSERT INTO sales_monthly (product, ym, amount) VALUES ('苹果', '201805', 10400.00);
INSERT INTO sales_monthly (product, ym, amount) VALUES ('苹果', '201806', 10565.00);
INSERT INTO sales_monthly (product, ym, amount) VALUES ('苹果', '201807', 10613.00);
INSERT INTO sales_monthly (product, ym, amount) VALUES ('苹果', '201808', 10696.00);
INSERT INTO sales_monthly (product, ym, amount) VALUES ('苹果', '201809', 10751.00);
INSERT INTO sales_monthly (product, ym, amount) VALUES ('苹果', '201810', 10842.00);
INSERT INTO sales_monthly (product, ym, amount) VALUES ('苹果', '201811', 10900.00);
INSERT INTO sales_monthly (product, ym, amount) VALUES ('苹果', '201812', 10972.00);
INSERT INTO sales_monthly (product, ym, amount) VALUES ('苹果', '201901', 11155.00);
INSERT INTO sales_monthly (product, ym, amount) VALUES ('苹果', '201902', 11202.00);
INSERT INTO sales_monthly (product, ym, amount) VALUES ('苹果', '201903', 11260.00);
INSERT INTO sales_monthly (product, ym, amount) VALUES ('苹果', '201904', 11341.00);
INSERT INTO sales_monthly (product, ym, amount) VALUES ('苹果', '201905', 11459.00);
INSERT INTO sales_monthly (product, ym, amount) VALUES ('苹果', '201906', 11560.00);

INSERT INTO sales_monthly (product, ym, amount) VALUES ('香蕉', '201801', 10138.00);
INSERT INTO sales_monthly (product, ym, amount) VALUES ('香蕉', '201802', 10194.00);
INSERT INTO sales_monthly (product, ym, amount) VALUES ('香蕉', '201803', 10328.00);
INSERT INTO sales_monthly (product, ym, amount) VALUES ('香蕉', '201804', 10322.00);
INSERT INTO sales_monthly (product, ym, amount) VALUES ('香蕉', '201805', 10481.00);
INSERT INTO sales_monthly (product, ym, amount) VALUES ('香蕉', '201806', 10502.00);
INSERT INTO sales_monthly (product, ym, amount) VALUES ('香蕉', '201807', 10589.00);
INSERT INTO sales_monthly (product, ym, amount) VALUES ('香蕉', '201808', 10681.00);
INSERT INTO sales_monthly (product, ym, amount) VALUES ('香蕉', '201809', 10798.00);
INSERT INTO sales_monthly (product, ym, amount) VALUES ('香蕉', '201810', 10829.00);
INSERT INTO sales_monthly (product, ym, amount) VALUES ('香蕉', '201811', 10913.00);
INSERT INTO sales_monthly (product, ym, amount) VALUES ('香蕉', '201812', 11056.00);
INSERT INTO sales_monthly (product, ym, amount) VALUES ('香蕉', '201901', 11161.00);
INSERT INTO sales_monthly (product, ym, amount) VALUES ('香蕉', '201902', 11173.00);
INSERT INTO sales_monthly (product, ym, amount) VALUES ('香蕉', '201903', 11288.00);
INSERT INTO sales_monthly (product, ym, amount) VALUES ('香蕉', '201904', 11408.00);
INSERT INTO sales_monthly (product, ym, amount) VALUES ('香蕉', '201905', 11469.00);
INSERT INTO sales_monthly (product, ym, amount) VALUES ('香蕉', '201906', 11528.00);

INSERT INTO sales_monthly (product, ym, amount) VALUES ('桔子', '201801', 10154.00);
INSERT INTO sales_monthly (product, ym, amount) VALUES ('桔子', '201802', 10183.00);
INSERT INTO sales_monthly (product, ym, amount) VALUES ('桔子', '201803', 10245.00);
INSERT INTO sales_monthly (product, ym, amount) VALUES ('桔子', '201804', 10325.00);
INSERT INTO sales_monthly (product, ym, amount) VALUES ('桔子', '201805', 10465.00);
INSERT INTO sales_monthly (product, ym, amount) VALUES ('桔子', '201806', 10505.00);
INSERT INTO sales_monthly (product, ym, amount) VALUES ('桔子', '201807', 10578.00);
INSERT INTO sales_monthly (product, ym, amount) VALUES ('桔子', '201808', 10680.00);
INSERT INTO sales_monthly (product, ym, amount) VALUES ('桔子', '201809', 10788.00);
INSERT INTO sales_monthly (product, ym, amount) VALUES ('桔子', '201810', 10838.00);
INSERT INTO sales_monthly (product, ym, amount) VALUES ('桔子', '201811', 10942.00);
INSERT INTO sales_monthly (product, ym, amount) VALUES ('桔子', '201812', 10988.00);
INSERT INTO sales_monthly (product, ym, amount) VALUES ('桔子', '201901', 11099.00);
INSERT INTO sales_monthly (product, ym, amount) VALUES ('桔子', '201902', 11181.00);
INSERT INTO sales_monthly (product, ym, amount) VALUES ('桔子', '201903', 11302.00);
INSERT INTO sales_monthly (product, ym, amount) VALUES ('桔子', '201904', 11327.00);
INSERT INTO sales_monthly (product, ym, amount) VALUES ('桔子', '201905', 11423.00);
INSERT INTO sales_monthly (product, ym, amount) VALUES ('桔子', '201906', 11524.00);

pandas

import pandas as pd

# 创建模拟数据
data = {
    'product': ['苹果']*18 + ['香蕉']*18 + ['桔子']*18,
    'ym': [
        '201801', '201802', '201803', '201804', '201805', '201806', '201807', '201808', '201809', '201810', '201811', '201812',
        '201901', '201902', '201903', '201904', '201905', '201906',
        '201801', '201802', '201803', '201804', '201805', '201806', '201807', '201808', '201809', '201810', '201811', '201812',
        '201901', '201902', '201903', '201904', '201905', '201906',
        '201801', '201802', '201803', '201804', '201805', '201806', '201807', '201808', '201809', '201810', '201811', '201812',
        '201901', '201902', '201903', '201904', '201905', '201906'
    ],
    'amount': [
        10159.00, 10211.00, 10247.00, 10376.00, 10400.00, 10565.00, 10613.00, 10696.00, 10751.00, 10842.00, 10900.00, 10972.00,
        11155.00, 11202.00, 11260.00, 11341.00, 11459.00, 11560.00,
        10138.00, 10194.00, 10328.00, 10322.00, 10481.00, 10502.00, 10589.00, 10681.00, 10798.00, 10829.00, 10913.00, 11056.00,
        11161.00, 11173.00, 11288.00, 11408.00, 11469.00, 11528.00,
        10154.00, 10183.00, 10245.00, 10325.00, 10465.00, 10505.00, 10578.00, 10680.00, 10788.00, 10838.00, 10942.00, 10988.00,
        11099.00, 11181.00, 11302.00, 11327.00, 11423.00, 11524.00
    ]
}

df = pd.DataFrame(data)

分析

三个窗口函数 实现三个功能

  • 第一个 
    avg(amount) over(partition by product order by ym rows between 2 preceding and current row)  

        根据product分组根据ym求平均 范围是前两行到当前行

  • 第二个
    avg(amount) over(partition by product order by ym rows unbounded preceding) 

        根据product、ym分组 求截止到当月的平均金额

  •  第三个
    sum(amount) over(partition by product order by ym rows between unbounded preceding and current row )

        根据product分组 求截止到当月的总金额

代码

select product,
       amount,ym,
       avg(amount) over(partition by product order by ym rows between 2 preceding and current row )r1,
       avg(amount) over(partition by product order by ym rows unbounded preceding) r2,
       sum(amount) over(partition by product order by ym rows between unbounded preceding and current row ) r3
from sales_monthly

df['count'] =df.groupby(by='product')['amount'].cumcount()
df['avg1'] = df.groupby('product').apply(lambda x: x['amount'].rolling(3, min_periods=1).mean()).reset_index(level=0, drop=True)

df['sum'] = df.groupby('product')['amount'].cumsum()
df['avg2'] = df['sum']/(df['count']+1)
print(df)

总结

rows 是根据该行的上下行划定范围的 

range是根据该行的值的邻近值划定范围(所以注意格式)

②pandas里的cumsum函数的积累 求累计和

pands求近三行数据用rolling(window=3,min_periods=1)

  • window:指定窗口的大小,即参与计算的连续数据点的数量。
  • min_periods:指定窗口中至少需要有多少个非缺失值数据点才进行计算,默认为None,表示窗口大小的所有数据点都必须存在才进行计算。
  • center:如果为True,则将窗口的标签设置为居中在当前位置。默认是窗口的右边界与当前位置对齐。
  • win_type:指定窗口的类型,可以是各种加权窗口函数,如矩形窗、三角窗等。默认为None,表示使用等权重的矩形窗。

描述

查找短期之内(5天)累计转账超过100万元的账户

数据准备

CREATE TABLE transfer_log (
    log_id    int, -- 交易日志编号
    log_ts    TIMESTAMP NOT NULL, -- 交易时间
    from_user VARCHAR(50) NOT NULL, -- 交易发起账号
    to_user   VARCHAR(50), -- 交易接收账号
    type      VARCHAR(10) NOT NULL, -- 交易类型
    amount    float NOT NULL -- 交易金额(元),保留两位小数
);

-- 插入测试数据
INSERT INTO transfer_log (log_id, log_ts, from_user, to_user, type, amount)
VALUES (1, '2021-01-02 10:31:40',  '62221234567890', NULL, '存款', 50000);

INSERT INTO transfer_log (log_id, log_ts, from_user, to_user, type, amount)
VALUES (2, '2021-01-02 10:32:15', '62221234567890', NULL, '存款', 100000);

INSERT INTO transfer_log (log_id, log_ts, from_user, to_user, type, amount)
VALUES (3, '2021-01-03 08:14:29',  '62221234567890', '62226666666666', '转账', 200000);

INSERT INTO transfer_log (log_id, log_ts, from_user, to_user, type, amount)
VALUES (4, '2021-01-05 13:55:38',  '62221234567890', '62226666666666', '转账', 150000);

INSERT INTO transfer_log (log_id, log_ts, from_user, to_user, type, amount)
VALUES (5, '2021-01-07 20:00:31',  '62221234567890', '62227777777777', '转账', 300000);

INSERT INTO transfer_log (log_id, log_ts, from_user, to_user, type, amount)
VALUES (6, '2021-01-09 17:28:07',  '62221234567890', '62227777777777', '转账', 500000);

INSERT INTO transfer_log (log_id, log_ts, from_user, to_user, type, amount)
VALUES (7, '2021-01-10 07:46:02',  '62221234567890', '62227777777777', '转账', 100000);

INSERT INTO transfer_log (log_id, log_ts, from_user, to_user, type, amount)
VALUES (8, '2021-01-11 09:36:53',  '62221234567890', NULL, '存款', 40000);

INSERT INTO transfer_log (log_id, log_ts, from_user, to_user, type, amount)
VALUES (9, '2021-01-12 07:10:01',  '62221234567890', '62228888888881', '转账', 10000);

INSERT INTO transfer_log (log_id, log_ts, from_user, to_user, type, amount)
VALUES (10, '2021-01-12 07:11:12',  '62221234567890', '62228888888882', '转账', 8000);

INSERT INTO transfer_log (log_id, log_ts, from_user, to_user, type, amount)
VALUES (11, '2021-01-12 07:12:36',  '62221234567890', '62228888888883', '转账', 5000);

INSERT INTO transfer_log (log_id, log_ts, from_user, to_user, type, amount)
VALUES (12, '2021-01-12 07:13:55',  '62221234567890', '62228888888884', '转账', 6000);

INSERT INTO transfer_log (log_id, log_ts, from_user, to_user, type, amount)
VALUES (13, '2021-01-12 07:14:24',  '62221234567890', '62228888888885', '转账', 7000);

INSERT INTO transfer_log (log_id, log_ts, from_user, to_user, type, amount)
VALUES (14, '2021-01-21 12:11:16',  '62221234567890', '62228888888885', '转账', 70000);

分析

通过sum(amount) over (partition by from_user order by log_ts range interval 5 day preceding)即可求出最近五天的总额

代码

 with t1 as (select *,
                    sum(amount) over (partition by from_user order by log_ts range interval 5 day preceding) r2
             from transfer_log
             where type = '转账')
 select *
 from t1
 where r2 > 1000000;


http://www.kler.cn/news/357695.html

相关文章:

  • 别再犯这些Java并发编程的常见错误!你中了几个?
  • Java-类与对象-下篇
  • mysql学习教程,从入门到精通,sql序列使用(45)
  • 红日安全vulnstack (一)
  • Redis应用高频面试题
  • 【VUE】封装用户树形选择和部门树形选择控件
  • 递归神经网络(RNN)简介
  • Docker无法拉取镜像解决办法
  • TiDB 关联子查询及半连接的优化实践
  • 微信小程序设计尺寸
  • 数据结构2-线性表
  • 富唯智能:重塑CNC格局,柔性制造开启智能制造新时代
  • 练习题 - Scrapy爬虫框架 Items 数据项
  • 【ARM】AMBA概述
  • 跟《经济学人》学英文:2024年10月19日这期 Pity the superstar fashion designer
  • 代码随想录算法训练营第三十六天|56. 合并区间,738. 单调递增的数字,968. 监控二叉树
  • 【OD】【E卷】【真题】【100分】流浪地球(PythonJavaJavaScriptC++C)
  • [论文笔记] Megatron LM环境安装
  • 如何查看默认网关地址:详细步骤
  • 高级大数据工程师带你一起学习Hdoop生态Flink基础原理保姆级教程