Python||数据分析与可视化_使用折线图分析各个城市的PM2.5月度差异情况(下)及使用堆叠柱状图对各个城市的PM2.5日均值情况进行数据分析与可视化
目录
1.使用Python折线图对各个城市的PM2.5月度差异情况进行数据分析与可视化。
2.使用Python堆叠柱状图对各个城市的PM2.5日均值情况进行数据分析与可视化。
1.使用Python折线图对各个城市的PM2.5月度差异情况进行数据分析与可视化。
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
# Columns requested from every city's CSV in addition to its station columns.
common_cols = ['year', 'month', 'day', 'hour']

# Keys of the five cities, in processing order.
citys = ['beijing', 'chengdu', 'guangzhou', 'shanghai', 'shenyang']

# Per-city settings: (CSV file name, PM2.5 station column names, display name).
data_config_dict = {
    'beijing': (
        'Beijing.csv',
        ['PM_Dongsi', 'PM_Dongsihuan', 'PM_Nongzhanguan'],
        '北京',
    ),
    'chengdu': (
        'Chengdu.csv',
        ['PM_Caotangsi', 'PM_Shahepu'],
        '成都',
    ),
    'guangzhou': (
        'Guangzhou.csv',
        ['PM_City Station', 'PM_5th Middle School'],
        '广州',
    ),
    'shanghai': (
        'Shanghai.csv',
        ['PM_Jingan', 'PM_Xuhui'],
        '上海',
    ),
    'shenyang': (
        'Shenyang.csv',
        ['PM_Taiyuanjie', 'PM_Xiaoheyan'],
        '沈阳',
    ),
}
def pct_pol_level(data_arr):
    """Return the fraction of readings that fall in each of six level bands.

    The bands are ``<= 50``, ``(50, 100]``, ``(100, 150]``, ``(150, 200]``,
    ``(200, 300]`` and ``> 300``. NaN readings satisfy none of the
    comparisons, so they are excluded from both the counts and the total.

    Args:
        data_arr: Array-like of numeric readings that supports element-wise
            comparison (e.g. a NumPy array or a pandas Series).

    Returns:
        A NumPy array of six floats, one per band, in the order listed above.
        When no reading falls in any band (empty or all-NaN input) an array
        of zeros is returned instead of dividing by zero.

    Side effects:
        Prints the raw counts and the resulting fractions.
    """
    level_1 = np.count_nonzero(data_arr <= 50)
    # The lower bound is required here; without it the second band would also
    # count every first-band reading and the fractions would be skewed.
    level_2 = np.count_nonzero((50 < data_arr) & (data_arr <= 100))
    level_3 = np.count_nonzero((100 < data_arr) & (data_arr <= 150))
    level_4 = np.count_nonzero((150 < data_arr) & (data_arr <= 200))
    level_5 = np.count_nonzero((200 < data_arr) & (data_arr <= 300))
    level_6 = np.count_nonzero(data_arr > 300)
    levels = np.array([level_1, level_2, level_3, level_4, level_5, level_6])
    print(levels)
    total = np.sum(levels)
    if total == 0:
        # Nothing to normalise: avoid a 0/0 division that would yield NaNs.
        result = np.zeros(levels.shape, dtype=float)
    else:
        result = levels / total
    print(result)
    return result
# Use a CJK-capable font so the Chinese titles below render correctly.
plt.rcParams['font.sans-serif'] = ['SimHei']
df = pd.DataFrame()
# Labels of the air-quality levels.
level_label = ["优", "良", "轻度", "中度", "重度", "严重"]

# One line chart per city: monthly mean PM2.5 of each monitoring station.
for city in citys:
    # Data preparation: unpack this city's (file, station columns, display name).
    filepath, districts, city_name = data_config_dict[city]
    data = pd.read_csv(filepath, usecols=common_cols + districts)

    # Create the figure and axes explicitly and hand the axes to pandas.
    # Calling DataFrame.plot() without ``ax`` opens a figure of its own, which
    # would leave the figure created here blank and ignore its figsize.
    fig, ax = plt.subplots(figsize=(10, 5))

    # Mean of every station column, grouped by calendar month.
    group = data.groupby(data['month'])[districts]
    group.mean().plot(marker='^', ax=ax)

    ax.set_ylabel('PM2.5')
    ax.set_title('月度指数')
    fig.suptitle(city_name, x=0.05, y=1, fontsize=24, fontweight='bold')
    ax.grid(alpha=0.4)
    plt.show()
五城在6月、7月、8月、9月的空气质量最好,从10月份开始空气质量逐渐变差,在12月和1月达到一年中空气质量最差的时期,从2月份开始空气质量逐渐向好发展。
北京市和广州市的全年空气质量走向趋势较为平稳,而成都市的全年空气质量走向趋势则波动较大。
不同的是,北京市的全年空气质量走向平稳,是在“轻度污染”线上徘徊,而广州市的全年空气质量走向平稳,则是在“良”线上行走。换句话来说,北京市的全年空气质量差,广州市的全年空气质量良好。
上海市和广州市的全年空气质量都相对平稳,且波动趋势较为相近。
成都市和沈阳市的全年空气质量相对波动较大,波动趋势较为相近,在6月、7月、8月、9月的空气质量最好,从10月份开始空气质量逐渐变差,在12月和1月达到一年中空气质量最差的时期,从2月份开始空气质量逐渐向好发展。
2.使用Python堆叠柱状图对各个城市的PM2.5日均值情况进行数据分析与可视化。
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
# Read the five city CSV files (paths are relative to the working directory).
cd, gz, bj, sh, sy = (
    pd.read_csv(csv_path)
    for csv_path in (
        './Chengdu.csv',
        './Guangzhou.csv',
        './Beijing.csv',
        './Shanghai.csv',
        './Shenyang.csv',
    )
)

# Figure that the bar chart below is drawn on.
fig = plt.figure(figsize=(16, 11), dpi=48)

# Years 2010 through 2015 (not referenced by the code visible in this script).
year = list(range(2010, 2016))
def PM(cd, str2, str3):
    """Return the outlier-filtered mean of one column of ``cd``.

    The mean and standard deviation are computed over the non-NaN values of
    column ``str3``; values further than three standard deviations from that
    mean are discarded and the mean of the remaining values is returned. The
    mean is taken over all rows at once; no grouping is applied.

    Args:
        cd: DataFrame containing the column ``str3``.
        str2: Accepted for backward compatibility with existing callers
            (which pass ``'day'``); it is not used by the computation.
        str3: Name of the numeric column to average.

    Returns:
        A one-element pandas Series indexed by ``str3`` holding the filtered
        mean, so callers can keep reading it as ``result[str3]``. The value is
        NaN when no value survives the filter (e.g. empty or single-value
        columns, whose standard deviation is NaN).
    """
    column = cd[str3]
    valid = column.dropna()
    # Series.mean()/std() already return scalars; calling float() on a
    # one-element Series (what a DataFrame reduction yields) is deprecated in
    # recent pandas versions.
    center = float(valid.mean())
    spread = float(valid.std())
    # NaN entries compare False here, so they are dropped together with the
    # outliers.
    within_three_sigma = column[(column - center).abs() <= 3 * spread]
    return pd.Series({str3: within_three_sigma.mean()})
def good(pm):
    """Return the part of ``pm`` lying in the first band, capped at 35.

    Readings above 35 yield exactly 35; readings at or below 35 are returned
    via ``(pm - 35) + 35``, i.e. the reading itself.
    """
    # First band on the chart ("excellent", up to 35).
    overshoot = pm - 35
    if overshoot > 0:
        return 35
    return overshoot + 35
def moderate(pm):
    """Return the part of ``pm`` lying in the 35-75 band (between 0 and 40).

    Readings below 35 contribute 0; readings above 75 contribute the full
    band width of 40.
    """
    # Second band on the chart ("good", 35 to 75).
    above_floor = pm - 35
    above_floor = 0 if above_floor < 0 else above_floor
    overshoot = above_floor - 40
    return 40 if overshoot > 0 else overshoot + 40
def lightlyP(pm):
    """Return the part of ``pm`` lying in the 75-115 band (between 0 and 40).

    Readings below 75 contribute 0; readings above 115 contribute the full
    band width of 40.
    """
    # Third band on the chart (light pollution, 75 to 115).
    above_floor = pm - 75
    if above_floor < 0:
        above_floor = 0
    overshoot = above_floor - 40
    if overshoot > 0:
        return 40
    return overshoot + 40
def moderatelyP(pm):
    """Return the part of ``pm`` lying in the 115-150 band (between 0 and 35).

    Readings below 115 contribute 0; readings above 150 contribute the full
    band width of 35.
    """
    # Fourth band on the chart (moderate pollution, 115 to 150).
    above_floor = pm - 115
    above_floor = 0 if above_floor < 0 else above_floor
    overshoot = above_floor - 35
    return 35 if overshoot > 0 else overshoot + 35
def heavilyP(pm):
    """Return the part of ``pm`` lying in the 150-250 band (between 0 and 100).

    Readings below 150 contribute 0; readings above 250 contribute the full
    band width of 100.
    """
    # Fifth band on the chart (heavy pollution, 150 to 250).
    above_floor = pm - 150
    if above_floor < 0:
        above_floor = 0
    overshoot = above_floor - 100
    if overshoot > 0:
        return 100
    return overshoot + 100
# For every city: take the filtered mean of each station column via PM(),
# average the stations into one city value, then split that value into the
# five band contributions (good, moderate, lightlyP, moderatelyP, heavilyP).

# Chengdu: stations PM_Caotangsi and PM_Shahepu.
cd_cts, cd_shp = (
    PM(cd, 'day', station) for station in ('PM_Caotangsi', 'PM_Shahepu')
)
PM_Caotangsi = np.array(cd_cts['PM_Caotangsi'])
PM_Shahepu = np.array(cd_shp['PM_Shahepu'])
cd_pm = (PM_Shahepu + PM_Caotangsi) / 2
(cd_pm_good, cd_pm_moderate, cd_pm_lightlyP,
 cd_pm_moderatelyP, cd_pm_heavilyP) = (
    split(cd_pm) for split in (good, moderate, lightlyP, moderatelyP, heavilyP)
)

# Beijing: stations PM_Dongsi, PM_Dongsihuan and PM_Nongzhanguan.
bj_ds, bj_dsh, bj_nzg = (
    PM(bj, 'day', station)
    for station in ('PM_Dongsi', 'PM_Dongsihuan', 'PM_Nongzhanguan')
)
PM_Dongsi = np.array(bj_ds['PM_Dongsi'])
PM_Dongsihuan = np.array(bj_dsh['PM_Dongsihuan'])
PM_Nongzhanguan = np.array(bj_nzg['PM_Nongzhanguan'])
bj_pm = (PM_Dongsi + PM_Dongsihuan + PM_Nongzhanguan) / 3
(bj_pm_good, bj_pm_moderate, bj_pm_lightlyP,
 bj_pm_moderatelyP, bj_pm_heavilyP) = (
    split(bj_pm) for split in (good, moderate, lightlyP, moderatelyP, heavilyP)
)

# Shenyang: stations PM_Taiyuanjie and PM_Xiaoheyan.
sy_tyj, sy_xhy = (
    PM(sy, 'day', station) for station in ('PM_Taiyuanjie', 'PM_Xiaoheyan')
)
PM_Taiyuanjie = np.array(sy_tyj['PM_Taiyuanjie'])
PM_Xiaoheyan = np.array(sy_xhy['PM_Xiaoheyan'])
sy_pm = (PM_Taiyuanjie + PM_Xiaoheyan) / 2
(sy_pm_good, sy_pm_moderate, sy_pm_lightlyP,
 sy_pm_moderatelyP, sy_pm_heavilyP) = (
    split(sy_pm) for split in (good, moderate, lightlyP, moderatelyP, heavilyP)
)

# Guangzhou: stations "PM_City Station" and "PM_5th Middle School".
gz_cs, gz_ms = (
    PM(gz, 'day', station)
    for station in ('PM_City Station', 'PM_5th Middle School')
)
PM_CityStation = np.array(gz_cs['PM_City Station'])
PM_5thMiddleSchool = np.array(gz_ms['PM_5th Middle School'])
gz_pm = (PM_CityStation + PM_5thMiddleSchool) / 2
(gz_pm_good, gz_pm_moderate, gz_pm_lightlyP,
 gz_pm_moderatelyP, gz_pm_heavilyP) = (
    split(gz_pm) for split in (good, moderate, lightlyP, moderatelyP, heavilyP)
)

# Shanghai: stations PM_Jingan and PM_Xuhui.
sh_jg, sh_xh = (
    PM(sh, 'day', station) for station in ('PM_Jingan', 'PM_Xuhui')
)
PM_Jingan = np.array(sh_jg['PM_Jingan'])
PM_Xuhui = np.array(sh_xh['PM_Xuhui'])
sh_pm = (PM_Jingan + PM_Xuhui) / 2
(sh_pm_good, sh_pm_moderate, sh_pm_lightlyP,
 sh_pm_moderatelyP, sh_pm_heavilyP) = (
    split(sh_pm) for split in (good, moderate, lightlyP, moderatelyP, heavilyP)
)
# Output: one stacked bar per city, built from the band contributions above.

# Select a CJK-capable font before any label is created so that the setting
# is in place no matter when matplotlib resolves the font.
plt.rcParams['font.sans-serif'] = ['Microsoft YaHei']

bar_width = 0.1
x = [0.2, 0.4, 0.6, 0.8, 1]

# Opacity of each band, bottom to top; higher bands are drawn more opaque.
layer_alphas = (0.2, 0.4, 0.6, 0.8, 1.0)

# (bar colour, band contributions from bottom to top) in the order of ``x``.
city_layers = [
    ('teal', (cd_pm_good, cd_pm_moderate, cd_pm_lightlyP,
              cd_pm_moderatelyP, cd_pm_heavilyP)),
    ('seagreen', (bj_pm_good, bj_pm_moderate, bj_pm_lightlyP,
                  bj_pm_moderatelyP, bj_pm_heavilyP)),
    ('khaki', (sy_pm_good, sy_pm_moderate, sy_pm_lightlyP,
               sy_pm_moderatelyP, sy_pm_heavilyP)),
    ('orange', (gz_pm_good, gz_pm_moderate, gz_pm_lightlyP,
                gz_pm_moderatelyP, gz_pm_heavilyP)),
    ('lightsalmon', (sh_pm_good, sh_pm_moderate, sh_pm_lightlyP,
                     sh_pm_moderatelyP, sh_pm_heavilyP)),
]

legend_containers = []
for position, (color, layers) in zip(x, city_layers):
    # Every layer, including the bottom one, is a band contribution; drawing
    # the full city value at the bottom would put the stack on top of a
    # full-height bar.
    bottom = 0
    containers = []
    for height, alpha in zip(layers, layer_alphas):
        containers.append(
            plt.bar(position, height, width=bar_width, bottom=bottom,
                    alpha=alpha, color=color)
        )
        bottom = bottom + height
    # The fourth layer (alpha 0.8) provides the legend swatch for the city.
    legend_containers.append(containers[3])
p1, p2, p3, p4, p5 = legend_containers

# Print each city's value just above its bar.
pm = [cd_pm, bj_pm, sy_pm, gz_pm, sh_pm]
for a, b in zip(x, pm):
    plt.text(a, b + 0.5, '%.2f' % b, ha='center', va='bottom', fontsize=20)

plt.legend((p1[0], p2[0], p3[0], p4[0], p5[0]), ('成都', '北京', '沈阳', '广州', '上海'), fontsize=20)
plt.xticks(x, ("成都", "北京", "沈阳", "广州", "上海"), fontsize=20)

# One tick per band, placed at the band's lower bound. A second tick at 0
# would print two labels on top of each other.
y = [0, 35, 75, 115, 150]
plt.yticks(y, ("优\n(0~35)", "良\n(35~75)", "轻度污染\n(75~115)", "中度污染\n(115~150)", "重度污染\n(150~250)"), fontsize=20)

plt.title(u"五城PM2.5日均值", fontsize=23)
plt.xlabel(u'城市', fontsize=22, verticalalignment='top', horizontalalignment='left', x=1, labelpad=-10)
plt.ylabel(u'污染程度', fontsize=23, rotation='horizontal', verticalalignment='top', horizontalalignment='left', y=1.07)
plt.grid(alpha=0.4)
plt.show()
从中我们可以看出广州的空气质量是五个城市里最好的,上海仅次于广州;而北京的空气质量是最差的;成都和沈阳的空气质量不相上下,处于中等水平。