Python教学:文档处理及箱线图等
代码1:
import os
import pandas as pd
import numpy as py
import os.path
from os import listdir
import openpyxl
from openpyxl import Workbook
import re
import matplotlib.pyplot as plt # 导入matplotlib的绘图模块,用于可视化
# Script 1: load a grade sheet, encode the textual grades as numbers and draw
# one box plot per course.

# Use a CJK-capable font so the Chinese course names on the x axis render as
# text; negative ticks are drawn with an ASCII hyphen because CJK fonts such
# as SimHei commonly lack the Unicode minus glyph.
plt.rcParams['font.sans-serif'] = ['SimHei']
plt.rcParams['axes.unicode_minus'] = False

# The workbook lives in the "Xls" sub-folder of the current working directory.
# os.path.join keeps the path portable, and reading through the full path
# (instead of os.chdir) leaves the working directory untouched, so the script
# can be run again in the same interpreter session.
cwd = os.getcwd()
xlsName = 'EC19A.xlsx'
xlsDir = os.path.join(cwd, "Xls")
df = pd.read_excel(os.path.join(xlsDir, xlsName), sheet_name=0)

# Remove every whitespace character from string cells and from string
# headers; non-string values are passed through unchanged.
df = df.map(lambda x: re.sub(r'\s+', '', x) if isinstance(x, str) else x)
df.columns = df.columns.map(
    lambda x: re.sub(r'\s+', '', x) if isinstance(x, str) else x
)

# Other ways to select columns (disabled teaching examples):
#   df["姓名"]                               one column by header
#   df.iloc[:, py.array([0, 1, 2])]          first three columns by position
#   df.loc[:, py.array(["编号", "学号", "姓名"])].to_excel("姓名名单.xlsx")

# Encode textual grades as numbers so the course columns can be plotted.
# The two pass marks get sentinel codes (888 / 999) that lie above the plotted
# y range; the fail marks and "absent" get small negative codes.
mapping = {"不及格": -1, "不合格": -2, "及格": 888, "合格": 999, "缺考": -3}
# Reverse lookup (numeric code -> original text); not used by this script.
mappingRev = {code: text for text, code in mapping.items()}
df = df.replace(mapping)

c = len(df.columns)
print(c)

# Columns 0-2 are skipped (presumably 编号 / 学号 / 姓名, as in the selection
# examples above - confirm against the workbook); the rest are plotted.
course_frame = df.iloc[:, 3:]
labelList = list(course_frame.columns)
# boxplot does not skip NaN: a single blank cell would make the quartiles of
# that course NaN and its box would not be drawn, so blanks are dropped here.
allData = [
    [float(score) for score in scores.dropna()]
    for _, scores in course_frame.items()
]

plt.figure(dpi=120)
print(len(labelList))
print(len(allData))
print(labelList)
print(allData)
plt.boxplot(
    x=allData,
    notch=False,
    showmeans=False,
    patch_artist=False,
    tick_labels=labelList,
)
# Show only the band from the lowest grade code (-3) up to 120; the 888 / 999
# pass sentinels fall outside it.
plt.ylim(-3, 120)
plt.show()
代码2:
import os
import pandas as pd
import numpy as py
import os.path
from os import listdir
import openpyxl
from openpyxl import Workbook
import re
import matplotlib.pyplot as plt # 导入matplotlib的绘图模块,用于可视化
# Script 2: same box plot as script 1 for a second workbook, with shortened
# course names on the x axis and the mean marked in every box.

# Use a CJK-capable font so the Chinese course names render as text; negative
# ticks are drawn with an ASCII hyphen because CJK fonts such as SimHei
# commonly lack the Unicode minus glyph.
plt.rcParams['font.sans-serif'] = ['SimHei']
plt.rcParams['axes.unicode_minus'] = False

# The workbook lives in the "Xls" sub-folder of the current working directory.
# os.path.join keeps the path portable, and reading through the full path
# (instead of os.chdir) leaves the working directory untouched, so the script
# can be run again in the same interpreter session.
cwd = os.getcwd()
xlsName = 'EC19B.xlsx'
xlsDir = os.path.join(cwd, "Xls")
df = pd.read_excel(os.path.join(xlsDir, xlsName), sheet_name=0)

# Remove every whitespace character from string cells.
df = df.map(lambda x: re.sub(r'\s+', '', x) if isinstance(x, str) else x)

# Shorten the headers in two passes; non-string headers pass through unchanged.
# Pass 1 drops whitespace and a bracketed numeric code such as "[020145]".
df.columns = df.columns.map(
    lambda x: re.sub(r'\s+|\[\d+\]', '', x) if isinstance(x, str) else x
)
# Pass 2 drops the "(Z)" marker, written with either half-width or full-width
# parentheses.
df.columns = df.columns.map(
    lambda x: re.sub(r'[((]Z[))]', '', x) if isinstance(x, str) else x
)

# Encode textual grades as numbers so the course columns can be plotted.
# The two pass marks get sentinel codes (888 / 999) that lie above the plotted
# y range; the fail marks and "absent" get small negative codes.
mapping = {"不及格": -1, "不合格": -2, "及格": 888, "合格": 999, "缺考": -3}
df = df.replace(mapping)

# Skip the first three columns by position and convert the rest to float.
df = df.iloc[:, 3:]
df = df.map(float)

# boxplot does not skip NaN: a single blank cell would make the quartiles of
# that course NaN and its box would not be drawn, so each column is passed as
# its own NaN-free vector.
course_scores = [scores.dropna().to_numpy() for _, scores in df.items()]

plt.figure(dpi=120)
plt.boxplot(
    x=course_scores,
    notch=False,
    showmeans=True,
    patch_artist=False,
    tick_labels=list(df.columns),
)
# Show only the band from the lowest grade code (-3) up to 120; the 888 / 999
# pass sentinels fall outside it.
plt.ylim(-3, 120)
plt.show()