子集选择——基于R语言实现(最优子集选择法、逐步回归法、Lasso回归法、交叉验证法)
(a) 使用 rnorm() 函数生成预测变量 $X$($n=100$)与噪声向量 $\epsilon$($n=100$)
# Fix the RNG seed so the simulated data are reproducible.
set.seed(1)
# Predictor X: 100 standard-normal draws.
x <- rnorm(n = 100)
# Noise vector epsilon: a further 100 standard-normal draws.
eps <- rnorm(n = 100)
(b) 生成响应变量 $Y$($n=100$):$Y=\beta_0+\beta_1X+\beta_2X^{2}+\beta_3X^{3}+\epsilon$。在本次实验中,设定 $\beta_0=1,\beta_1=2,\beta_2=3,\beta_3=4$。
# Cubic response with true coefficients beta0..beta3 = 1, 2, 3, 4 plus noise.
y <- 1 + 2 * x + 3 * x^2 + 4 * x^3 + eps
(c) 最优子集法
- 使用 regsubsets() 实现最优子集算法,从包含 $X^{0},X^{1},\dots,X^{10}$ 的模型中选出最优的模型;
- 根据 $C_p$、$BIC$、调整 $R^{2}$(Adjusted $R^{2}$)选择出最优模型;
- 给出最优子集模型的系数估计值。
library(leaps)
# Bundle response and predictor into one data frame for the formula interface.
best <- data.frame(y, x)
# Best-subset search over the raw polynomial terms x^1..x^10; nvmax = 10 lets
# the search report models containing up to all ten terms.
# `raw = T` is kept exactly as written: the deparsed term is what appears as
# the column label in the printed output.
b <- regsubsets(y ~ poly(x, 10, raw = T), data = best, nvmax = 10)
s <- summary(b)
# In the printed table a "*" marks a term included in the best model of that size.
print(s)
> print(s)#print the summary of the model,通过查看summary(b)的结果,*表示变量被选入
Subset selection object
Call: regsubsets.formula(y ~ poly(x, 10, raw = T), data = best, nvmax = 10)
10 Variables (and intercept)
Forced in Forced out
poly(x, 10, raw = T)1 FALSE FALSE
poly(x, 10, raw = T)2 FALSE FALSE
poly(x, 10, raw = T)3 FALSE FALSE
poly(x, 10, raw = T)4 FALSE FALSE
poly(x, 10, raw = T)5 FALSE FALSE
poly(x, 10, raw = T)6 FALSE FALSE
poly(x, 10, raw = T)7 FALSE FALSE
poly(x, 10, raw = T)8 FALSE FALSE
poly(x, 10, raw = T)9 FALSE FALSE
poly(x, 10, raw = T)10 FALSE FALSE
1 subsets of each size up to 10
Selection Algorithm: exhaustive
poly(x, 10, raw = T)1 poly(x, 10, raw = T)2 poly(x, 10, raw = T)3
1 ( 1 ) " " " " "*"
2 ( 1 ) " " "*" "*"
3 ( 1 ) "*" "*" "*"
4 ( 1 ) "*" "*" "*"
5 ( 1 ) "*" "*" "*"
6 ( 1 ) "*" "*" "*"
7 ( 1 ) "*" "*" "*"
8 ( 1 ) "*" "*" "*"
9 ( 1 ) "*" "*" "*"
10 ( 1 ) "*" "*" "*"
poly(x, 10, raw = T)4 poly(x, 10, raw = T)5 poly(x, 10, raw = T)6
1 ( 1 ) " " " " " "
2 ( 1 ) " " " " " "
3 ( 1 ) " " " " " "
4 ( 1 ) " " "*" " "
5 ( 1 ) " " "*" "*"
6 ( 1 ) " " " " " "
7 ( 1 ) " " "*" "*"
8 ( 1 ) "*" " " "*"
9 ( 1 ) "*" "*" "*"
10 ( 1 ) "*" "*" "*"
poly(x, 10, raw = T)7 poly(x, 10, raw = T)8 poly(x, 10, raw = T)9
1 ( 1 ) " " " " " "
2 ( 1 ) " " " " " "
3 ( 1 ) " " " " " "
4 ( 1 ) " " " " " "
5 ( 1 ) " " " " " "
6 ( 1 ) "*" "*" "*"
7 ( 1 ) " " "*" " "
8 ( 1 ) " " "*" "*"
9 ( 1 ) " " "*" "*"
10 ( 1 ) "*" "*" "*"
poly(x, 10, raw = T)10
1 ( 1 ) " "
2 ( 1 ) " "
3 ( 1 ) " "
4 ( 1 ) " "
5 ( 1 ) " "
6 ( 1 ) " "
7 ( 1 ) "*"
8 ( 1 ) "*"
9 ( 1 ) "*"
10 ( 1 ) "*"
通过查看 summary(b) 的结果,`*` 表示该变量被选入对应规模的最优模型。例如:含 3 个变量的最优模型选入了 $X, X^2, X^3$(三次多项式模型),含 4 个变量的最优模型选入了 $X, X^2, X^3, X^5$。
# List the components available in the summary object of the subset search.
names(summary(b))
> names(summary(b))#查看summary(b)的属性
[1] "which" "rsq" "rss" "adjr2" "cp" "bic" "outmat" "obj"
# Logical matrix: row k flags the terms included in the best k-variable model.
s$which
> s$which#查看哪些变量被选入模型
(Intercept) poly(x, 10, raw = T)1 poly(x, 10, raw = T)2 poly(x, 10, raw = T)3
1 TRUE FALSE FALSE TRUE
2 TRUE FALSE TRUE TRUE
3 TRUE TRUE TRUE TRUE
4 TRUE TRUE TRUE TRUE
5 TRUE TRUE TRUE TRUE
6 TRUE TRUE TRUE TRUE
7 TRUE TRUE TRUE TRUE
8 TRUE TRUE TRUE TRUE
9 TRUE TRUE TRUE TRUE
10 TRUE TRUE TRUE TRUE
poly(x, 10, raw = T)4 poly(x, 10, raw = T)5 poly(x, 10, raw = T)6 poly(x, 10, raw = T)7
1 FALSE FALSE FALSE FALSE
2 FALSE FALSE FALSE FALSE
3 FALSE FALSE FALSE FALSE
4 FALSE TRUE FALSE FALSE
5 FALSE TRUE TRUE FALSE
6 FALSE FALSE FALSE TRUE
7 FALSE TRUE TRUE FALSE
8 TRUE FALSE TRUE FALSE
9 TRUE TRUE TRUE FALSE
10 TRUE TRUE TRUE TRUE
poly(x, 10, raw = T)8 poly(x, 10, raw = T)9 poly(x, 10, raw = T)10
1 FALSE FALSE FALSE
2 FALSE FALSE FALSE
3 FALSE FALSE FALSE
4 FALSE FALSE FALSE
5 FALSE FALSE FALSE
6 TRUE TRUE FALSE
7 TRUE FALSE TRUE
8 TRUE TRUE TRUE
9 TRUE TRUE TRUE
10 TRUE TRUE TRUE
# Selection criteria for the best model of each size:
# smaller is better for Cp and BIC, larger is better for adjusted R^2.
s$cp
s$bic
s$adjr2
# Model size favoured by each criterion.
which.min(s$cp)
which.min(s$bic)
which.max(s$adjr2)
# Coefficient estimates of the best 3-variable and 4-variable models.
coef(b, id = 3)
coef(b, id = 4)
# 2 x 2 panel: BIC model map first, then each criterion against model size.
# The plotted expressions are left as written so the default axis labels stay the same.
par(mfrow = c(2, 2))
plot(b, scale = "bic")
plot(1:10, summary(b)$cp, type = "b")
plot(1:10, summary(b)$bic, type = "b")
plot(1:10, summary(b)$adjr2, type = "b")
> s$cp#选择cp最小的进入模型
[1] 1123.2892318 109.3256041 2.1859433 0.6067483 2.1782005 3.9955812 5.7869063
[8] 7.1694092 9.1535580 11.0000000
> s$bic#选择BIC最小的进入模型
[1] -262.7744 -437.2907 -509.6393 -508.9084 -504.7773 -500.3748 -496.0018 -492.0868 -487.4994
[10] -483.0666
> s$adjr2#选择adjr2最大的进入模型
[1] 0.9334429 0.9887867 0.9947516 0.9948979 0.9948680 0.9948233 0.9947792 0.9947581 0.9947008
[10] 0.9946505
> which.min(s$cp)
[1] 4
> which.min(s$bic)
[1] 3
> which.max(s$adjr2)
[1] 4
# Plot each selection criterion against model size and mark its optimum in red.
par(mfrow = c(2, 2))
plot(b, scale = "bic", main = "BIC")

# Locate each optimum once so that the marker's x and y always refer to the
# same model. The earlier version paired x = 4 with the value of model 3,
# which drew the Cp and adjusted-R^2 markers off the curve.
best_cp <- which.min(s$cp)
best_bic <- which.min(s$bic)
best_adjr2 <- which.max(s$adjr2)
sizes <- seq_along(s$cp)

plot(sizes, s$cp, type = "b", xlab = "Number of variables", ylab = "Cp")
points(best_cp, s$cp[best_cp], pch = 4, col = "red", lwd = 7)

plot(sizes, s$bic, type = "b", xlab = "Number of variables", ylab = "BIC")
points(best_bic, s$bic[best_bic], pch = 4, col = "red", lwd = 7)

# The stray trailing comma of the original plot() call is removed here.
plot(sizes, s$adjr2, type = "b",
     xlab = "Number of variables", ylab = "Adjusted R^2")
points(best_adjr2, s$adjr2[best_adjr2], pch = 4, col = "red", lwd = 7)
通过最优子集法分析,并通过图像得出,根据 $BIC$ 选择出来的最优模型为 $y=\epsilon+\beta_0+\beta_1x+\beta_2x^2+\beta_3x^3$;根据 $C_p$、调整 $R^2$ 选择出来的最优模型为 $y=\epsilon+\beta_0+\beta_1x+\beta_2x^2+\beta_3x^3+\beta_4x^5$。
> coefficients(b,id=3)#选择BIC最小的进入模型
(Intercept) poly(x, 10, raw = T)1 poly(x, 10, raw = T)2 poly(x, 10, raw = T)3
1.061507 1.975280 2.876209 4.017639
> coefficients(b,id=4)#Cp最小、调整R2最大所对应的模型
(Intercept) poly(x, 10, raw = T)1 poly(x, 10, raw = T)2 poly(x, 10, raw = T)3
1.07200775 2.38745596 2.84575641 3.55797426
poly(x, 10, raw = T)5
0.08072292
通过最优子集法分析,并通过参数的估计得出,根据 $BIC$ 选择出来的最优模型为 $y=\epsilon+1.06+1.98x+2.88x^2+4.02x^3$,这个模型与问题(b)中的设定十分接近;根据 $C_p$、调整 $R^2$ 选择出来的最优模型为 $y=\epsilon+1.07+2.39x+2.85x^2+3.56x^3+0.08x^5$。
(d)逐步选择
1 向前逐步选择
# Forward stepwise selection over the same ten raw polynomial terms.
# nvmax = 10 is given explicitly: regsubsets() defaults to nvmax = 8, which
# would leave only eight entries in each criterion vector, while the later
# comparison plots and the backward fit both work with ten model sizes.
fit2 <- regsubsets(
  y ~ poly(x, 10, raw = T),
  data = best,
  nvmax = 10,
  method = "forward"
)
s2 <- summary(fit2)
# Model size preferred by Cp, BIC and adjusted R^2 respectively.
which.min(s2$cp)
which.min(s2$bic)
which.max(s2$adjr2)
> fit2=regsubsets(y~poly(x,10,raw = T),method="forward",data=best)
> s2=summary(fit2)
> which.min(s2$cp)
[1] 4
> which.min(s2$bic)
[1] 3
> which.max(s2$adjr2)
[1] 4
向前逐步选择结果与(c)得出的最优拟合模型一致,根据 $BIC$ 选择出来的最优模型为 $y=\epsilon+\beta_0+\beta_1x+\beta_2x^2+\beta_3x^3$;根据 $C_p$、调整 $R^2$ 选择出来的最优模型为 $y=\epsilon+\beta_0+\beta_1x+\beta_2x^2+\beta_3x^3+\beta_4x^5$。
2 向后逐步选择
# Backward stepwise selection over the ten raw polynomial terms,
# reporting models of every size up to all ten terms.
fit3 <- regsubsets(
  y ~ poly(x, 10, raw = T),
  data = best,
  nvmax = 10,
  method = "backward"
)
s3 <- summary(fit3)
# Model size preferred by Cp, BIC and adjusted R^2 respectively.
which.min(s3$cp)
which.min(s3$bic)
which.max(s3$adjr2)
> fit3=regsubsets(y~poly(x,10,raw = T),method="backward",data=best,nvmax = 10)
> s3=summary(fit3)
> which.min(s3$cp)
[1] 4
> which.min(s3$bic)
[1] 3
> which.max(s3$adjr2)
[1] 4
向后逐步选择得到的模型规模与(c)一致:根据 $BIC$ 选择出来的最优模型为 $y=\epsilon+\beta_0+\beta_1x+\beta_2x^2+\beta_3x^3$;根据 $C_p$、调整 $R^2$ 选择出来的最优模型为 $y=\epsilon+\beta_0+\beta_1x+\beta_2x^2+\beta_3x^3+\beta_4x^9$(第四个变量为 $x^9$,而(c)中为 $x^5$)。
3 两种方法比较——向前逐步选择与向后逐步选择
3.1绘图
# Draw one criterion curve against model size and mark its optimum in red.
# `pick` is which.min or which.max. The marker's x and y come from the same
# index; the earlier code drew x = 4 with the value of model 3. The x axis
# follows the length of `values`, so it works for any nvmax.
plot_criterion <- function(values, pick, label) {
  best_size <- pick(values)
  plot(seq_along(values), values, type = "b",
       xlab = "Number of variables", ylab = label)
  points(best_size, values[best_size], pch = 4, col = "red", lwd = 7)
}

par(mfrow = c(2, 3))
# Top row: forward stepwise selection.
plot_criterion(s2$cp, which.min, "Cp (forward)")
plot_criterion(s2$bic, which.min, "BIC (forward)")
plot_criterion(s2$adjr2, which.max, "Adjusted R^2 (forward)")
# Bottom row: backward stepwise selection.
plot_criterion(s3$cp, which.min, "Cp (backward)")
plot_criterion(s3$bic, which.min, "BIC (backward)")
plot_criterion(s3$adjr2, which.max, "Adjusted R^2 (backward)")
3.2 求解模型系数
# Best 3-variable model: forward fit first, then backward fit.
coef(fit2, id = 3)
coef(fit3, id = 3)
# Best 4-variable model: forward fit first, then backward fit.
coef(fit2, id = 4)
coef(fit3, id = 4)
> #两种方法比较
> coefficients(fit2,id=3)#向前逐步选择
(Intercept) poly(x, 10, raw = T)1 poly(x, 10, raw = T)2 poly(x, 10, raw = T)3
1.061507 1.975280 2.876209 4.017639
> coefficients(fit3,id=3)#向后逐步选择
(Intercept) poly(x, 10, raw = T)1 poly(x, 10, raw = T)2 poly(x, 10, raw = T)3
1.061507 1.975280 2.876209 4.017639
> coefficients(fit2,id=4)#向前逐步选择
(Intercept) poly(x, 10, raw = T)1 poly(x, 10, raw = T)2 poly(x, 10, raw = T)3
1.07200775 2.38745596 2.84575641 3.55797426
poly(x, 10, raw = T)5
0.08072292
> coefficients(fit3,id=4)#向后逐步选择
(Intercept) poly(x, 10, raw = T)1 poly(x, 10, raw = T)2 poly(x, 10, raw = T)3
1.079236362 2.231905828 2.833494180 3.819555807
poly(x, 10, raw = T)9
0.001290827
对于向前逐步选择,向前逐步选择结果与(c)得出的最优拟合模型大致相同,根据 $BIC$ 选择出来的最优模型为 $y=\epsilon+1.06+1.98x+2.88x^2+4.02x^3$;根据 $C_p$、调整 $R^2$ 选择出来的最优模型为 $y=\epsilon+1.07+2.39x+2.85x^2+3.56x^3+0.08x^5$。
向后逐步回归结果与(c)得出的最优拟合模型大致相同,根据 $BIC$ 选择出来的最优模型为 $y=\epsilon+1.06+1.98x+2.88x^2+4.02x^3$;根据 $C_p$、调整 $R^2$ 选择出来的最优模型为 $y=\epsilon+1.08+2.23x+2.83x^2+3.82x^3+0.0013x^9$。
(e)lasso法选择:交叉验证法求出最优 $\lambda$,并对系数进行估计。
# Build the design matrix for the degree-10 raw polynomial.
# It is stored under its own name: assigning it to `x` would overwrite the
# predictor vector, which the later step y1 = 1 + 2 * x^7 + eps still needs.
design <- model.matrix(y ~ poly(x, 10, raw = T), data = best)
print(design)
# Drop the FIRST column (the intercept), not the last one; the fitted lasso
# model reports its own intercept. This reuses the matrix built above
# instead of calling model.matrix() a second time.
xmat <- design[, -1]
print(xmat)
library(glmnet)
# Candidate penalties: 100 values, log-spaced from 10^10 down to 10^-2.
grid <- 10^seq(10, -2, length.out = 100)
# Seed the RNG so the cross-validation result is repeatable.
set.seed(1)
# 5-fold cross-validation over the penalty grid;
# alpha = 1 selects the lasso penalty (alpha = 0 would be ridge).
fit5 <- cv.glmnet(xmat, y, nfolds = 5, alpha = 1, lambda = grid)
plot(fit5)
# lambda.min: the penalty cross-validation selected as best.
bestlambda <- fit5$lambda.min
bestlambda
> bestlambda
[1] 0.07054802
通过交叉验证法求出最优 $\lambda=0.07$
# Fit the lasso (alpha = 1) on glmnet's default penalty path.
fit6 <- glmnet(xmat, y, alpha = 1)
# Coefficients evaluated at the cross-validated penalty.
coef(fit6, s = bestlambda)
> fit6=glmnet(xmat,y,alpha=1)#拟合模型lasso回归
> coefficients(fit6,s=bestlambda)#输出系数
11 x 1 sparse Matrix of class "dgCMatrix"
s1
(Intercept) 1.178301396
poly(x, 10, raw = T)1 2.142635982
poly(x, 10, raw = T)2 2.628493946
poly(x, 10, raw = T)3 3.812038946
poly(x, 10, raw = T)4 0.042147458
poly(x, 10, raw = T)5 0.012647742
poly(x, 10, raw = T)6 .
poly(x, 10, raw = T)7 0.003884896
poly(x, 10, raw = T)8 .
poly(x, 10, raw = T)9 .
poly(x, 10, raw = T)10 .
通过分析得出有6个变量的系数不等于0,说明使用lasso法筛选出来6个变量,得出拟合模型为 $y=\epsilon+1.18+2.14x+2.63x^2+3.81x^3+0.04x^4+0.01x^5+0.004x^7$。
(f) 现在依据 $Y=\beta_0+\beta_7X^{7}+\epsilon$ 产生响应变量 $Y$,使用最优子集选择法与lasso法,对比分析。
1 产生响应变量Y
# New response driven only by the 7th power of the predictor:
# Y = 1 + 2 * X^7 + eps.
# NOTE(review): `x` must be the original numeric predictor vector at this
# point, not a matrix. Confirm nothing earlier in the session reassigned it.
y1 <- 1 + 2 * x^7 + eps
best1 <- data.frame(y1, x)
2最优子集选择法
# Best-subset search for y1 over the raw polynomial terms x^1..x^10.
fit7 <- regsubsets(y1 ~ poly(x, 10, raw = T), data = best1, nvmax = 10)
s7 <- summary(fit7)
# Model size preferred by Cp, BIC and adjusted R^2 respectively.
which.min(s7$cp)
which.min(s7$bic)
which.max(s7$adjr2)
# Coefficients of the 2-, 1- and 4-variable models.
coef(fit7, id = 2)
coef(fit7, id = 1)
coef(fit7, id = 4)
> set.seed(1)
> x=rnorm(100)
> eps=rnorm(100)
> #(f)
> y1=1+2*x^7+eps
> best1=data.frame(y1,x)
> library(leaps)
> fit7=regsubsets(y1~poly(x,10,raw = T),data=best1,nvmax=10)
> s7=summary(fit7)
> which.min(s7$cp)
[1] 2
> which.min(s7$bic)
[1] 1
> which.max(s7$adjr2)
[1] 4
> coefficients(fit7,id=2)
(Intercept) poly(x, 10, raw = T)2 poly(x, 10, raw = T)7
1.0704904 -0.1417084 2.0015552
> coefficients(fit7,id=1)
(Intercept) poly(x, 10, raw = T)7
0.9589402 2.0007705
> coefficients(fit7,id=4)
(Intercept) poly(x, 10, raw = T)1 poly(x, 10, raw = T)2 poly(x, 10, raw = T)3
1.0762524 0.2914016 -0.1617671 -0.2526527
poly(x, 10, raw = T)7
2.0091338
通过最优子集选择法,并通过参数的估计得出,根据 $BIC$ 选择出来的最优模型为 $y=\epsilon+0.96+2.00x^7$,这个模型与假设的结果十分接近;根据 $C_p$ 选择出来的最优模型为 $y=\epsilon+1.07-0.14x^2+2.00x^7$;根据调整 $R^2$ 选择出来的最优模型为 $y=\epsilon+1.08+0.29x-0.16x^2-0.25x^3+2.01x^7$。
# 2 x 2 panel for the y1 fit: BIC model map, then Cp, BIC and adjusted R^2
# against model size, each with a red cross on the model size it prefers
# (2, 1 and 4 variables respectively).
par(mfrow = c(2, 2))
plot(fit7, scale = "bic")

plot(1:10, s7$cp, type = "b")
points(x = 2, y = s7$cp[2], pch = 4, col = "red", lwd = 7)

plot(1:10, s7$bic, type = "b")
points(x = 1, y = s7$bic[1], pch = 4, col = "red", lwd = 7)

plot(1:10, s7$adjr2, type = "b")
points(x = 4, y = s7$adjr2[4], pch = 4, col = "red", lwd = 7)
3 lasso法
3.1 交叉验证法
library(glmnet)
# Predictor matrix for y1: raw polynomial terms x^1..x^10, with the first
# (intercept) column of the model matrix removed.
xmat <- model.matrix(y1 ~ poly(x, 10, raw = T), data = best1)[, -1]
set.seed(1)
# Candidate penalties: 100 values, log-spaced from 10^10 down to 10^-2.
grid <- 10^seq(10, -2, length.out = 100)
# 5-fold cross-validation over the grid; alpha = 1 selects the lasso penalty.
fit8 <- cv.glmnet(xmat, y1, nfolds = 5, alpha = 1, lambda = grid)
plot(fit8)
# lambda.min: the penalty cross-validation selected as best.
best.lambda <- fit8$lambda.min
best.lambda
> best.lambda
[1] 0.05336699
# Lasso coefficient estimates at the cross-validated penalty best.lambda;
# terms dropped by the lasso print as ".".
predict(fit8, s = best.lambda, type = "coefficients")
> predict(fit8, s = best.lambda, type = "coefficients")
11 x 1 sparse Matrix of class "dgCMatrix"
s1
(Intercept) 1.0247546165
poly(x, 10, raw = T)1 .
poly(x, 10, raw = T)2 -0.0810199644
poly(x, 10, raw = T)3 .
poly(x, 10, raw = T)4 .
poly(x, 10, raw = T)5 0.0004449398
poly(x, 10, raw = T)6 .
poly(x, 10, raw = T)7 1.9966707008
poly(x, 10, raw = T)8 .
poly(x, 10, raw = T)9 0.0007067609
poly(x, 10, raw = T)10 .
通过分析得出有4个变量的系数不等于0,说明使用lasso法筛选出来4个变量,得出拟合模型为 $y=\epsilon+1.02-0.08x^2+0.0004x^5+1.997x^7+0.0007x^9$。
通过 $Y=\beta_0+\beta_7X^{7}+\epsilon$ 产生响应变量 $Y$,使用最优子集选择法与lasso法,对比分析。通过最优子集选择法,并通过参数的估计得出:
- 根据 $BIC$ 选择出来的最优模型为 $y=\epsilon+0.96+2.00x^7$,这个模型与假设的结果十分接近;
- 根据 $C_p$ 选择出来的最优模型为 $y=\epsilon+1.07-0.14x^2+2.00x^7$;
- 根据调整 $R^2$ 选择出来的最优模型为 $y=\epsilon+1.08+0.29x-0.16x^2-0.25x^3+2.01x^7$。
通过lasso法分析得出有4个变量的系数不等于0,说明使用lasso法筛选出来4个变量,得出拟合模型为 $y=\epsilon+1.02-0.08x^2+0.0004x^5+1.997x^7+0.0007x^9$。