Oran Chan Nov-27-2020
In this simulation study we will investigate the significance of regression test. We will simulate from two different models:
- The “significant” model
- The “non-significant” model
For both, we will consider a sample size of
and three possible
levels of noise. That is, three values of
.
Simulation will be used to obtain an empirical distribution for each of
the following values, for each of the three values of
, for
both models.
- The
statistic for the significance of regression test.
- The p-value for the significance of regression test
# Seed
birthday = 19380301
set.seed(birthday)Data found in study_1.csv will be the values of the
predictors. These will be kept constant for the entirety of this study.
The y values in this data are a blank placeholder.
# Load data source
library(readr)
study1 = read.csv('study_1.csv')
str(study1)## 'data.frame': 25 obs. of 4 variables:
## $ y : int 0 0 0 0 0 0 0 0 0 0 ...
## $ x1: num -1.54 -1.61 -4.56 -2.28 -2.76 -3.01 0.69 -3.31 -3.53 -4.12 ...
## $ x2: num 3 2.9 2.9 2.5 3 2.9 2.1 2.7 2.2 2.8 ...
## $ x3: num 2.96 2.71 2.62 2.41 2.77 ...
# GLobal variables
n = 25
p = 3
sigma = c(1, 5, 10)
x = study1
sim_cycle = 2000For each model and
combination, there will be
simulations.
For each simulation, we will fit a regression model of the same form
used to perform the simulation.
The y vector will be simulated times.
initialize_var = function(cycle){
ph = rep(0,cycle)
fstat_sig <<- data.frame(sigma_1 = ph, sigma_2 = ph, sigma_3 = ph)
pvalue_sig <<-data.frame(sigma_1 = ph, sigma_2 = ph, sigma_3 = ph)
R2_sig <<- data.frame(sigma_1 = ph, sigma_2 = ph, sigma_3 = ph)
fstat_nsig <<- data.frame(sigma_1 = ph, sigma_2 = ph, sigma_3 = ph)
pvalue_nsig <<- data.frame(sigma_1 = ph, sigma_2 = ph, sigma_3 = ph)
R2_nsig <<- data.frame(sigma_1 = ph, sigma_2 = ph, sigma_3 = ph)
p <<- 3
}
# Generate sim data
sim_mlr = function(x, sigma = 1, model = 'sig') {
if (model == 'sig') {
beta_0 = 3
beta_1 = 1
beta_2 = 1
beta_3 = 1
} else{
beta_0 = 3
beta_1 = 0
beta_2 = 0
beta_3 = 0
}
n = nrow(x)
x = x[, -1] #remove 1st column(y value)
ones = rep(1, n)
X = as.matrix(cbind(ones, x))
epsilon = rnorm(n, mean = 0, sd = sigma)
B = rbind(beta_0, beta_1, beta_2, beta_3)
Y = X %*% B + epsilon
data.frame(y = Y, predictor = x
)
}# Simulation
simulate = function(x) {
for (i in 1:3) { # 3 sigmas
for (j in 1:sim_cycle) {
sim_data = sim_mlr(x, sigma[i], 'sig')
model = lm(y ~ ., sim_data)
#fstat_sig[[i]][j] = summary(model)$f[1]
fstat_sig[j, i] <<- summary(model)$f[1]
pvalue_sig[j, i] <<-
pf(
summary(model)$f[1],
df1 = summary(model)$f[2],
df2 = summary(model)$f[3],
lower.tail = FALSE
)
R2_sig[j, i] <<- summary(model)$r.squared
sim_data = sim_mlr(x, sigma[i], 'nsig')
model = lm(y ~ ., sim_data)
fstat_nsig[j, i] <<- summary(model)$f[1]
pvalue_nsig[j, i] <<-
pf(
summary(model)$f[1],
df1 = summary(model)$f[2],
df2 = summary(model)$f[3],
lower.tail = FALSE
)
R2_nsig[j, i] <<- summary(model)$r.squared
} #sim_cycle
} #sigma
}- Simulation results will be plotted by below functions
# F-statistics Histogram
fstat_hist = function(df1, df2, sig=FALSE, nsig=FALSE, curve=FALSE){
# set height of histogram for true distribution curve
t_ylim = ifelse(curve == FALSE, 0.5, 0.8)
if(sig==TRUE){
hist(fstat_sig[[1]], freq=FALSE, breaks='Scott', ylim=c(0,t_ylim), xlim=c(-0.1,75),
main='F-Statistic of Sigma = 1', xlab='Significant Model', col='cornflowerblue')
if (curve==TRUE){curve(df(x, df1=df1, df2=df2), col='red', add=TRUE)}
hist(fstat_sig[[2]], freq=FALSE, breaks='Scott', ylim=c(0,t_ylim), xlim=c(-0.1,20),
main='Sigma = 5', xlab='Significant Model', ylab='', col='cornflowerblue')
if (curve==TRUE){curve(df(x, df1=df1, df2=df2), col='red', add=TRUE)}
hist(fstat_sig[[3]], freq=FALSE, breaks='Scott', ylim=c(0,t_ylim), xlim=c(-0.1,20),
main='Sigma = 10', xlab='Significant Model', ylab='', col='cornflowerblue')
if (curve==TRUE){curve(df(x, df1=df1, df2=df2), col='red', add=TRUE)}
}
if(nsig==TRUE){
hist(fstat_nsig[[1]], freq=FALSE, breaks=50, xlim=c(-0.1,12), ylim=c(0,1),
main='F-Statistic of Sigma = 1', xlab='Non - significant Model', col='lightgreen')
if (curve==TRUE){curve(df(x, df1=df1, df2=df2), col='red', add=TRUE)}
hist(fstat_nsig[[2]], freq=FALSE, breaks=50, xlim=c(-0.1,12), ylim=c(0,1),
main='Sigma = 5', xlab='Non - significant Model', col='lightgreen')
if (curve==TRUE){curve(df(x, df1=df1, df2=df2), col='red', add=TRUE)}
hist(fstat_nsig[[3]], freq=FALSE, breaks=50, xlim=c(-0.1,12), ylim=c(0,1),
main='Sigma = 10', xlab='Non - significant Model', col='lightgreen')
if (curve==TRUE){curve(df(x, df1=df1, df2=df2), col='red', add=TRUE)}
}
}
# R2 Histogram
R2_hist = function(df1, df2, sig=FALSE, nsig=FALSE){
if(sig==TRUE){
hist(R2_sig[[1]], freq=FALSE, breaks=50, xlim = (c(0,0.8)), ylim=c(0,8),
main='R2 of Sigma = 1', xlab='Significant Model', col='cadetblue1')
hist(R2_sig[[2]], freq=FALSE, breaks=50, xlim = (c(0,0.8)), ylim=c(0,8),
main='Sigma = 5', xlab='Significant Model', ylab='', col='cadetblue1')
hist(R2_sig[[3]], freq=FALSE, breaks=50, xlim = (c(0,0.8)), ylim=c(0,8),
main='Sigma = 10', xlab='Significant Model', ylab='', col='cadetblue1')
}
if(nsig==TRUE){
hist(R2_nsig[[1]], freq = FALSE, breaks = 50, xlim = (c(0, 0.8)), ylim = c(0, 8),
main = 'R2 of Sigma = 1', xlab = 'Non - significant Model', col = 'green')
hist(R2_nsig[[2]], freq = FALSE, breaks = 50, xlim = (c(0, 0.8)), ylim = c(0, 8),
main = 'Sigma = 5', xlab = 'Non - significant Model', col = 'green')
hist(R2_nsig[[3]], freq = FALSE, breaks = 50, xlim = (c(0, 0.8)), ylim = c(0, 8),
main = 'Sigma = 10', xlab = 'Non - significant Model', col = 'green')
}
}
# P-value Histogram
pvalue_hist = function(df1,df2, sig=FALSE, nsig=FALSE){
if(sig==TRUE){
hist(pvalue_sig[[1]], freq = TRUE, breaks = 50, ylim = c(0, 2000),
main = 'P-value of Sigma = 1', xlab = 'Significant Model',
col = 'darkblue', border = 'grey')
hist(pvalue_sig[[2]], freq = TRUE, breaks = 50,
main = 'Sigma = 5', xlab = 'Significant Model',
col = 'darkblue', border = 'grey')
hist(pvalue_sig[[3]], freq = TRUE, breaks = 50, ylim = c(0, 120),
main = 'Sigma = 10', xlab = 'Significant Model', col = 'darkblue',
border = 'grey')
}
if(nsig==TRUE){
hist(pvalue_nsig[[1]], freq = TRUE, breaks = 50,
main = 'P-value of Sigma = 1', xlab = 'Non - significant Model',
col = 'darkgreen', border = 'grey')
hist(pvalue_nsig[[2]], freq = TRUE, breaks = 50,
main = 'Sigma = 5', xlab = 'Non - significant Model',
col = 'darkgreen', border = 'grey')
hist(pvalue_nsig[[3]], freq = TRUE, breaks = 50,
main = 'Sigma = 10', xlab = 'Non - significant Model',
col = 'darkgreen', border = 'grey')
}
}
# Mean of Significant Model
mean_sig= function(){
df = data.frame('.' = c('F-statistics', 'R2', 'P-value'),
'Sigma=1' = c(mean(fstat_sig$sigma_1),
mean(R2_sig$sigma_1),
mean(pvalue_sig$sigma_1)),
'Sigma=5' = c(mean(fstat_sig$sigma_2),
mean(R2_sig$sigma_2),
mean(pvalue_sig$sigma_2)),
'Sigma=10' = c(mean(fstat_sig$sigma_3),
mean(R2_sig$sigma_3),
mean(pvalue_sig$sigma_3))
)
kable(df, 'markdown')
}
# Mean of Non-significant Model
mean_nsig = function(){
df = data.frame('.' = c('F-statistics', 'R2', 'P-value'),
'Sigma=1' = c(mean(fstat_nsig$sigma_1),
mean(R2_nsig$sigma_1),
mean(pvalue_nsig$sigma_1)),
'Sigma=5' = c(mean(fstat_nsig$sigma_2),
mean(R2_nsig$sigma_2),
mean(pvalue_nsig$sigma_2)),
'Sigma=10' = c(mean(fstat_nsig$sigma_3),
mean(R2_nsig$sigma_3),
mean(pvalue_nsig$sigma_3))
)
kable(df, 'markdown')
}#main
initialize_var(sim_cycle) #set no. of simulation cycle 2000
simulate(x) #simulate xdf1 = p - 1 #p-1
df2 = n - p #n-p
par(mfrow = c(2, 3))
fstat_hist(df1, df2, TRUE, TRUE)par(mfrow = c(2, 3))
R2_hist(df1, df2, TRUE, TRUE)par(mfrow = c(2, 3))
pvalue_hist(df1, df2, TRUE, TRUE)- Since know the true distribution of F-statistics with two of the degress of freedom, it has been plotted with the empirical value in the F-statistics grid of plots in coming discussion.
- F-statistics has a distribution under null hypothesis. It means that assuming null hypothesis is true, the test statistics follows an F-distribution.
Since F-statistics has the above assumption, the empirical value of “significant” model does not follow the true distribution.
- We can see in the plot on the left below, most of the F-value fall in the range from 20 to 50, which are considered as some very large F values. Such large F value corresponds to a very small p-value, that is to reject the null hypothesis.
- When we look at the plot of
for
=1, majority of value falls above 0.7. It means the althernative hypothesis model is explaining well by the predictors on the relationship with the response. It is also a indicator for the decision to reject
.
- The average F-statistics,
and p-value are listed for each
.
par(mfrow = c(2, 3))
fstat_hist(df1, df2, sig = TRUE, curve = TRUE)
R2_hist(df1, df2, sig = TRUE)# Significant Model
mean_sig() # mean values of F-statistics, R2 and P-value| . | Sigma.1 | Sigma.5 | Sigma.10 |
|---|---|---|---|
| F-statistics | 41.4048 | 2.7377 | 1.4859 |
| R2 | 0.8402 | 0.2535 | 0.1591 |
| P-value | 0.0000 | 0.2090 | 0.4054 |
- In the above plots of F-value for the other two
(s), the empirical values lean to the left of the graph, which represents majority of the F-value are small. It also means the corresponding p-value becomes larger.
- In the plots of
, majority of values fall in the range of 0.1 to 0.3 for
=5, 0.05 to 0.15 for
=10. The model with these
do not performe well in explaining the relatinoship.
- With sufficiently larger p-value, the chance of FTR
will be much higher than that with
=1.
- They partially align with the characteristic of F-distribution. Nonetheless, even the empirical values becomes closer to the true value, it is still far from it. We will see how it goes with “Non-significant” model in the next section.
P-value distribution of 3
varies.
- In the first plots with
= 1, as discussed above, it comes with a critically small p-value so the decision is to reject the null hypothesis. The peak close to zero as shown in the plot is where the alternative hypothesis located.
par(mfrow = c(1, 3))
pvalue_hist(df1, df2, TRUE)- In the second plot with
= 5. More p-value is falling into larger values, which contribute to null hypothesis. The frequency of the value close to zero is lower comparing to the 1st plot.
- We should be reminded that null hypothesis contains low p-value as well.
- In the thrid plot with
= 10. Even p-value is falling into larger values, we can see a close to uniform distribution when
. For p-value < 0.1, frequency from 0 to 40 might contain null hypothesis as well.
- The interpretation of p-value aligns with our previous explaination
that the chance of FTR
will be much higher.
par(mfrow = c(2, 3))
fstat_hist(df1, df2, nsig = TRUE, curve = TRUE)
R2_hist(df1, df2, nsig = TRUE)# Non - Significant Model
mean_nsig() # mean values of F-statistics, R2 and P-value| . | Sigma.1 | Sigma.5 | Sigma.10 |
|---|---|---|---|
| F-statistics | 1.0604 | 1.1039 | 1.1077 |
| R2 | 0.1205 | 0.1248 | 0.1249 |
| P-value | 0.5159 | 0.5011 | 0.5019 |
- As shown from the plots above, the F-value distribution of all
are critically close to the true F-distribution.
- The average F values are rougly between 1.1 and 1.2, which is considered very small comparning to the significant model.
- The average
of all
(s) are very small. It implies the “non-significant” model is not performing well in explaining the relationship between the predictors and reponse.
- The average P-value of all
(s) are considered very large as they are close to 0.5.It alignes with the small F-values. Therefore, the decision is very likely to be FTR the null hypothesis.
- Hence, the very similar behaviour between the true F-distrbution and the “non-significant” model could be explained by the above findings.
- We can also interpret the result by looking at setting of
. As they are all zero, so we expect the
is very likely to be true.
P-value distribution of 3
are
very similar.
- The P-value follow a uniform distribution in all three plots.
par(mfrow = c(1, 3))
pvalue_hist(df1, df2, nsig = TRUE)- It means that 5% of the p-value will be above the 9th percentile, another 5% will be between 90th and 95th percentile, and so forth. 5% of the value will be from 0 to 0.05, and another will be from 0.05 to 0.1.
- The uniformly distributed p-value is one of the definition of a p-value under null hypothesis.
- The above observation aligns with our discussed devision to be FTR
.
We will look into the relatinoship between
, and
statistic, the p-value,
.
- When we perform a significant test on regression, the relationship
varies differently according to the settings of the model.
Specifically, the relationship is affected by the value of
.
In “Significant” model, we have positive signal strength for
.
- When
becomes larger, the F-statistic distribution becomes closer to the true F-distribution. The value of
drops significantly and p-value increases significantly.
- We can interpret that, when
becomes larger in “Signigficant” model, it is more likely to be FTR
.
In “Non-Significant” model, we have zero signal strength for
and
= 3.
- When
varies, the effect on
statistic, the p-value and
are considerably small.
- Hence, given
= 3, under the settings of “Non-Significant”, we can interpret that,
has very slight influcence on
statistic, the p-value and
. The decision of FTR
would remain unchanged.
In this simulation study we will investigate how well this procedure works. Since splitting the data is random, we don’t expect it to work correctly each time. We could get unlucky. But averaged over many attempts, we should expect it to select the appropriate model.
We will simulate from the model
# Seed
set.seed(birthday)Data in study_2.csv will be the values of the
predictors. These will be kept constant for the entirety of this study.
The y values in this data are a blank placeholder.
# Load Data source
library(readr)
study2 = read.csv('study_2.csv')We will consider a sample size of
and three
possible levels of noise. That is, three values of
.
# GLobal variables
n = 500
sigma = c(1, 2, 4)
s2 = study2
sim_cycle = 1000
# RMSE for each sigma
RMSE_train = data.frame(sig1 = rep(0, 9), sig2 = rep(0, 9), sig3 = rep(0, 9))
RMSE_test = data.frame(sig1 = rep(0, 9), sig2 = rep(0, 9), sig3 = rep(0, 9))
# Selected Model size
Model_select_trn = matrix(0, sim_cycle, 3)
Model_select_tst = matrix(0, sim_cycle, 3)initialize_var = function(cycle){
sim_cycle <<- cycle
RMSE_train <<- data.frame(sig1 = rep(0, 9), sig2 = rep(0, 9), sig3 = rep(0, 9))
RMSE_test <<- data.frame(sig1 = rep(0, 9), sig2 = rep(0, 9), sig3 = rep(0, 9))
Model_select_trn <<- matrix(0, sim_cycle, 3)
Model_select_tst <<- matrix(0, sim_cycle, 3)
}
# Generate sim data
sim_mlr = function(x, sigma = 1) {
beta_0 = 0
beta_1 = 3
beta_2 = -4
beta_3 = 1.6
beta_4 = -1.1
beta_5 = 0.7
beta_6 = 0.5
beta_7 = 0
beta_8 = 0
beta_9 = 0
n = nrow(x)
ones = rep(1, n)
X = as.matrix(cbind(ones, x[,-1]))
epsilon = rnorm(n, mean = 0, sd = sigma)
B = as.matrix(rbind(beta_0, beta_1, beta_2, beta_3, beta_4,
beta_5, beta_6, beta_7, beta_8, beta_9))
y_hat = X %*% B + epsilon
data.frame(y = y_hat, x = x[, -1])
}
# RMSE
get_RMSE = function(actual, predicted) {
sqrt(mean((actual - predicted) ^ 2))
}Each time the data is simulated, they will be randomly split into train and test sets of equal sizes (250 observations for training, 250 observations for testing).
For each, we will fit nine models, with forms:
y ~ x1y ~ x1 + x2y ~ x1 + x2 + x3y ~ x1 + x2 + x3 + x4y ~ x1 + x2 + x3 + x4 + x5y ~ x1 + x2 + x3 + x4 + x5 + x6, the correct form of the model as noted abovey ~ x1 + x2 + x3 + x4 + x5 + x6 + x7y ~ x1 + x2 + x3 + x4 + x5 + x6 + x7 + x8y ~ x1 + x2 + x3 + x4 + x5 + x6 + x7 + x8 + x9
For each model, we will calculate Train and Test RMSE.
- In each simluation, one out of nine models will be selected for each sigma.
- The one with lowest RMSE value in test set will be selected.
# Simulation
simulate = function(x){
RMSE_trn_tmp = matrix(0, 9, sim_cycle)
RMSE_tst_tmp = matrix(0, 9, sim_cycle)
for (i in 1:3) { # 3 sigmas
for (j in 1:sim_cycle) {
sim_data = sim_mlr(x, sigma[i])
#split data set
trn_idx = sample(1:nrow(sim_data), 250)
trn_data = sim_data[trn_idx, ]
test_data = sim_data[-trn_idx, ]
# fit model
model1 = lm(y ~ x.x1, trn_data)
model2 = lm(y ~ x.x1 + x.x2, trn_data)
model3 = lm(y ~ x.x1 + x.x2 + x.x3, trn_data)
model4 = lm(y ~ x.x1 + x.x2 + x.x3 + x.x4, trn_data)
model5 = lm(y ~ x.x1 + x.x2 + x.x3 + x.x4 + x.x5, trn_data)
model6 = lm(y ~ x.x1 + x.x2 + x.x3 + x.x4 + x.x5 + x.x6, trn_data)
model7 = lm(y ~ x.x1 + x.x2 + x.x3 + x.x4 + x.x5 + x.x6 + x.x7, trn_data)
model8 = lm(y ~ x.x1 + x.x2 + x.x3 + x.x4 + x.x5 + x.x6 + x.x7 + x.x8 , trn_data)
model9 = lm(y ~ x.x1 + x.x2 + x.x3 + x.x4 + x.x5 + x.x6 + x.x7 + x.x8 + x.x9, trn_data)
# RMSE - train data
RMSE_trn_tmp[1, j] = get_RMSE(trn_data$y, fitted(model1))
RMSE_trn_tmp[2, j] = get_RMSE(trn_data$y, fitted(model2))
RMSE_trn_tmp[3, j] = get_RMSE(trn_data$y, fitted(model3))
RMSE_trn_tmp[4, j] = get_RMSE(trn_data$y, fitted(model4))
RMSE_trn_tmp[5, j] = get_RMSE(trn_data$y, fitted(model5))
RMSE_trn_tmp[6, j] = get_RMSE(trn_data$y, fitted(model6))
RMSE_trn_tmp[7, j] = get_RMSE(trn_data$y, fitted(model7))
RMSE_trn_tmp[8, j] = get_RMSE(trn_data$y, fitted(model8))
RMSE_trn_tmp[9, j] = get_RMSE(trn_data$y, fitted(model9))
# RMSE - test data
RMSE_tst_tmp[1, j] = get_RMSE(test_data$y, predict(model1, test_data))
RMSE_tst_tmp[2, j] = get_RMSE(test_data$y, predict(model2, test_data))
RMSE_tst_tmp[3, j] = get_RMSE(test_data$y, predict(model3, test_data))
RMSE_tst_tmp[4, j] = get_RMSE(test_data$y, predict(model4, test_data))
RMSE_tst_tmp[5, j] = get_RMSE(test_data$y, predict(model5, test_data))
RMSE_tst_tmp[6, j] = get_RMSE(test_data$y, predict(model6, test_data))
RMSE_tst_tmp[7, j] = get_RMSE(test_data$y, predict(model7, test_data))
RMSE_tst_tmp[8, j] = get_RMSE(test_data$y, predict(model8, test_data))
RMSE_tst_tmp[9, j] = get_RMSE(test_data$y, predict(model9, test_data))
Model_select_trn[j,i] <<- which.min(RMSE_trn_tmp[, j])
Model_select_tst[j,i] <<- which.min(RMSE_tst_tmp[, j])
}
#complete 1 of 3 sigmas
for (k in 1:9) {
RMSE_train[k, i] <<- mean(RMSE_trn_tmp[k,])
RMSE_test[k, i] <<- mean(RMSE_tst_tmp[k,])
}
RMSE_trn_tmp = matrix(0, 9, sim_cycle)
RMSE_tst_tmp = matrix(0, 9, sim_cycle)
}
Model_num = seq(1:9)
RMSE_train <<- cbind(Model_num, RMSE_train)
RMSE_test <<- cbind(Model_num, RMSE_test)
}This above process will be repeated with
simulations
for each of the
values
of
.
For each value of
,
there will be a plot that shows how average Train RMSE and average Test
RMSE changes as a function of model size. We can also tell number of
times the model of each size was chosen for each value of
.
We will simulate the
vector
times and fit
models.
- Simulation results will be plotted by below functions
# Plot average RMSE as a function of Model size
plot_RMSE = function(show_diverge = FALSE, select_model = -1){
if (show_diverge == FALSE){
ylim1 = c(0.5, 4.7)
ylim2 = c(0.5, 4.7)
ylim3 = c(0.5, 4.7)
}else{
ylim1 = c(0.5, 2.5)
ylim2 = c(1.5, 3.5)
ylim3 = c(3.5, 5)
}
par(mfrow=c(1,3))
plot(sig1 ~ Model_num, RMSE_train, type = 'l', col = 'orange',
cex = 1, pch = 1, ylim = ylim1, main = 'Sigma = 1',
xlab = 'Model size', ylab = 'RMSE', lwd = 3)
lines(sig1 ~ Model_num, RMSE_test, col = 'blue', lty = 3, lwd = 2)
if (select_model > 0){abline(v = select_model, col = 'red', lty = 6, lwd = 2)}
legend('topright', c('Train', 'Test'), lty = c(1, 3), col = c('orange', 'blue'))
plot(sig2 ~ Model_num, RMSE_train, type = 'l', col = 'orange',
cex = 1, pch = 1, ylim = ylim2, main = 'Sigma = 2',
xlab = 'Model size', ylab = 'RMSE', lwd = 3)
lines(sig2 ~ Model_num, RMSE_test, col = 'blue', lty = 3, lwd = 2)
if (select_model > 0){abline(v = select_model, col = 'red', lty = 6, lwd = 2)}
legend('topright', c('Train', 'Test'), lty = c(1, 3), col = c('orange', 'blue'))
plot(sig3 ~ Model_num, RMSE_train, type = 'l', col = 'orange',
cex = 1, pch = 1, ylim = ylim3, main = 'Sigma = 4',
xlab = 'Model size', ylab = 'RMSE', lwd = 3)
lines(sig3 ~ Model_num, RMSE_test, col = 'blue', lty = 3, lwd = 2)
if (select_model > 0){abline(v = select_model, col = 'red', lty = 6, lwd = 2)}
legend('topright', c('Train', 'Test'), lty = c(1, 3), col = c('orange', 'blue'))
}
# Print RMSE by sigma, model size and data set
print_by_sigma = function(){
RMSE_sig1 = unname(cbind(RMSE_train$Model_num, RMSE_train$sig1, RMSE_test$sig1))
RMSE_sig2 = unname(cbind(RMSE_train$sig2, RMSE_test$sig2))
RMSE_sig3 = unname(cbind(RMSE_train$sig3, RMSE_test$sig3))
df = data.frame(RMSE_sig1, RMSE_sig2, RMSE_sig3)
colnames(df) = c('Model size',
'Sigma.1.Train', 'Sigma.1.Test',
'Sigma.2.Train', 'Sigma.2.Test',
'Sigma.4.Train', 'Sigma.4.Test')
kable(df, "markdown")
}
# Print selected Model size of each sigma
print_model_selected = function(portion=FALSE, correct_model = 6){
sigma1 = table(factor(Model_select_tst[, 1], levels = 1:9))
sigma2 = table(factor(Model_select_tst[, 2], levels = 1:9))
sigma3 = table(factor(Model_select_tst[, 3], levels = 1:9))
combine = rbind(sigma1, sigma2, sigma3)
rownames(combine) = c('Sigma = 1', 'Sigma = 2', 'Sigma = 4')
if (portion == FALSE){
kable(combine, "markdown")
}else{
portion_sig1 = sigma1[correct_model] / sim_cycle
portion_sig2 = sigma2[correct_model] / sim_cycle
portion_sig3 = sigma3[correct_model] / sim_cycle
portion = rbind(portion_sig1, portion_sig2, portion_sig3)
rownames(portion) = c('Sigma = 1', 'Sigma = 2', 'Sigma = 4')
colnames(portion) = 'Portion of the correct Model Selected'
kable(portion, "markdown")
}
}#main
initialize_var(sim_cycle) #set no. of simulation cycle 1000
simulate(s2)
# Plot RMSE vs Model size
plot_RMSE()# Print RMSE value for each sigma, train and test set
print_by_sigma()| Model size | Sigma.1.Train | Sigma.1.Test | Sigma.2.Train | Sigma.2.Test | Sigma.4.Train | Sigma.4.Test |
|---|---|---|---|---|---|---|
| 1 | 2.5665 | 2.578 | 3.084 | 3.107 | 4.643 | 4.667 |
| 2 | 1.5885 | 1.608 | 2.335 | 2.364 | 4.177 | 4.208 |
| 3 | 1.1642 | 1.180 | 2.069 | 2.102 | 4.025 | 4.074 |
| 4 | 1.1276 | 1.147 | 2.046 | 2.088 | 4.006 | 4.073 |
| 5 | 1.1115 | 1.137 | 2.035 | 2.085 | 3.994 | 4.076 |
| 6 | 0.9861 | 1.014 | 1.970 | 2.025 | 3.954 | 4.055 |
| 7 | 0.9841 | 1.016 | 1.966 | 2.030 | 3.946 | 4.063 |
| 8 | 0.9822 | 1.018 | 1.962 | 2.034 | 3.938 | 4.071 |
| 9 | 0.9803 | 1.020 | 1.958 | 2.038 | 3.930 | 4.079 |
We will discuss whether the RMSE method always select the correct model.
- RMSE can be interpreted as the smaller the better.
- From the above 3 plots, we can see a considerably high RMSE value when model size is 1 and 2. It is a case of underfitting for both train and test set.
- RMSE value starts to diminish slowely after sitting at the model with size 3.
- Meanwhile, the test and train set RMSE start to diverge.
- Train set RMSE remains in a state of good performance and it is further improved at model with size 6. We don’t see any issue of underfitting as train set RMSE converges to the minimum at model with size 9.
- RMSE value starts to diminish slowely after model with size 3, and even slower than Train set RMSE.
- Test RMSE set converges to minimum point at model with size 6.
- However, it starts to rebound from model size 7 to 9. It is a case of overfitting.
# Zoom in the plots to show the effect of divergence
plot_RMSE(show_diverge = TRUE, select_model = 6)- Hence, we conclude that model with size 6 is the correct model for explaining the dataset. Although train set RMSE is the lowest for model 9, test set RMSE is higher, which indicates the model does not work well with new data and suffers from overfitting.
- In the above plots, the red line indicates the correct model size.
# No. of times a model is selected
print_model_selected()| 1 | 2 | 3 | 4 | 5 | 6 | 7 | 8 | 9 | |
|---|---|---|---|---|---|---|---|---|---|
| Sigma = 1 | 0 | 0 | 0 | 0 | 1 | 528 | 203 | 122 | 146 |
| Sigma = 2 | 0 | 0 | 30 | 12 | 14 | 524 | 196 | 110 | 114 |
| Sigma = 4 | 0 | 23 | 171 | 107 | 65 | 316 | 130 | 85 | 103 |
- The model with size 6, is the most selected model for all 3
(s).
- But the portion of the selection is not ideal. The highest
percentage rougly 54% of
= 1. The lowest is only 34% of
= 4.
# Portion of the correct model selected
print_model_selected(portion = TRUE, correct_model = 6)| Portion of the correct Model Selected | |
|---|---|
| Sigma = 1 | 0.528 |
| Sigma = 2 | 0.524 |
| Sigma = 4 | 0.316 |
- The RMSE method does not always select the correct model. It is not a reliable method for model selection in current study.
We will discuss whether the level of noice would affect our selection result.
# Re-plot for easy reference
plot_RMSE()- RMSE
- When
becomes larger, the average RMSE value becomes larger, but the variability of RMSE value becomes lower. There is a positive relationship between
and average RMSE value.
- When
becomes larger, the performace of test set will start to decrease earlier. Test and train set will diverge more than that of smaller
.
- When
# Re-plot for easy reference
print_model_selected()| 1 | 2 | 3 | 4 | 5 | 6 | 7 | 8 | 9 | |
|---|---|---|---|---|---|---|---|---|---|
| Sigma = 1 | 0 | 0 | 0 | 0 | 1 | 528 | 203 | 122 | 146 |
| Sigma = 2 | 0 | 0 | 30 | 12 | 14 | 524 | 196 | 110 | 114 |
| Sigma = 4 | 0 | 23 | 171 | 107 | 65 | 316 | 130 | 85 | 103 |
print_model_selected(portion = TRUE, correct_model = 6)| Portion of the correct Model Selected | |
|---|---|
| Sigma = 1 | 0.528 |
| Sigma = 2 | 0.524 |
| Sigma = 4 | 0.316 |
- Model selection
We can interpret as, with higher variance, there is higher probability other models will perform better, but generally over 1000 simulations, they perform poorly.
We can interpret that some other models would perform better due to high
variance. Therefore, we prefer a lower
value during model selection process.
In this simulation study we will investigate the power of the significance of regression test for simple linear regression.
Recall, we had defined the significance level,
, to
be the probability of a Type I error.
Similarly, the probability of a Type II error is often denoted using
;
however, this should not be confused with a regression parameter.
Power is the probability of rejecting the null hypothesis when the
null is not true, that is, the alternative is true and
is non-zero.
Essentially, power is the probability that a signal of a particular strength will be detected. Many things affect the power of a test. In this case, some of those are:
We’ll investigate the first three.
For simplicity, we will let , thus
is essentially controlling the amount of “signal.” We will then consider
different signals, noises, and sample sizes:
We will hold the significance level constant at .
# Seed
set.seed(birthday)# Global Parameter
beta_1 = seq(-2, 2, 0.1)
sigma = c(1, 2, 4)
n = c(10, 20, 30)
beta_0 = 0
alpha = 0.05
sim_cycle = 1000power = matrix(0, length(beta_1), length(sigma)) # power for each sigma
power_n = data.frame(n10 = power, n20 = power, n30 = power) # power for all nWe will simulate from the model
# simulate data
sim_slr = function(x, beta_1, sigma, beta_0 = 0){
n = length(x)
epsilon = rnorm(n, mean = 0, sd = sigma)
Y = beta_0 + beta_1 * x + epsilon
data.frame(y = Y, x = x)
}For each possible
and
combination, simulate from the true model at least
times. Each
time, perform the significance of the regression test.
The following sample code is the method to generate the predictor
values, x: values for different sample sizes.
x_values = seq(0, 5, length = n)# estimate power by different sample sizes
Estimate_power = function(size = 10){
x = seq(0, 5, length = size) # generate x by the sample size
for (i in 1:length(sigma)) { #sigma
for (j in 1:length(beta_1)) { #beta_1
cnt_rej = 0
for (k in 1:sim_cycle) { #sim_cycle = 1000
sim_data = sim_slr(x, beta_1[j], sigma[i])
ml = lm(y ~ x, sim_data)
p_value = summary(ml)$coef['x', 'Pr(>|t|)'] # retrieve p-value
if (p_value < alpha) {
cnt_rej = cnt_rej + 1 #count no. of rejection
}
}
power[j, i] <<- cnt_rej
cnt_rej = 0 #reset count after simulation cycle
} #beta_1
} #sigma
if (size == n[1]) {
power_n[, 1:3] <<- power
} else if (size == n[2]) {
power_n[, 4:6] <<- power
} else if (size == n[3]) {
power_n[, 7:9] <<- power
}
}In the functions below, before plotting the result, we will calculate the estimate of Power by this formula,
# initialize variables
initialize_var = function(cycle){
sim_cycle <<- cycle
power <<- matrix(0, length(beta_1), length(sigma))
power_n <<- data.frame(n10 = power, n20 = power, n30 = power) # variable for all n(s)
}
# plot power for each sigma, with different n
plot_power = function(Power_n, plot1 = FALSE, plot2 = FALSE, plot3 = FALSE){
tmp_power = Power_n / sim_cycle #calculate estimate of Power
tmp_power = cbind(beta_1, tmp_power) #add beta_1 value
if (plot1 == TRUE){
plot(n10.1 ~ beta_1, tmp_power, lty = 3,
pch = 1, lwd = 2, col = 'darkorange', ylab = 'Power', xlab = expression(beta[1]),
main = expression('Strength of Power vs ' ~ beta[1] ~ ' | ' ~ sigma ~ ' = 1'))
lines(n20.1 ~ beta_1, tmp_power, col = 'blue', lty = 2, lwd = 2)
lines(n30.1 ~ beta_1, tmp_power, col = 'darkgreen', lty = 1, lwd = 2)
legend('bottomright', pch = c(1, NA, NA), lty = c(NA, 2, 1), lwd = c(2, 2, 2),
col = c('darkorange', 'blue', 'darkgreen'),
c('n = 10', 'n = 20', 'n = 30'))
}
if (plot2 == TRUE){
plot(n10.2 ~ beta_1, tmp_power,
pch = 1, lwd = 2, col = 'darkorange', ylab = 'Power', xlab = expression(beta[1]),
main = expression('Strength of Power vs ' ~ beta[1] ~ ' | ' ~ sigma ~ ' = 2'),
ylim = c(0, 1))
lines(n20.2 ~ beta_1, tmp_power, col = 'blue', lty = 2, lwd = 2)
lines(n30.2 ~ beta_1, tmp_power, col = 'darkgreen', lty = 1, lwd = 2)
legend('bottomright', pch = c(1, NA, NA), lty = c(NA, 2, 1), lwd = c(2, 2, 2),
col = c('darkorange', 'blue', 'darkgreen'),
c('n = 10', 'n = 20', 'n = 30'))
}
if (plot3 == TRUE){
plot(n10.3 ~ beta_1, tmp_power,
pch = 1, lwd = 2, col = 'darkorange', ylab = 'Power', xlab = expression(beta[1]),
main = expression('Strength of Power vs ' ~ beta[1] ~ ' | ' ~ sigma ~ ' = 4'),
ylim = c(0, 1))
lines(n20.3 ~ beta_1, tmp_power, col = 'blue', lty = 2, lwd = 2)
lines(n30.3 ~ beta_1, tmp_power, col = 'darkgreen', lty = 1, lwd = 2)
legend('bottomright', pch = c(1, NA, NA), lty = c(NA, 2, 1), lwd = c(2, 2, 2),
col = c('darkorange', 'blue', 'darkgreen'),
c('n = 10', 'n = 20', 'n = 30'))
}
}
# print average power per sigma
effect_sigma_avg = function(Power_n){
tmp_power = Power_n / sim_cycle
df = data.frame(
'Sample size' = c(10, 20, 30),
'Sigma 4' = c(mean(tmp_power[, 3]),
mean(tmp_power[, 6]),
mean(tmp_power[, 9])),
'Sigma 2' = c(mean(tmp_power[, 2]),
mean(tmp_power[, 5]),
mean(tmp_power[, 8])),
'Sigma 1' = c(mean(tmp_power[, 1]),
mean(tmp_power[, 4]),
mean(tmp_power[, 7]))
)
kable(df, "markdown")
}
# print portion of power > 0.95
effect_sigma_95 = function(Power_n){
tmp_power = Power_n / sim_cycle
df2 = data.frame(
'Sample size' = c(10, 20, 30),
'Sigma 4' = c(mean(tmp_power[, 3] > 0.95),
mean(tmp_power[, 6] > 0.95),
mean(tmp_power[, 9] > 0.95)),
'Sigma 2' = c(mean(tmp_power[, 2] > 0.95),
mean(tmp_power[, 5] > 0.95),
mean(tmp_power[, 8] > 0.95)),
'Sigma 1' = c(mean(tmp_power[, 1] > 0.95),
mean(tmp_power[, 4] > 0.95),
mean(tmp_power[, 7] > 0.95)))
kable(df2, "markdown")
}
# plot distribution of power of each sigma and sample size
effect_sigma_hist = function(Power_n){
tmp_power = Power_n / sim_cycle
par(mfrow = c(3, 3))
# n = 10
hist(tmp_power[, 1], breaks = 10, freq = TRUE, ylim = c(0, 30),
xlim = c(0, 1), xlab = 'n = 10', col = 'orange', border = 'brown',
main = 'Power @ Sigma = 1') #sigma1
hist(tmp_power[, 2], breaks = 10, freq = TRUE, ylim = c(0, 30),
xlim = c(0, 1), xlab = 'n = 10', col = 'orange', border = 'brown',
main = 'Power @ Sigma = 2', ylab = '') #sigma2
hist(tmp_power[, 3], breaks = 8, freq = TRUE, ylim = c(0, 30),
xlim = c(0, 1), xlab = 'n = 10', col = 'orange', border = 'brown',
main = 'Power @ Sigma = 4', ylab = '') #sigma3
hist(tmp_power[, 4], breaks = 10, freq = TRUE, ylim = c(0, 35),
xlim = c(0, 1), main = '', xlab = 'n = 20',
col='orange', border='brown') #sigma1
hist(tmp_power[, 5], breaks = 10, freq = TRUE, ylim = c(0, 35),
xlim = c(0, 1), main = '', xlab = 'n = 20',
col = 'orange', border = 'brown', ylab = '') #sigma2
hist(tmp_power[, 6], breaks = 8, freq = TRUE, ylim = c(0, 35),
xlim = c(0, 1), main = '', xlab = 'n = 20',
col = 'orange', border = 'brown', ylab = '') #sigma3
hist(tmp_power[, 7], breaks = 10, freq = TRUE, ylim = c(0, 35),
xlim = c(0, 1), xlab = 'n = 30', main = '',
col = 'orange', border = 'brown') #sigma1
hist(tmp_power[, 8], breaks = 10, freq = TRUE, ylim = c(0, 35),
xlim = c(0, 1), xlab = 'n = 30', main = '',
col = 'orange', border = 'brown', ylab = '') #sigma2
hist(tmp_power[, 9], breaks = 8, freq = TRUE, ylim = c(0, 35),
xlim = c(0, 1), xlab = 'n = 30', main = '',
col = 'orange', border = 'brown', ylab = '') #sigma3
}
# print average power per sample size, beta_1
effect_sample = function(Power_n, from, to){
tmp_power = Power_n / sim_cycle
power_size = cbind(beta_1, cbind(tmp_power$n10.2, tmp_power$n20.2, tmp_power$n30.2))
colnames(power_size) = c('beta_1', 'n = 10', 'n = 20', 'n = 30')
kable(power_size[from:to, ], "markdown")
}
# Plot effect of beta vs Power
effect_beta = function(Power_n){
tmp_power = Power_n / sim_cycle
tmp_power = cbind(beta_1, tmp_power)
plot(n30.1 ~ abs(beta_1), tmp_power, col = 'blue',
main = expression('Strength of Power vs ' ~ beta[1]),
pch = 1, xlab = expression(beta[1]), ylab = 'Power')
points(n30.2 ~ abs(beta_1), tmp_power, col = 'orange')
points(n30.3 ~ abs(beta_1), tmp_power, col = 'red')
legend('bottomright', c('sigma = 1', 'sigma = 2', 'sigma = 4'),
col = c('blue', 'orange', 'red'), pch = 1)
}
# Boxplot effect of sigma vs power
effec_sigma_plot = function(Power_n){
tmp_power = Power_n / sim_cycle
tmp_power = cbind(beta_1, tmp_power)
s1 = c(tmp_power$n10.1, tmp_power$n20.1, tmp_power$n30.1)
s2 = c(tmp_power$n10.2, tmp_power$n20.2, tmp_power$n30.2)
s3 = c(tmp_power$n10.3, tmp_power$n20.3, tmp_power$n30.3)
tmp_power2 = cbind(s1, s2, s3)
colnames(tmp_power2) = c(1, 2, 4)
boxplot(tmp_power2, col = 'orange', xlab = 'Power',
main = expression('Strength of Power vs ' ~ sigma),
ylab = c(expression(sigma)), border = 'brown', horizontal = TRUE)
}
# Boxplot effect of samplesize vs power
effect_sample_plot = function(Power_n){
tmp_power = Power_n / sim_cycle
tmp_power = cbind(beta_1, tmp_power)
n10 = c(tmp_power$n10.1, tmp_power$n10.2, tmp_power$n10.3)
n20 = c(tmp_power$n20.1, tmp_power$n20.2, tmp_power$n20.3)
n30 = c(tmp_power$n30.1, tmp_power$n20.2, tmp_power$n30.3)
tmp_power2 = cbind(n10, n20, n30)
colnames(tmp_power2) = c(10, 20, 30)
boxplot(tmp_power2, col = 'cadetblue2', xlab = 'Power',
main = expression('Strength of Power vs Sample size'),
ylab = 'Sample Size', border = 'brown', horizontal = TRUE
)
}initialize_var(sim_cycle) #set no. of simulation cycle 1000
Estimate_power(n[1]) # n = 10
Estimate_power(n[2]) # n = 20
Estimate_power(n[3]) # n = 30plot_power(power_n, TRUE, TRUE, TRUE)- When sigma becomes smaller, the average strength of Power is higher.
effect_sigma_avg(power_n) #Average power| Sample.size | Sigma.4 | Sigma.2 | Sigma.1 |
|---|---|---|---|
| 10 | 0.2523 | 0.5676 | 0.7843 |
| 20 | 0.4157 | 0.7043 | 0.8523 |
| 30 | 0.5199 | 0.7599 | 0.8790 |
- When sigma becomes smaller, the portion of Power > 0.95 becomes
higher. When sigma = 1 and n = 30, there are 78% of the simulations
would be very likely to reject
.
- When sigma is large and beta_1 becomes closer to zero, the power starts to decrease earlier than that with smaller sigma. We can observe the trend from the above plots.
- When sigma becomes larger, the maximom rejection number becomes lower, and the power starts to deterioate earlier when beta_1 is closer to zero.
# Portion of Power > 0.95
effect_sigma_95(power_n)| Sample.size | Sigma.4 | Sigma.2 | Sigma.1 |
|---|---|---|---|
| 10 | 0.0000 | 0.1707 | 0.6098 |
| 20 | 0.0000 | 0.4634 | 0.7317 |
| 30 | 0.0976 | 0.5366 | 0.7805 |
# Plot the effect of sigma vs Power
effec_sigma_plot(power_n)The histograms below are combination of sigma and sample size. They show similar findings as discussed above.
# Plot distribution of Power of each sigma and n
effect_sigma_hist(power_n)# Re-plot for easy reference
par(mfrow=c(1,1))
plot_power(power_n, plot1 = TRUE) #Sigma = 1effect_sample_plot(power_n) # Plot the effect of Sample size n- When n becames larger, the average stength of power is higher. The improvement is more obvious when for sigma equals to 2 or 4.
effect_sigma_avg(power_n) # Print average power per sample size, sigma| Sample.size | Sigma.4 | Sigma.2 | Sigma.1 |
|---|---|---|---|
| 10 | 0.2523 | 0.5676 | 0.7843 |
| 20 | 0.4157 | 0.7043 | 0.8523 |
| 30 | 0.5199 | 0.7599 | 0.8790 |
- When n becomes smaller with a larger sigma, say sigma = 4, it is
very unlikely to reject
.
- However, when n = 30, with larger sigma, say sigma = 4, it is
possible to reject
, but chance is very low.
- When n becomes larger, with smaller sigma, both average power and
chance to reject
increase significantly.
effect_sigma_95(power_n) # Print portion of power > 95% per sample size, sigma| Sample.size | Sigma.4 | Sigma.2 | Sigma.1 |
|---|---|---|---|
| 10 | 0.0000 | 0.1707 | 0.6098 |
| 20 | 0.0000 | 0.4634 | 0.7317 |
| 30 | 0.0976 | 0.5366 | 0.7805 |
par(mfrow=c(3,1))
# Visualize the effect of sample size vs power decrease
plot_power(power_n, plot1 = TRUE)
plot_power(power_n, plot2 = TRUE)
plot_power(power_n, plot3 = TRUE)- There is a relationship between the sample size and the moment power starts to decrease.
- The smaller the value of n, the earlier the power starts to decrease.
- When n is larger, say
,
, power will hold at 1 from
before it starts to decrease.
- When n is smaller, say
, power starts to decrease when
. When
, it starts even earlier.
- We can see the trend from both the above 3 plots, and the table below.
effect_sample(power_n, 1, 15) #print the first 15 rows where power starts to decrease in each n| beta_1 | n = 10 | n = 20 | n = 30 |
|---|---|---|---|
| -2.0 | 0.995 | 1.000 | 1.000 |
| -1.9 | 0.987 | 1.000 | 1.000 |
| -1.8 | 0.977 | 1.000 | 1.000 |
| -1.7 | 0.942 | 1.000 | 1.000 |
| -1.6 | 0.933 | 0.999 | 1.000 |
| -1.5 | 0.917 | 0.997 | 1.000 |
| -1.4 | 0.882 | 0.996 | 1.000 |
| -1.3 | 0.829 | 0.990 | 1.000 |
| -1.2 | 0.743 | 0.973 | 0.997 |
| -1.1 | 0.684 | 0.942 | 0.994 |
| -1.0 | 0.600 | 0.889 | 0.973 |
| -0.9 | 0.528 | 0.831 | 0.943 |
| -0.8 | 0.416 | 0.724 | 0.884 |
| -0.7 | 0.348 | 0.609 | 0.801 |
| -0.6 | 0.276 | 0.479 | 0.644 |
- When the absolute value of Beta_1 comes close to zero, the Power decrases, and could be critically close to zero for all n. The trend can be seen in the above 3 plots.
- The relationship forms a symmetric, inverted-bell shape. The actual
numbers associated with
, are in the below table
effect_sample(power_n, 14, 29) #print row 14 to 29, where Beta_1 is close to zero| beta_1 | n = 10 | n = 20 | n = 30 |
|---|---|---|---|
| -0.7 | 0.348 | 0.609 | 0.801 |
| -0.6 | 0.276 | 0.479 | 0.644 |
| -0.5 | 0.210 | 0.372 | 0.507 |
| -0.4 | 0.140 | 0.241 | 0.370 |
| -0.3 | 0.101 | 0.155 | 0.224 |
| -0.2 | 0.083 | 0.101 | 0.135 |
| -0.1 | 0.057 | 0.065 | 0.087 |
| 0.0 | 0.050 | 0.050 | 0.048 |
| 0.1 | 0.052 | 0.074 | 0.058 |
| 0.2 | 0.070 | 0.099 | 0.147 |
| 0.3 | 0.129 | 0.182 | 0.215 |
| 0.4 | 0.146 | 0.250 | 0.365 |
| 0.5 | 0.183 | 0.374 | 0.487 |
| 0.6 | 0.269 | 0.499 | 0.674 |
| 0.7 | 0.354 | 0.612 | 0.801 |
| 0.8 | 0.401 | 0.753 | 0.880 |
- The absoule value of the signal strength,
, is very likely to have a positive relatinoship with the strength of power.
effect_beta(power_n) #plot effect of Beta_1 vs power, for each sigma 

















