-
Notifications
You must be signed in to change notification settings - Fork 7
/
Copy pathhw2ques4.R
35 lines (27 loc) · 1.21 KB
/
hw2ques4.R
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
# Regularized (glmnet) logistic regression on the credit data:
# fit with cross-validation on an 80% training split, inspect the
# coefficients at lambda.1se, and measure accuracy on the held-out 20%.
library(glmnet)

# Load the credit data.
credit.xy <- read.table("credit_data.txt", header = TRUE)
dim(credit.xy)

# Select 80% of the rows as training data. The row count is taken from the
# data itself rather than hard-coded, so the split stays valid if the file
# changes.
set.seed(123)
n_obs <- nrow(credit.xy)
train <- sample(seq_len(n_obs), floor(0.8 * n_obs))

# Columns 3 to 15 are used as the predictor matrix; the response `Fail`
# is remapped from 0/1 to -1/1.
predictor_cols <- 3:15
x <- as.matrix(credit.xy[train, predictor_cols])
y <- 2 * credit.xy$Fail[train] - 1

# Fit regularized logistic regression to the training data and plot the
# cross-validation curve.
credit.glmnet <- cv.glmnet(x, y, family = "binomial")
plot(credit.glmnet)
# Error decreases between lambda min and lambda max and starts
# flattening out after about 10 predictors have non-zero coefficients.

# Extract the coefficients at the one-standard-error lambda. For cv.glmnet
# objects the penalty is selected through `s`; a `lambda =` argument is
# not part of the coef()/predict() signature and would be silently ignored.
beta <- coef(credit.glmnet, s = "lambda.1se")
print(beta)
# Three predictors, OperProfit, FiscalLag and InFinan, have coefficients
# equal to zero.

# Predict on the test set, build the confusion matrix and compute accuracy.
x.test <- as.matrix(credit.xy[-train, predictor_cols])
y.test <- credit.xy$Fail[-train]
yHat <- as.numeric(
  predict(credit.glmnet, x.test, type = "class", s = "lambda.1se")
)

# Truth is coded 0/1 and predictions -1/1. Fixing the factor levels keeps
# the table 2x2 (negative class first, positive class second) even when
# only one class is predicted, so the diagonal always holds the correctly
# classified counts.
conf_mat <- table(
  actual = factor(y.test, levels = c(0, 1)),
  predicted = factor(yHat, levels = c(-1, 1))
)
accuracy <- sum(diag(conf_mat)) / sum(conf_mat)
print(conf_mat)
print(accuracy)