-
Notifications
You must be signed in to change notification settings - Fork 0
Expand file tree
/
Copy pathbasicr2.txt
More file actions
72 lines (60 loc) · 1.91 KB
/
basicr2.txt
File metadata and controls
72 lines (60 loc) · 1.91 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
# 📥 DATA LOADING
# Option 1: comma-separated text file whose first row holds the column names.
df <- read.csv(file = "archivo.csv", header = TRUE, sep = ",")
# Option 2: Excel workbook. This assignment replaces the `df` read from the
# CSV above, so keep only the loader that matches your input file.
library(readxl)
df <- read_excel(path = "archivo.xlsx")
# 📊 UNIVARIATE ANALYSIS
# Base R per-column summary of the data frame.
summary(object = df)
# DataExplorer charts: an overview of the data set, then histograms.
library(DataExplorer)
plot_intro(data = df)
plot_histogram(data = df)
# 🔁 BIVARIATE ANALYSIS
# Numeric vs. categorical: box plot of Income for each level of Gender.
boxplot(Income ~ Gender, data = df)
# Numeric vs. numeric: scatter plot with Age on the x axis and Income on the
# y axis. The argument expressions are kept as-is because the default axis
# labels are derived from them.
plot(x = df$Age, y = df$Income)
# Categorical vs. categorical: cross-tabulated counts of Gender by Default.
table(df[["Gender"]], df[["Default"]])
# 🛠️ DATA IMPUTATION
# Simple imputation: replace missing Income values with the column mean.
# To impute with the median instead, swap `mean` for `median` on the line
# below. Running a median fill *after* the mean fill does nothing, because the
# mean fill leaves no NA in Income to replace, so only one of the two is kept.
df$Income[is.na(df$Income)] <- mean(df$Income, na.rm = TRUE)
# Model-based imputation of whatever is still missing, using predictive mean
# matching ("pmm"); `m = 1` requests a single completed data set.
library(mice)
# The imputation is stochastic and runs before any set.seed() call in this
# script, so pass an explicit seed to make the result reproducible.
df_imp <- mice(df, m = 1, method = "pmm", seed = 123)
# Extract the completed data set and continue working with it as `df`.
df <- complete(df_imp)
# 🔗 CORRELATION MATRIX
library(corrplot)
# Flag the numeric columns. vapply() always returns a logical vector, whereas
# sapply() returns a list for a zero-column data frame, which would break the
# subsetting below.
numeric_cols <- vapply(df, is.numeric, logical(1))
# Correlations between the numeric columns, using only rows that are complete
# across all of them.
cor_matrix <- cor(df[numeric_cols], use = "complete.obs")
# Draw the matrix as a colour-coded grid.
corrplot(cor_matrix, method = "color")
# 🔂 DATA PARTITION
# Seed the random number generator so the split below is reproducible.
set.seed(123)
library(caret)
# Row indices for a 70% sample drawn on `target`; `list = FALSE` returns them
# as a matrix instead of a list so they can index `df` directly.
particion <- createDataPartition(y = df$target, p = 0.7, list = FALSE)
# Selected rows form the training set; all remaining rows form the test set.
train <- df[particion, ]
test <- df[-particion, ]
# 🔄 CROSS-VALIDATION (K-FOLD)
# Resampling scheme: 5-fold cross-validation.
ctrl <- trainControl(number = 5, method = "cv")
# Fit a GLM of `target` on every other column of `train` under that scheme.
modelo <- train(
  target ~ .,
  data = train,
  trControl = ctrl,
  method = "glm"
)
# 🌳 DECISION TREES
library(rpart)
# Classification tree (`method = "class"`) of `target` on all other columns.
arbol <- rpart(formula = target ~ ., data = train, method = "class")
# Plot the fitted tree.
library(rpart.plot)
rpart.plot(x = arbol)
# 📉 LOGISTIC REGRESSION
# Binomial GLM of `target` on all other columns of the training set. This
# assignment replaces any earlier object named `modelo`.
modelo <- glm(formula = target ~ ., family = "binomial", data = train)
# Model summary of the fit.
summary(modelo)
# ✅ CONFUSION MATRIX
library(caret)
# Predicted probabilities for the test set, then hard 0/1 labels using a
# 0.5 cut-off.
pred <- predict(modelo, newdata = test, type = "response")
pred_clas <- ifelse(pred > 0.5, 1, 0)
# Give both factors the same explicit levels so the table is always 2 x 2,
# even when every prediction falls into a single class.
# NOTE(review): assumes `target` is coded 0/1 — confirm against the data.
class_levels <- c(0, 1)
# `positive = "1"` makes sensitivity, specificity, etc. refer to class 1;
# without it caret treats the first factor level ("0") as the positive class.
confusionMatrix(
  data = factor(pred_clas, levels = class_levels),
  reference = factor(test$target, levels = class_levels),
  positive = "1"
)
# 📈 ROC CURVE AND AUC
library(pROC)
# Build the ROC object from the observed test labels and the predicted
# probabilities held in `pred`.
roc_obj <- roc(response = test[["target"]], predictor = pred)
# Draw the curve, then report the area under it.
plot(roc_obj)
auc(roc_obj)
# 📐 WOE AND IV (for scoring)
library(scorecard)
# Bin the columns of `df` against the response column "target".
bins <- woebin(dt = df, y = "target")
# Apply those bins to `df` to obtain the WOE-transformed data set.
df_woe <- woebin_ply(dt = df, bins = bins)
# 📦 EXPORT DATA
# Write `df` to a CSV file, omitting the row-name column.
write.csv(x = df, file = "salida.csv", row.names = FALSE)