# ============================================================================= # TEMA 07 — CP3: Tiempo hasta Impago Crediticio # ============================================================================= # Manual de Microeconometría — Carlos de Anta Puig # https://github.com/carlanta/MicroEconometrics Versión 1.0 — 2026 # # OBJETIVO: Modelizar el riesgo de impago de 1000 créditos (credit scoring). # Alta censura (~67%). Ratio deuda/ingreso como principal predictor. # INSTRUCCIONES: Session > Set Working Directory > To Source File Location # ============================================================================= pausa <- function(msg="\n>>> Pulsa ENTER para continuar...") { if (interactive()) readline(msg) else Sys.sleep(0.5) } if (!requireNamespace("survival", quietly=TRUE)) install.packages("survival", quiet=TRUE) suppressPackageStartupMessages(library(survival)) .get_script_dir <- function() { args <- commandArgs(trailingOnly = FALSE) for (a in args) { if (startsWith(a, "--file=")) return(dirname(normalizePath(substring(a, 8)))) } for (i in seq_len(sys.nframe())) { ofile <- tryCatch(sys.frame(i)$ofile, error = function(e) NULL) if (!is.null(ofile)) return(dirname(normalizePath(ofile))) } return(normalizePath(".")) } .sdir <- .get_script_dir() DATA_DIR <- normalizePath(file.path(.sdir, "..", "data"), mustWork=FALSE) OUTPUT_DIR <- normalizePath(file.path(.sdir, "..", "output"), mustWork=FALSE) if (!dir.exists(OUTPUT_DIR)) dir.create(OUTPUT_DIR, recursive=TRUE) cat("\n================================================================\n") cat(" CP03 — Tiempo hasta Impago Crediticio\n") cat("================================================================\n\n") load(file.path(DATA_DIR, "T07_CP03_impago_crediticio.RData")) cat("Dataset:", nrow(cred_dur), "créditos\n") cat(sprintf(" Impagos: %d | Sin impago (censurados): %d (%.0f%%)\n\n", sum(cred_dur$impago), sum(1-cred_dur$impago), 100*mean(1-cred_dur$impago))) # --- DESCRIPTIVOS --- cat("--- ESTADÍSTICOS DESCRIPTIVOS ---\n\n") vars <- c("meses_hasta_evento","importe","tipo_interes","ratio_deuda_ingreso", "historial_credito","hipoteca") labs <- c("Meses hasta evento","Importe","Tipo interés","Ratio deuda/ing.", "Historial cred.","Hipoteca") cat(sprintf(" %-18s %10s %10s %10s %10s\n", "Variable","Media","D.E.","Mín.","Máx.")) cat(sprintf(" %s\n", paste(rep("-",62), collapse=""))) for (i in seq_along(vars)) { x <- cred_dur[[vars[i]]] cat(sprintf(" %-18s %10.2f %10.2f %10.2f %10.2f\n", labs[i], mean(x), sd(x), min(x), max(x))) } pausa() # --- EDA --- png(file.path(OUTPUT_DIR, "T07_CP03_eda.png"), width=1400, height=700, res=150) par(mfrow=c(1,2), mar=c(4.5,4.5,2.5,0.5), cex.main=0.88) hist(cred_dur$meses_hasta_evento, breaks=30, col=gray(0.70), border="white", main="Distribución duración", xlab="Meses", ylab="Frecuencia") surv_obj <- Surv(cred_dur$meses_hasta_evento, cred_dur$impago) # KM por ratio alto/bajo cred_dur$ratio_alto <- ifelse(cred_dur$ratio_deuda_ingreso > median(cred_dur$ratio_deuda_ingreso), 1, 0) km_ratio <- survfit(surv_obj ~ cred_dur$ratio_alto) plot(km_ratio, lwd=2, lty=c(1,2), col=gray(c(0.2,0.5)), xlab="Meses", ylab="S(t)", main="KM: ratio deuda/ingreso", conf.int=FALSE) legend("bottomleft", c("Ratio bajo","Ratio alto"), lty=c(1,2), lwd=2, col=gray(c(0.2,0.5)), cex=0.8, bty="n") dev.off() cat("\n Gráfico: output/T07_CP03_eda.png\n") cat(" Los créditos con ratio alto impagan más rápido.\n") pausa() # --- COX --- cat("\n--- MODELO DE COX ---\n\n") cox <- coxph(surv_obj ~ importe + tipo_interes + ratio_deuda_ingreso + historial_credito + hipoteca, data=cred_dur) s <- summary(cox) cat(sprintf(" %-20s %9s %9s %8s %10s\n", "Variable","HR","Coef.","EE","p-valor")) cat(sprintf(" %s\n", paste(rep("-",62), collapse=""))) for (i in seq_len(nrow(s$coef))) { nm <- rownames(s$coef)[i] sig <- ifelse(s$coef[i,5]<0.001,"***",ifelse(s$coef[i,5]<0.01,"**", ifelse(s$coef[i,5]<0.05,"*"," "))) cat(sprintf(" %-20s %9.4f %9.4f %8.4f %10.6f %s\n", nm, s$coef[i,2], s$coef[i,1], s$coef[i,3], s$coef[i,5], sig)) } cat(sprintf("\n Concordancia: %.3f\n", s$concordance[1])) cat(sprintf("\n Ratio deuda/ingreso: HR=%.3f → el principal predictor de impago.\n", s$coef["ratio_deuda_ingreso",2])) cat(sprintf(" Historial: HR=%.3f → mejor historial REDUCE el riesgo.\n", s$coef["historial_credito",2])) pausa() # --- PROPORCIONALIDAD --- cat("\n--- PROPORCIONALIDAD (SCHOENFELD) ---\n\n") zph <- cox.zph(cox) for (i in seq_len(nrow(zph$table))) { cat(sprintf(" %-20s p = %.4f %s\n", rownames(zph$table)[i], zph$table[i,"p"], ifelse(zph$table[i,"p"]>0.05,"✓",""))) } pausa() # --- WEIBULL --- cat("\n--- WEIBULL ---\n\n") dur_pos <- pmax(cred_dur$meses_hasta_evento, 0.01) wei <- survreg(Surv(dur_pos, cred_dur$impago) ~ importe + tipo_interes + ratio_deuda_ingreso + historial_credito + hipoteca, data=cred_dur, dist="weibull") cat(sprintf(" Shape: %.3f | AIC: %.1f\n", 1/wei$scale, AIC(wei))) pausa() cat("\n================================================================\n") cat(" RESUMEN CP03 — Impago Crediticio\n") cat("================================================================\n") cat(sprintf(" Concordancia Cox: %.3f\n", s$concordance[1])) cat(" Alto ratio deuda/ingreso = principal factor de riesgo.\n") cat(" Buen historial crediticio = factor protector.\n") cat(" Modelo con alta censura (67%) — típico de credit scoring.\n") cat("================================================================\n")