# =============================================================================
# TOPIC 07 — CP1: Unemployment Duration
# =============================================================================
# Manual de Microeconometría — Carlos de Anta Puig
# https://github.com/carlanta/MicroEconometrics  Version 1.0 — 2026
#
# GOAL: Analyse the unemployment duration of 800 individuals.
#   Kaplan-Meier by group, Cox model with 4 covariates, proportional-hazards
#   check (Schoenfeld residuals) and a Weibull parametric model.
# INSTRUCTIONS: Session > Set Working Directory > To Source File Location
# =============================================================================

# Pause between sections: wait for ENTER in an interactive session, otherwise
# sleep briefly so batch runs do not block. The value is returned invisibly so
# that calling pausa() at the console does not echo `[1] ""`.
pausa <- function(msg = "\n>>> Pulsa ENTER para continuar...") {
  if (interactive()) {
    invisible(readline(msg))
  } else {
    invisible(Sys.sleep(0.5))
  }
}

# Install (if missing) and attach the required packages.
pkgs <- c("survival")
for (p in pkgs) {
  if (!requireNamespace(p, quietly = TRUE)) install.packages(p, quiet = TRUE)
}
suppressPackageStartupMessages({
  library(survival)
})

# Locate the directory of this script.
# Tries, in order: the `--file=` command-line argument (Rscript), the `ofile`
# variable found in a calling frame (set when the file is run via source()),
# and finally the current working directory.
.get_script_dir <- function() {
  args <- commandArgs(trailingOnly = FALSE)
  for (a in args) {
    if (startsWith(a, "--file=")) {
      # Drop the 7-character "--file=" prefix.
      return(dirname(normalizePath(substring(a, 8))))
    }
  }
  for (i in seq_len(sys.nframe())) {
    ofile <- tryCatch(sys.frame(i)$ofile, error = function(e) NULL)
    if (!is.null(ofile)) {
      return(dirname(normalizePath(ofile)))
    }
  }
  normalizePath(".")
}

# Map p-values to significance marks ("***" < 0.001, "**" < 0.01, "*" < 0.05,
# a single blank otherwise). Vectorised; NA p-values yield NA.
.sig_stars <- function(p) {
  c("***", "**", "*", " ")[findInterval(p, c(0.001, 0.01, 0.05)) + 1L]
}

# Draw the two-panel exploratory figure (duration histogram and Kaplan-Meier
# curves by benefit status) into a PNG file.
#   path    : output PNG path.
#   dur     : numeric vector of durations (months).
#   km_fit  : survfit object with two strata (without / with benefit).
# The device is closed via on.exit() so it is released even if a plotting
# call fails part-way through.
.plot_eda <- function(path, dur, km_fit) {
  png(path, width = 1400, height = 700, res = 150)
  on.exit(dev.off(), add = TRUE)
  par(mfrow = c(1, 2), mar = c(4.5, 4.5, 2.5, 0.5), cex.main = 0.88)
  hist(dur, breaks = 30, col = gray(0.70), border = "white",
       main = "Distribución duración", xlab = "Meses", ylab = "Frecuencia")
  plot(km_fit, lwd = 2, lty = c(1, 2), col = gray(c(0.2, 0.5)),
       xlab = "Meses", ylab = "S(t)", main = "KM por prestación",
       conf.int = FALSE)
  legend("topright", c("Sin prestación", "Con prestación"),
         lty = c(1, 2), lwd = 2, col = gray(c(0.2, 0.5)), cex = 0.8, bty = "n")
  # Horizontal reference at S(t) = 0.5 to read off median durations.
  abline(h = 0.5, lty = 3, col = gray(0.6))
  invisible(path)
}

# --- PATHS ---
.sdir <- .get_script_dir()
DATA_DIR <- normalizePath(file.path(.sdir, "..", "data"), mustWork = FALSE)
OUTPUT_DIR <- normalizePath(file.path(.sdir, "..", "output"), mustWork = FALSE)
if (!dir.exists(OUTPUT_DIR)) dir.create(OUTPUT_DIR, recursive = TRUE)

cat("\n================================================================\n")
cat(" CP01 — Duración del Desempleo\n")
cat("================================================================\n\n")

# --- DATA ---
# Fail early with a clear message if the data file or the expected object is
# missing, instead of the generic load()/object-not-found errors.
data_file <- file.path(DATA_DIR, "T07_CP01_duracion_desempleo.RData")
if (!file.exists(data_file)) {
  stop("No se encuentra el fichero de datos: ", data_file, call. = FALSE)
}
load(data_file)
if (!exists("desemp_dur")) {
  stop("El fichero de datos no contiene el objeto 'desemp_dur'.", call. = FALSE)
}

# NOTE(review): `evento` is used below as the event indicator of Surv(), so it
# is presumably coded 1 = event observed, 0 = censored — confirm in the data.
cat("Dataset:", nrow(desemp_dur), "individuos\n")
cat(sprintf(" Eventos: %d | Censurados: %d (%.0f%%)\n\n",
            sum(desemp_dur$evento), sum(1 - desemp_dur$evento),
            100 * mean(1 - desemp_dur$evento)))

# --- DESCRIPTIVE STATISTICS ---
cat("--- ESTADÍSTICOS DESCRIPTIVOS ---\n\n")
vars <- c("meses_desempleo", "edad", "educacion", "prestacion", "sexo")
labs <- c("Meses desempleo", "Edad", "Educación", "Prestación", "Sexo (mujer)")
cat(sprintf(" %-16s %8s %8s %8s %8s\n",
            "Variable", "Media", "D.E.", "Mín.", "Máx."))
cat(sprintf(" %s\n", strrep("-", 54)))
for (i in seq_along(vars)) {
  x <- desemp_dur[[vars[i]]]
  cat(sprintf(" %-16s %8.2f %8.2f %8.2f %8.2f\n",
              labs[i], mean(x), sd(x), min(x), max(x)))
}
pausa()

# --- EXPLORATORY PLOTS ---
cat("\n--- ANÁLISIS EXPLORATORIO ---\n\n")
# The survival response is built once and reused by the KM, log-rank and Cox
# fits below.
surv_obj <- Surv(desemp_dur$meses_desempleo, desemp_dur$evento)
km_prest <- survfit(surv_obj ~ prestacion, data = desemp_dur)
.plot_eda(file.path(OUTPUT_DIR, "T07_CP01_eda.png"),
          desemp_dur$meses_desempleo, km_prest)
cat(" Gráfico: output/T07_CP01_eda.png\n")
# NOTE(review): the next statement is fixed text, not derived from km_prest.
cat(" Los que cobran prestación tienen duración mayor (curva más alta).\n")
pausa()

# --- LOG-RANK ---
cat("\n--- TEST DE LOG-RANK: Prestación ---\n\n")
lr <- survdiff(surv_obj ~ prestacion, data = desemp_dur)
# Degrees of freedom = number of groups - 1 (equals 1 for a binary covariate).
p_lr <- pchisq(lr$chisq, df = length(lr$n) - 1, lower.tail = FALSE)
cat(sprintf(" Chi² = %.2f | p = %.4f\n", lr$chisq, p_lr))
if (p_lr < 0.05) {
  cat(" ✓ Diferencia significativa.\n")
} else {
  cat(" ✗ No significativa.\n")
}
pausa()

# --- COX MODEL ---
cat("\n--- MODELO DE COX ---\n\n")
cox <- coxph(surv_obj ~ edad + educacion + prestacion + sexo, data = desemp_dur)
s <- summary(cox)
# Columns are addressed by name rather than position so the table stays
# correct if the summary gains extra columns (e.g. a robust standard error).
coef_tab <- s$coefficients
cat(sprintf(" %-16s %9s %9s %8s %10s\n",
            "Variable", "HR", "Coef.", "EE", "p-valor"))
cat(sprintf(" %s\n", strrep("-", 58)))
cat(sprintf(" %-16s %9.4f %9.4f %8.4f %10.6f %s\n",
            rownames(coef_tab),
            coef_tab[, "exp(coef)"], coef_tab[, "coef"],
            coef_tab[, "se(coef)"], coef_tab[, "Pr(>|z|)"],
            .sig_stars(coef_tab[, "Pr(>|z|)"])),
    sep = "")
cat(sprintf("\n Concordancia: %.3f\n", s$concordance[1]))

cat("\n INTERPRETACIÓN:\n")
hr_educ <- coef_tab["educacion", "exp(coef)"]
hr_prest <- coef_tab["prestacion", "exp(coef)"]
dir_educ <- if (hr_educ > 1) "AUMENTA" else "REDUCE"
dir_prest <- if (hr_prest > 1) "AUMENTA" else "REDUCE"
# The "disincentive effect" label only applies when benefits lower the exit
# hazard (HR < 1); it is omitted otherwise to avoid a contradictory message.
nota_prest <- if (hr_prest < 1) " (efecto desincentivador)" else ""
cat(sprintf(" - Educación: HR=%.3f → cada año de educación %s el riesgo\n",
            hr_educ, dir_educ))
cat(sprintf(" de salir del desempleo en un %.1f%%.\n",
            abs(100 * (hr_educ - 1))))
cat(sprintf(" - Prestación: HR=%.3f → cobrar prestación %s el riesgo\n",
            hr_prest, dir_prest))
cat(sprintf(" de salir en un %.1f%%%s.\n",
            abs(100 * (hr_prest - 1)), nota_prest))
pausa()

# --- PROPORTIONAL HAZARDS ---
cat("\n--- TEST DE PROPORCIONALIDAD (SCHOENFELD) ---\n\n")
zph <- cox.zph(cox)
p_zph <- zph$table[, "p"]
# A check mark flags rows where proportionality is not rejected at 5%.
cat(sprintf(" %-16s p = %.4f %s\n",
            rownames(zph$table), p_zph, ifelse(p_zph > 0.05, "✓", "")),
    sep = "")
pausa()

# --- WEIBULL ---
cat("\n--- MODELO WEIBULL ---\n\n")
# survreg() needs strictly positive times: floor durations at 0.01.
dur_pos <- pmax(desemp_dur$meses_desempleo, 0.01)
surv_pos <- Surv(dur_pos, desemp_dur$evento)
wei <- survreg(surv_pos ~ edad + educacion + prestacion + sexo,
               data = desemp_dur, dist = "weibull")
# Shape is computed as the reciprocal of the scale reported by survreg().
shape <- 1 / wei$scale
cat(sprintf(" Shape (alpha): %.3f → hazard %s\n", shape,
            if (shape > 1) "CRECIENTE" else "DECRECIENTE"))
cat(sprintf(" AIC: %.1f\n\n", AIC(wei)))
ct_w <- summary(wei)$table
# Report covariate rows only: exclude the intercept and Log(scale) by name
# instead of relying on their position in the table.
cov_rows <- setdiff(rownames(ct_w), c("(Intercept)", "Log(scale)"))
cat(sprintf(" %-16s Coef: %8.4f | p: %.4f\n",
            cov_rows, ct_w[cov_rows, "Value"], ct_w[cov_rows, "p"]),
    sep = "")
pausa()

# --- SUMMARY ---
cat("\n================================================================\n")
cat(" RESUMEN CP01 — Duración del Desempleo\n")
cat("================================================================\n\n")
cat(sprintf(" Observaciones: %d | Censurados: %.0f%%\n",
            nrow(desemp_dur), 100 * mean(1 - desemp_dur$evento)))
cat(sprintf(" Log-rank (prestación): p = %.4f\n", p_lr))
cat(sprintf(" Cox: concordancia = %.3f\n", s$concordance[1]))
cat(sprintf(" Weibull shape = %.3f (%s)\n", shape,
            if (shape > 1) "creciente" else "decreciente"))
# NOTE(review): fixed conclusion text, not derived from the fitted models.
cat(" La prestación alarga la duración; la educación la acorta.\n")
cat("================================================================\n")