# \donttest{
## ------------------------------------------------------------------
## Construct sample panel dataset (banks00_07)
## ------------------------------------------------------------------
## Produces two data frames from the raw file "2b_QLH.txt":
##   banks00_07  bank-year panel for 2000-2007, at most 500 randomly
##               drawn banks, each observed in more than 4 periods
##   banks07     the rows of banks00_07 with year == 2007
# Download data from the link in "Source"
banks00_07 <- read.delim("2b_QLH.txt")

# rename 'entity' to 'id'
names(banks00_07)[names(banks00_07) == "entity"] <- "id"

# keep only years 2000-2007
banks00_07 <- banks00_07[
  banks00_07$year >= 2000 & banks00_07$year <= 2007, ]

# restrict sample to interquartile range of total assets
# (bounds are computed on the 2000-2007 rows only; both ends inclusive)
q1q3 <- quantile(banks00_07$TA, probs = c(0.25, 0.75))
banks00_07 <- banks00_07[
  banks00_07$TA >= q1q3[1] & banks00_07$TA <= q1q3[2], ]

# generate required variables
banks00_07$TC <- banks00_07$TOC                   # copy of TOC under the name TC
banks00_07$ER <- banks00_07$Z  / banks00_07$TA    # Equity ratio
banks00_07$LA <- banks00_07$Y2 / banks00_07$TA    # Loans-to-assets ratio

# keep only needed variables (columns stay in their original file order;
# names listed here but absent from the data are silently ignored)
keep.vars <- c("id", "year", "Ti", "TC", "Y1", "Y2", "W1", "W2",
               "ER", "LA", "TA", "LLP")
banks00_07 <- banks00_07[, colnames(banks00_07) %in% keep.vars]

# number of periods per id
# ave() writes each id's row count back into that id's own rows, so the
# result does not rely on the rows being grouped or sorted by id (the
# data are only sorted in the last step of this script)
banks00_07$Ti <- ave(seq_along(banks00_07$id), banks00_07$id, FUN = length)

# keep if Ti > 4
banks00_07 <- banks00_07[banks00_07$Ti > 4, ]

# complete observations only
# (this can shorten some panels, which is why Ti is recomputed below)
banks00_07 <- banks00_07[complete.cases(banks00_07), ]

# sample 500 banks at random; the fixed seed makes the draw reproducible
set.seed(816376586)
id_names <- unique(banks00_07$id)
ids2choose <- sample(id_names, 500)
banks00_07 <- banks00_07[banks00_07$id %in% ids2choose, ]

# recompute Ti on the sampled, complete-case rows and re-apply Ti > 4
banks00_07$Ti <- ave(seq_along(banks00_07$id), banks00_07$id, FUN = length)
banks00_07 <- banks00_07[banks00_07$Ti > 4, ]

# sort by bank, then by year within bank
banks00_07 <- banks00_07[order(banks00_07$id, banks00_07$year), ]

# cross-section for the last year of the panel
banks07 <- banks00_07[banks00_07$year == 2007, ]
# }
# Run the code above in your browser using DataLab