Learn R Programming

BIFIEsurvey (version 1.5-0)

BIFIE.data.transform: Data Transformation for BIFIEdata Objects

Description

Computes a data transformation for BIFIEdata objects.

Usage

BIFIE.data.transform( bifieobj , transform.formula , varnames.new=NULL )

Arguments

bifieobj
Object of class BIFIEdata
transform.formula
R formula object for data transformation.
varnames.new
Optional vector of names for newly defined variables.

Value

  • An object of class BIFIEdata. Additional values are
  • varnames.added: Added variables in data transformation
  • varsindex.added: Indices of added variables

Examples

Run this code
# BIFIE.data(), BIFIE.data.transform() and the other BIFIE.* functions used
# below belong to the BIFIEsurvey package documented on this page. Attach it
# explicitly so the script also runs outside the package's own example runner.
library(BIFIEsurvey)
library(miceadds)

#############################################################################
# EXAMPLE 1: Data transformations for TIMSS data
#############################################################################

# load the example data: data.timss2 is indexed as a list below and
# data.timssrep is passed on as the wgtrep argument
data(data.timss2)
data(data.timssrep)
# create BIFIEdata object; weights are taken from column TOTWGT of the first
# list element, wgtrep is data.timssrep without its first column
bifieobj1 <- BIFIE.data( data.timss2 , wgt=data.timss2[[1]]$TOTWGT ,
            wgtrep=data.timssrep[,-1] )
# create BIFIEdata object in compact way (cdata=TRUE)
bifieobj2 <- BIFIE.data( data.timss2 , wgt=data.timss2[[1]]$TOTWGT ,
            wgtrep=data.timssrep[,-1] , cdata=TRUE)

# ---------------------------------------------------------------------------
# Transformation 1: squared and cubic versions of the variable books
# ---------------------------------------------------------------------------
# one I() term per variable that is to be added
transform.formula <- ~ 0 + I(books^2) + I(books^3)
# as.character(transform.formula)
bifieobj <- BIFIE.data.transform(
  bifieobj1,
  transform.formula = transform.formula
)
bifieobj$variables
# give the two added variables readable names
bifieobj$varnames[bifieobj$varsindex.added] <- c("books_sq", "books_cub")

# descriptive statistics of the two new variables
res1 <- BIFIE.univar(bifieobj, vars = c("books_sq", "books_cub"))
summary(res1)

# ---------------------------------------------------------------------------
# Transformation 2: dummy variables for the variable books
# ---------------------------------------------------------------------------
transform.formula <- ~ 0 + as.factor(books)
bifieobj <- BIFIE.data.transform(
  bifieobj,
  transform.formula = transform.formula
)
##   Included 5 variables: as.factor(books)1 as.factor(books)2 as.factor(books)3
##        as.factor(books)4 as.factor(books)5
# replace the generated names of the five added columns
bifieobj$varnames[bifieobj$varsindex.added] <- paste0("books_D", 1:5)

# ---------------------------------------------------------------------------
# Transformation 3: discretized mathematics score
# ---------------------------------------------------------------------------
# histogram of the original score ASMMAT
hi3a <- BIFIE.hist(bifieobj, vars = "ASMMAT")
plot(hi3a)
# cut ASMMAT at 200, 300, ..., 800 and keep the numeric interval index
transform.formula <- ~ 0 +
  I(as.numeric(cut(ASMMAT, breaks = seq(200, 800, 100))))
bifieobj <- BIFIE.data.transform(
  bifieobj,
  transform.formula = transform.formula,
  varnames.new = "ASMMAT_discret"
)
# histogram of the discretized score
hi3b <- BIFIE.hist(bifieobj, vars = "ASMMAT_discret", breaks = 1:7)
plot(hi3b)
# frequency table of the discretized score (se = FALSE)
fr3b <- BIFIE.freq(bifieobj, vars = "ASMMAT_discret", se = FALSE)
summary(fr3b)

# ---------------------------------------------------------------------------
# Transformation 4: standardized versions of the variable books
# ---------------------------------------------------------------------------

# try out the building blocks on a single dataset first
dat1 <- bifieobj$dat1
weighted.mean(dat1[, "books"], dat1[, "TOTWGT"], na.rm = TRUE)
sqrt(Hmisc::wtd.var(dat1[, "books"], dat1[, "TOTWGT"], na.rm = TRUE))

# z standardization: subtract the weighted mean, divide by the weighted SD
transform.formula <- ~ 0 + I(
  (books - weighted.mean(books, TOTWGT, na.rm = TRUE)) /
    sqrt(Hmisc::wtd.var(books, TOTWGT, na.rm = TRUE))
)
bifieobj <- BIFIE.data.transform(
  bifieobj,
  transform.formula = transform.formula,
  varnames.new = "z_books"
)

# same standardization, rescaled to M=500 and SD=100
transform.formula <- ~ 0 + I(
  500 + 100 * (books - weighted.mean(books, TOTWGT, na.rm = TRUE)) /
    sqrt(Hmisc::wtd.var(books, TOTWGT, na.rm = TRUE))
)
bifieobj <- BIFIE.data.transform(
  bifieobj,
  transform.formula = transform.formula,
  varnames.new = "z500_books"
)

# standardize books with respect to M and SD of ALL imputed datasets:
# take both statistics from BIFIE.univar and paste them into the formula
res <- BIFIE.univar(bifieobj, vars = "books")
summary(res)
##       var  Nweight Ncases     M M_SE M_fmi M_VarMI M_VarRep    SD SD_SE SD_fmi
##   1 books 76588.72   4554 2.945 0.04     0       0    0.002 1.146 0.015      0
M <- round(res$output$mean1, 5)
SD <- round(res$output$sd1, 5)
transform.formula <- paste0(" ~ 0 + I( ( books - ", M, " ) / ", SD, ")")
##   > transform.formula
##   [1] " ~ 0 + I( ( books - 2.94496 ) / 1.14609)"
# the string has to be converted into a formula before it is passed on
bifieobj <- BIFIE.data.transform(
  bifieobj,
  transform.formula = as.formula(transform.formula),
  varnames.new = "zall_books"
)

# descriptive statistics of the three standardized variables
res4 <- BIFIE.univar(bifieobj, vars = c("z_books", "z500_books", "zall_books"))
summary(res4)

# ---------------------------------------------------------------------------
# Transformation 5: rank transformation of the variable ASMMAT
# ---------------------------------------------------------------------------

# percentage ranks on a single dataset, based on Hmisc::wtd.rank
dat1 <- bifieobj$dat1
100 * Hmisc::wtd.rank(dat1[, "ASMMAT"], w = dat1[, "TOTWGT"]) /
  sum(dat1[, "TOTWGT"])
# Auxiliary function for percentage ranks: the weighted rank of x
# (Hmisc::wtd.rank) times 100, divided by the sum of the weights w.
# Missing values are removed in both the rank and the weight sum.
wtd.percrank <- function( x , w ){
    weighted_rank <- Hmisc::wtd.rank( x , w , na.rm=TRUE )
    weight_total <- sum( w , na.rm=TRUE )
    100 * weighted_rank / weight_total
}
# try the helper on the single dataset
wtd.percrank(dat1[, "ASMMAT"], dat1[, "TOTWGT"])
# transformation formula that calls the helper
transform.formula <- ~ 0 + I(wtd.percrank(ASMMAT, TOTWGT))
# add the percentage ranks to the BIFIEdata object
bifieobj <- BIFIE.data.transform(
  bifieobj,
  transform.formula = transform.formula,
  varnames.new = "ASMMAT_rk"
)
# descriptive statistics of the rank variable
res5 <- BIFIE.univar(bifieobj, vars = c("ASMMAT_rk"))
summary(res5)

# ---------------------------------------------------------------------------
# Transformation 6: recode the variable books
# ---------------------------------------------------------------------------

library(car)
# target coding of books: "1,2=0 , 3,4=1 , 5=2"
dat1 <- bifieobj$dat1
# try the recoding on a single dataset with car::Recode
car::Recode(dat1[, "books"], "1:2='0'; c(3,4)='1';5='2'")
# the same recoding wrapped into a transformation formula
transform.formula <- ~ 0 + I(car::Recode(books, "1:2='0'; c(3,4)='1';5='2'"))
bifieobj <- BIFIE.data.transform(
  bifieobj,
  transform.formula = transform.formula,
  varnames.new = "book_rec"
)
# frequencies of the recoded variable
res6 <- BIFIE.freq(bifieobj, vars = c("book_rec"))
summary(res6)

# ---------------------------------------------------------------------------
# Transformation 7: variables aggregated to the school level
# ---------------------------------------------------------------------------

dat1 <- as.data.frame(bifieobj$dat1)
# the school ID is formed from the first five characters of the student ID
dat1$idschool <- as.numeric(substring(dat1$IDSTUD, 1, 5))
transform.formula <- ~ 0 + I(as.numeric(substring(IDSTUD, 1, 5)))
bifieobj <- BIFIE.data.transform(
  bifieobj,
  transform.formula = transform.formula,
  varnames.new = "idschool"
)

# group mean of ASMMAT on a single dataset
dat1 <- as.data.frame(bifieobj$dat1)
gm <- miceadds::fast.groupmean(data = dat1$ASMMAT, group = dat1$idschool)
# school means replicated for each student
gm[match(dat1$idschool, gm[, 1]), 2]

# Utility function for group means: computes the mean of x within each
# group via miceadds::fast.groupmean and returns, for every element of
# `group`, the value in the second column of the matching result row.
add.groupmean <- function( x , group ){
    group_means <- miceadds::fast.groupmean( data=x , group=group )
    # first column is matched against the group identifiers
    row_of_group <- match( group , group_means[,1] )
    group_means[ row_of_group , 2 ]
}
# school mean of ASMMAT
bifieobj <- BIFIE.data.transform(
  bifieobj,
  transform.formula = ~ 0 + I(add.groupmean(ASMMAT, idschool)),
  varnames.new = "M_ASMMAT"
)
# school mean of books
bifieobj <- BIFIE.data.transform(
  bifieobj,
  transform.formula = ~ 0 + I(add.groupmean(books, idschool)),
  varnames.new = "M_books"
)

# ---------------------------------------------------------------------------
# Transformation 8: fitted values and residuals from a linear model
# ---------------------------------------------------------------------------
# build a new BIFIEdata object from data.timss1
data(data.timss1)
bifieobj3 <- BIFIE.data(
  data.timss1,
  wgt = data.timss1[[1]]$TOTWGT,
  wgtrep = data.timssrep[, -1]
)

# one term for the fitted values, one for the residuals of the same model
transform.formula <- ~ 0 + I(fitted(lm(ASMMAT ~ migrant + female))) +
  I(residuals(lm(ASMMAT ~ migrant + female)))
# Note that lm omits cases in regression by listwise deletion.
# add fitted values and residuals to the BIFIEdata object
bifieobj <- BIFIE.data.transform(
  bifieobj3,
  transform.formula = transform.formula
)
bifieobj$varnames[bifieobj$varsindex.added] <- c("math_fitted1", "math_resid1")

#****************************
#*** Transformation 9: Including principal component scores in BIFIEdata object

# Auxiliary function for extracting PCA scores: runs princomp() on the
# one-sided `formula` with cor=TRUE and returns the score columns 1:Ncomp.
BIFIE.princomp <- function( formula , Ncomp ){
    pca_fit <- princomp( formula , cor=TRUE )
    keep_cols <- 1:Ncomp
    pca_fit$scores[ , keep_cols ]
}
# transformation formula: first three component scores of five variables
transform.formula <- ~ 0 +
  I(BIFIE.princomp(~ migrant + female + books + lang + ASMMAT, 3))
# apply the transformation to bifieobj3
bifieobj <- BIFIE.data.transform(
  bifieobj3,
  transform.formula = transform.formula
)
bifieobj$varnames[bifieobj$varsindex.added] <- c("pca_sc1", "pca_sc2", "pca_sc3")
# descriptive statistics of the first component score
res9 <- BIFIE.univar(bifieobj, vars = "pca_sc1", se = FALSE)
summary(res9)
res9$output$mean1M

# The transformation formula can also be conveniently generated by string
# operations: paste the variable names together and convert with as.formula
vars <- c("migrant", "female", "books", "lang")
transform.formula2 <- as.formula(paste0(
  "~ 0 + I ( BIFIE.princomp( ~ ",
  paste0(vars, collapse = "+"),
  " , 3 ) )"
))
  ##   > transform.formula2
  ##   ~0 + I(BIFIE.princomp(~migrant + female + books + lang, 3))
  
# ---------------------------------------------------------------------------
# Transformation 10: overwriting the variables books and migrant
# ---------------------------------------------------------------------------
# the names in varnames.new are the existing variable names books and migrant
bifieobj4 <- BIFIE.data.transform(
  bifieobj3,
  transform.formula = ~ 0 + I(1 * (books >= 1)) + I(2 * migrant),
  varnames.new = c("books", "migrant")
)
summary(bifieobj4)

Run the code above in your browser using DataLab