Last chance! 50% off unlimited learning
Sale ends in
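Collection of functions for the Yeo-Johnson transformation
(Yeo & Johnson, 2000) and the corresponding distribution family of scaled
t distributions with and without Yeo-Johnson transformation. For bounded
variables on (0,1), a probit transformation can additionally be applied
(argument probit).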
The Box-Cox transformation (bc; Sakia, 1992)
can be applied for variables with positive values.
# Yeo-Johnson transformation and its inverse transformation
yj_trafo(y, lambda, use_rcpp=TRUE, probit=FALSE)
yj_antitrafo(y, lambda, probit=FALSE)
#---- scaled t distribution with Yeo-Johnson transformation
dyjt_scaled(x, location=0, shape=1, lambda=1, df=Inf, log=FALSE, probit=FALSE)
ryjt_scaled(n, location=0, shape=1, lambda=1, df=Inf, probit=FALSE)
fit_yjt_scaled(x, df=Inf, par_init=NULL, lambda_fixed=NULL, weights=NULL, probit=FALSE)
# S3 method for fit_yjt_scaled
coef(object, ...)
# S3 method for fit_yjt_scaled
logLik(object, ...)
# S3 method for fit_yjt_scaled
summary(object, digits=4, file=NULL, ...)
# S3 method for fit_yjt_scaled
vcov(object, ...)
# Box-Cox transformation and its inverse transformation
bc_trafo(y, lambda)
bc_antitrafo(y, lambda)
#---- scaled t distribution with Box-Cox transformation
dbct_scaled(x, location=0, shape=1, lambda=1, df=Inf, log=FALSE, check_zero=TRUE)
rbct_scaled(n, location=0, shape=1, lambda=1, df=Inf)
fit_bct_scaled(x, df=Inf, par_init=NULL, lambda_fixed=NULL, weights=NULL)
# S3 method for fit_bct_scaled
coef(object, ...)
# S3 method for fit_bct_scaled
logLik(object, ...)
# S3 method for fit_bct_scaled
summary(object, digits=4, file=NULL, ...)
# S3 method for fit_bct_scaled
vcov(object, ...)
#---- scaled t distribution
dt_scaled(x, location=0, shape=1, df=Inf, log=FALSE)
rt_scaled(n, location=0, shape=1, df=Inf)
fit_t_scaled(x, df=Inf, par_init=NULL, weights=NULL)
# S3 method for fit_t_scaled
coef(object, ...)
# S3 method for fit_t_scaled
logLik(object, ...)
# S3 method for fit_t_scaled
summary(object, digits=4, file=NULL, ...)
# S3 method for fit_t_scaled
vcov(object, ...)
Vector or an object of fitted distribution depending on the called function
Numeric vector
Transformation parameter
Logical indicating whether Rcpp package should be used
Logical indicating whether probit transformation should be
applied for bounded variables on (0,1)
Numeric vector
Location parameter of (transformed) scaled t distribution
Shape parameter of (transformed) scaled t distribution
Degrees of freedom of (transformed) scaled t distribution
Logical indicating whether logarithm of the density should be computed
Logical indicating whether check for inadmissible values should be conducted
Number of observations to be simulated
Optional vector of initial parameters
Optional value for fixed lambda (the transformation parameter is then not estimated)
Optional vector of sampling weights
Object of class fit_yjt_scaled, fit_bct_scaled
or fit_t_scaled
Number of digits used for rounding in summary
File name for the summary
to be sunk into
Further arguments to be passed
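Let \(g_\lambda\) denote the Yeo-Johnson transformation. A random variable \(X\)
follows the scaled t distribution with Yeo-Johnson transformation with location
\(\mu\), shape \(\sigma\) and transformation parameter \(\lambda\) if
\(g_\lambda(X) = \mu + \sigma Z\), where \(Z\) is t distributed with df
degrees of freedom.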
For a bounded variable on (0,1), a probit transformation is additionally applied (argument probit=TRUE).
For a Yeo-Johnson normally distributed variable, a normally distributed variable results in
case of \(\lambda = 1\).
Sakia, S. M. (1992). The Box-Cox transformation technique: A review. The Statistician, 41(2), 169-178. doi:10.2307/2348250
Yeo, I.-K., & Johnson, R. (2000). A new family of power transformations to improve normality or symmetry. Biometrika, 87(4), 954-959. doi:10.1093/biomet/87.4.954
See yjt_regression
for fitting a regression model in which
the response variable is distributed according to the scaled
t distribution with Yeo-Johnson transformation.
See car::yjPower
for fitting the Yeo-Johnson
transformation in the car package. See car::bcPower
for the
Box-Cox transformation.
The scaled t distribution is also available as metRology::dt.scaled
(metRology package).
See stats::dt
for the
t distribution.
See the fitdistrplus package or the general
stats4::mle
function
for fitting several distributions in R.
#############################################################################
# EXAMPLE 1: Transforming values according to Yeo-Johnson transformation
#############################################################################
# Evaluation grid: 100 equally spaced points on [-3, 3].
# `y` stays a top-level object because later examples read it.
y <- seq(from = -3, to = 3, length.out = 100)

# Panel 1 -- lambda >= 0.
# The solid curve (lambda = 1) opens the plot; the remaining curves are
# overlaid with line types 2, 3, 4 for lambda = 2, 0.5, 0.
plot(
  y, mdmb::yj_trafo(y, lambda = 1),
  type = "l", ylim = c(-8, 8),
  ylab = expression(g[lambda](y))
)
invisible(Map(
  function(lam, line_type) {
    lines(y, mdmb::yj_trafo(y, lambda = lam), lty = line_type)
  },
  c(2, 0.5, 0), 2:4
))

# Panel 2 -- lambda <= 0.
# Same layout: solid curve for lambda = -1, then lambda = -2, -0.5, 0.
plot(
  y, mdmb::yj_trafo(y, lambda = -1),
  type = "l", ylim = c(-8, 8),
  ylab = expression(g[lambda](y))
)
invisible(Map(
  function(lam, line_type) {
    lines(y, mdmb::yj_trafo(y, lambda = lam), lty = line_type)
  },
  c(-2, -0.5, 0), 2:4
))
if (FALSE) {
#############################################################################
# EXAMPLE 2: Density of scaled t distribution
#############################################################################
# Purpose: show that the scaled t density with many degrees of freedom is
# numerically close to the normal density with the same location and scale.
# NOTE: `y` is the evaluation grid created in Example 1 (seq(-3, 3, len=100)),
# so Example 1 must have been run first.
# define location and scale parameter
# (m0 and sig are reused by Example 3)
m0 <- 0.3
sig <- 1.5
#-- compare density of scaled t distribution with large degrees of freedom
# with normal distribution
# scaled t density with df=100 evaluated on the grid y
y1 <- mdmb::dt_scaled( y, location=m0, shape=sig, df=100 )
# normal density with mean m0 and standard deviation sig on the same grid
y2 <- stats::dnorm( y, mean=m0, sd=sig )
# largest absolute pointwise difference between the two density vectors
max(abs(y1-y2))
#############################################################################
# EXAMPLE 3: Simulating and fitting the scaled t distribution
#############################################################################
# Purpose: draw a sample from a scaled t distribution and fit it twice,
# once with the simulated df and once with df=Inf, then compare the fits.
# NOTE: m0 and sig are the location and shape values assigned in Example 2.
#-- simulate data with 10 degrees of freedom
# fix the RNG seed so that the simulated sample is reproducible
set.seed(987)
df0 <- 10 # define degrees of freedom
# 10000 draws with location m0, shape sig and df0 degrees of freedom
x <- mdmb::rt_scaled( n=1E4, location=m0, shape=sig, df=df0 )
#** fit data with df=10 degrees of freedom
# df is passed as a fixed value to the fitting function
fit1 <- mdmb::fit_t_scaled(x=x, df=df0 )
#** compare with fit from normal distribution
fit2 <- mdmb::fit_t_scaled(x=x, df=Inf ) # df=Inf is the default
#-- some comparisons
# coef, summary and logLik are the S3 methods listed for fit_t_scaled objects
coef(fit1)
summary(fit1)
logLik(fit1)
# AIC is not among the methods listed in the usage section; presumably the
# generic stats::AIC working through logLik -- verify
AIC(fit1)
AIC(fit2)
#############################################################################
# EXAMPLE 4: Simulation and fitting of scaled t distribution with
# Yeo-Johnson transformation
#############################################################################
# Purpose: evaluate the density, simulate data, and fit three models that
# differ only in how lambda is treated (estimated, fixed to the simulated
# value, fixed to 1).
# NOTE: this example overwrites m0, x and y from the earlier examples.
# define parameters of transformed scaled t distribution
m0 <- .5
sig <- 1.5
lam <- .5
# evaluate density
# grid of 100 points on [-5, 5]; df is not passed, so its default df=Inf applies
x <- seq( -5, 5, len=100 )
y <- mdmb::dyjt_scaled( x, location=m0, shape=sig, lambda=lam )
graphics::plot( x, y, type="l")
# transform original values
# (result is not assigned; the call only displays the transformed grid)
mdmb::yj_trafo( y=x, lambda=lam )
#** simulate data
# fix the RNG seed so that the simulated sample is reproducible
set.seed(987)
# x is replaced by 3000 simulated observations
x <- mdmb::ryjt_scaled(n=3000, location=m0, shape=sig, lambda=lam )
graphics::hist(x, breaks=30)
#*** Model 1: Fit data with lambda to be estimated
# lambda_fixed keeps its default NULL here
fit1 <- mdmb::fit_yjt_scaled(x=x)
summary(fit1)
coef(fit1)
#*** Model 2: Fit data with lambda fixed to simulated lambda
fit2 <- mdmb::fit_yjt_scaled(x=x, lambda_fixed=lam)
summary(fit2)
coef(fit2)
#*** Model 3: Fit data with lambda fixed to 1
fit3 <- mdmb::fit_yjt_scaled(x=x, lambda_fixed=1)
#-- compare log-likelihood values
# all three models are fitted to the same data x
logLik(fit1)
logLik(fit2)
logLik(fit3)
#############################################################################
# EXAMPLE 5: Approximating the chi square distribution
# with yjt and bct distribution
#############################################################################
# Purpose: fit the Yeo-Johnson (yjt) and Box-Cox (bct) scaled t distributions
# to chi square data and overlay both fitted densities, plus a normal
# approximation, on the true chi square density.
#-- simulate data
# fix the RNG seed so that the simulated sample is reproducible
set.seed(987)
n <- 3000
df0 <- 5
x <- stats::rchisq( n=n, df=df0 )
#-- plot data
graphics::hist(x, breaks=30)
#-- fit data with yjt distribution
fit1 <- mdmb::fit_yjt_scaled(x)
summary(fit1)
c1 <- coef(fit1)
#-- fit data with bct distribution
fit2 <- mdmb::fit_bct_scaled(x)
summary(fit2)
c2 <- coef(fit2)
# compare log-likelihood values
logLik(fit1)
logLik(fit2)
#-- plot chi square distribution and approximating yjt distribution
# the grid starts at .01 rather than 0; presumably because the Box-Cox
# density is only meant for positive values -- verify
y <- seq( .01, 3*df0, length.out=100 )
dy <- stats::dchisq( y, df=df0 )
# 10 percent head room above the density maximum
graphics::plot( y, dy, type="l", ylim=c(0, max(dy) )*1.1 )
# approximation with scaled t distribution and Yeo-Johnson transformation
# (coefficients are taken by position as location, shape, lambda)
graphics::lines( y, mdmb::dyjt_scaled(y, location=c1[1], shape=c1[2], lambda=c1[3]),
lty=2)
# approximation with scaled t distribution and Box-Cox transformation
# (coefficients are taken by position as location, shape, lambda)
graphics::lines( y, mdmb::dbct_scaled(y, location=c2[1], shape=c2[2], lambda=c2[3]),
lty=3)
# approximating normal distribution
# (a chi square variable with df0 degrees of freedom has mean df0 and
# variance 2*df0)
graphics::lines( y, stats::dnorm( y, mean=df0, sd=sqrt(2*df0) ), lty=4)
# legend entries follow the line types 1 to 4 used above
graphics::legend( .6*max(y), .9*max(dy), c("chi square", "yjt", "bct", "norm"),
lty=1:4)
#############################################################################
# EXAMPLE 6: Bounded variable on (0,1) with Probit Yeo-Johnson transformation
#############################################################################
# Purpose: simulate a response bounded on (0,1) and fit it with probit=TRUE,
# once as a distribution fit and once as a regression on x.
# fix the RNG seed so that the simulated data are reproducible
set.seed(876)
n <- 1000
x <- stats::rnorm(n)
# pnorm maps the linear term plus normal noise (variance .5) into (0,1)
y <- stats::pnorm( 1*x + stats::rnorm(n, sd=sqrt(.5) ) )
dat <- data.frame( y=y, x=x )
#*** fit Probit Yeo-Johnson distribution
# note: the bounded response y is passed as the data argument named x
mod1 <- mdmb::fit_yjt_scaled(x=y, probit=TRUE)
summary(mod1)
#*** estimation using regression model
# same response, now with x as predictor
mod2 <- mdmb::yjt_regression( y ~ x, data=dat, probit=TRUE )
summary(mod2)
}
Run the code above in your browser using DataLab