gIndex
Description

gIndex computes the total $g$-index for a model based on the vector of linear predictors, and the partial $g$-index for each predictor in a model. The latter is computed by summing all the terms involving each variable, weighted by their regression coefficients, then computing Gini's mean difference on this sum. For example, a regression model having age, sex, and age*sex on the right-hand side, with corresponding regression coefficients $b_{1}, b_{2}, b_{3}$, will have the $g$-index for age computed from Gini's mean difference on the product age $\times (b_{1} + b_{3}w)$, where $w$ is an indicator set to one for observations with sex not equal to the reference value. When there are nonlinear terms associated with a predictor, these terms will also be combined. A print method is defined, and there is a plot method for displaying $g$-indexes using a dot chart.
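For concreteness, the following is a minimal sketch of the partial $g$-index for age described above, using made-up data and hypothetical coefficients b1 and b3 (the numbers are illustrative only, not taken from any fitted model):

require(rms)
set.seed(10)
age <- round(runif(30, 20, 40))
sex <- sample(c('female', 'male'), 30, TRUE)
b1 <- 0.05; b3 <- -0.02             # hypothetical coefficients for age and age:sex
w  <- as.numeric(sex != 'female')   # indicator: sex not equal to the reference value
GiniMd(age * (b1 + b3 * w))         # partial g-index for age, per the description above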
A basic function GiniMd computes Gini's mean difference on a numeric vector. This index is defined as the mean absolute difference between any two distinct elements of a vector. For a Bernoulli (binary) variable with proportion of ones equal to $p$ and sample size $n$, Gini's mean difference is $2\frac{n}{n-1}p(1-p)$. For a trinomial variable (e.g., predicted values for a 3-level categorical predictor using two dummy variables) having (predicted) values $A, B, C$ with corresponding proportions $a, b, c$, Gini's mean difference is $2\frac{n}{n-1}[ab|A-B|+ac|A-C|+bc|B-C|]$.
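A minimal sketch of the definition above, assuming the rms package is attached: the brute-force mean absolute difference over all distinct ordered pairs should agree with GiniMd (this mirrors the gmd helper in the Examples below).

require(rms)
x <- c(2, 5, 9, 1)
n <- length(x)
sum(abs(outer(x, x, '-'))) / (n * (n - 1))  # mean |x_i - x_j| over i != j
GiniMd(x)                                   # should match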
Usage

gIndex(object, partials=TRUE, type=c('ccterms', 'cterms', 'terms'),
       lplabel=if(length(object$scale) && is.character(object$scale))
         object$scale[1] else 'X*Beta',
       fun, funlabel=if(missing(fun)) character(0) else
         deparse(substitute(fun)),
       postfun=if(length(object$scale)==2) exp else NULL,
       postlabel=if(length(postfun))
         ifelse(missing(postfun),
                if((length(object$scale) > 1) &&
                   is.character(object$scale)) object$scale[2] else
                  'Anti-log',
                deparse(substitute(postfun))) else character(0),
       ...)

## S3 method for class 'gIndex':
print(x, digits=4, abbrev=FALSE,
      vnames=c("names","labels"), ...)

## S3 method for class 'gIndex':
plot(x, what=c('pre', 'post'),
     xlab=NULL, pch=16, rm.totals=FALSE,
     sort=c('descending', 'ascending', 'none'), ...)

GiniMd(x, na.rm=FALSE)
Arguments

object: the result of an rms fitting function

partials: set to FALSE to suppress computation of partial $g$s

type: defaults to 'ccterms', which causes partial discrimination indexes to be computed after maximally combining all related main effects and interactions. This is usually the only way that makes sense when considering partial linear predictors.

lplabel: a label describing the linear predictor scale, e.g., "X*Beta" or "log odds"

fun: an optional function to transform the linear predictors before computing the total (only) $g$. When this is given, a new component gtrans is added to the attributes of the object resulting from gIndex.

funlabel: a label for fun, otherwise taken from the function name itself

postfun: a function to transform $g$, such as exp (anti-log), which is the default for certain models such as the logistic and Cox models

postlabel: a label for postfun

...: for gIndex, passed to predict.rms. Ignored for print. Passed to dotchart2 for plot.

x: an object created by gIndex (for print or plot) or a numeric vector (for GiniMd)

digits: causes rounding to digits decimal places

abbrev: set to TRUE to abbreviate labels if vnames="labels" is in effect

vnames: set to "labels" to print predictor labels instead of names

what: set to "post" to plot the transformed $g$-index if there is one (e.g., ratio scale)

xlab: x-axis label for plot; constructed by default

pch: plotting character for plot

rm.totals: set to TRUE to remove the total $g$-index when plotting

sort: order in which to sort the $g$-indexes on the dot chart; defaults to descending

na.rm: set to TRUE if you suspect there may be NAs in x; these will then be removed. Otherwise an error will result.

Value

gIndex returns a matrix of class "gIndex" with auxiliary information stored as attributes, such as variable labels. GiniMd returns a scalar.

See Also

predict.rms

Examples
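First, a minimal sketch of typical use with made-up data (variable names and coefficients are illustrative only); it exercises the print and plot methods documented above. The anti-logged scale is available for the logistic fit because postfun defaults to exp for such models:

require(rms)
set.seed(3)
age <- rnorm(200, 50, 10)
sex <- factor(sample(c('female', 'male'), 200, TRUE))
y   <- as.integer(runif(200) < plogis(-4 + .06*age + .5*(sex == 'male')))
dd  <- datadist(age, sex); options(datadist='dd')
f   <- lrm(y ~ age * sex, x=TRUE, y=TRUE)
g   <- gIndex(f)        # total and partial g-indexes on the log-odds scale
g                       # print method
plot(g)                 # dot chart of g-indexes
plot(g, what='post')    # anti-logged (odds-ratio-like) scale
options(datadist=NULL)

The examples below verify GiniMd against a brute-force calculation, reproduce partial $g$-indexes by hand, and compare partial and total $g$-indexes over many random fits.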
set.seed(1)
n <- 40
x <- 1:n
w <- factor(sample(c('a','b'), n, TRUE))
u <- factor(sample(c('A','B'), n, TRUE))
y <- .01*x + .2*(w=='b') + .3*(u=='B') + .2*(w=='b' & u=='B') + rnorm(n)/5
dd <- datadist(x,w,u); options(datadist='dd')
f <- ols(y ~ x*w*u, x=TRUE, y=TRUE)
f
anova(f)
z <- list()
for(type in c('terms','cterms','ccterms')) {
  zc <- predict(f, type=type)
  cat('type:', type, '\n')
  print(zc)
  z[[type]] <- zc
}
# Test GiniMd against a brute-force solution
gmd <- function(x)
{
  n <- length(x)
  sum(outer(x, x, function(a, b) abs(a - b)))/n/(n-1)
}
zc <- z$cterms
gmd(zc[, 1])
GiniMd(zc[, 1])
GiniMd(zc[, 2])
GiniMd(zc[, 3])
GiniMd(f$linear.predictors)
g <- gIndex(f)
g
g['Total',]
gIndex(f, partials=FALSE)
gIndex(f, type='cterms')
gIndex(f, type='terms')
z <- c(rep(0,17), rep(1,6))
n <- length(z)
GiniMd(z)
2*mean(z)*(1-mean(z))*n/(n-1)
a <- 12; b <- 13; c <- 7; n <- a + b + c
A <- -.123; B <- -.707; C <- 0.523
xx <- c(rep(A, a), rep(B, b), rep(C, c))
GiniMd(xx)
2*(a*b*abs(A-B) + a*c*abs(A-C) + b*c*abs(B-C))/n/(n-1)
y <- y > .8
f <- lrm(y ~ x * w * u, x=TRUE, y=TRUE)
gIndex(f, fun=plogis, funlabel='Prob[y=1]')
# Manual calculation of combined main effect + interaction effect of
# sex in a 2x2 design with treatments A B, sexes F M,
# model -.1 + .3*(treat=='B') + .5*(sex=='M') + .4*(treat=='B' & sex=='M')
set.seed(1)
X <- expand.grid(treat=c('A','B'), sex=c('F', 'M'))
a <- 3; b <- 7; c <- 13; d <- 5
X <- rbind(X[rep(1, a),], X[rep(2, b),], X[rep(3, c),], X[rep(4, d),])
y <- with(X, -.1 + .3*(treat=='B') + .5*(sex=='M') + .4*(treat=='B' & sex=='M'))
f <- ols(y ~ treat*sex, data=X, x=TRUE)
gIndex(f, type='cterms')
k <- coef(f)
b1 <- k[2]; b2 <- k[3]; b3 <- k[4]
n <- nrow(X)
( (a+b)*c*abs(b2) + (a+b)*d*abs(b2+b3) + c*d*abs(b3))/(n*(n-1)/2 )
# Manual calculation for combined age effect in a model with sex,
# age, and age*sex interaction
a <- 13; b <- 7
sex <- c(rep('female',a), rep('male',b))
agef <- round(runif(a, 20, 30))
agem <- round(runif(b, 20, 40))
age <- c(agef, agem)
y <- (sex=='male') + age/10 - (sex=='male')*age/20
f <- ols(y ~ sex*age, x=TRUE)
f
gIndex(f, type='cterms')
k <- coef(f)
b1 <- k[2]; b2 <- k[3]; b3 <- k[4]
n <- a + b
sp <- function(w, z=w) sum(outer(w, z, function(u, v) abs(u-v)))
( abs(b2)*sp(agef) + abs(b2+b3)*sp(agem) + 2*sp(b2*agef, (b2+b3)*agem) ) / (n*(n-1))
( abs(b2)*GiniMd(agef)*a*(a-1) + abs(b2+b3)*GiniMd(agem)*b*(b-1) +
2*sp(b2*agef, (b2+b3)*agem) ) / (n*(n-1))
# Compare partial and total g-indexes over many random fits
plot(NA, NA, xlim=c(0,3), ylim=c(0,3), xlab='Global',
     ylab='x1 (black) x2 (red) x3 (green) x4 (blue)')
abline(a=0, b=1, col=gray(.9))
big <- integer(3)
n <- 50 # try with n=7 - see lots of exceptions esp. for interacting var
for(i in 1:100) {
  x1 <- runif(n)
  x2 <- runif(n)
  x3 <- runif(n)
  x4 <- runif(n)
  y <- x1 + x2 + x3 + x4 + 2*runif(n)
  f <- ols(y ~ x1*x2+x3+x4, x=TRUE)
  # f <- ols(y ~ x1+x2+x3+x4, x=TRUE)  # also try this
  w <- gIndex(f)[,1]
  gt <- w['Total']
  points(gt, w['x1, x2'])
  points(gt, w['x3'], col='green')
  points(gt, w['x4'], col='blue')
  big[1] <- big[1] + (w['x1, x2'] > gt)
  big[2] <- big[2] + (w['x3'] > gt)
  big[3] <- big[3] + (w['x4'] > gt)
}
print(big)
options(datadist=NULL)
Keywords: predictive accuracy, robust, univar