data(thermo)
## overall oxidation state of proteins exclusively localized
## to cytoplasm of S. cerevisiae with/without abundance weighting
y <- yeastgfp("cytoplasm")
aa <- more.aa(y$protein, "Sce")
aaavg <- aasum(aa, average=TRUE)
ZC(protein.formula(aaavg))
# the average composition weighted by abundance
waaavg <- aasum(aa, abundance=y$abundance, average=TRUE)
ZC(protein.formula(waaavg))
## read.expr using one of the provided data files,
## from Ishihama et al., 2008
file <- system.file("extdata/abundance/ISR+08.csv.xz", package="CHNOSZ")
# read all protein names and abundances in ID and emPAI columns
# (emPAI - exponentially modified protein abundance index)
expr <- read.expr(file, "ID", "emPAI")
# scatter plot of average oxidation state and emPAI
aa <- more.aa(expr$protein, "Eco")
pf <- protein.formula(aa)
zc <- ZC(pf)
# note we specify ylim here that excludes some high-emPAI values
plot(zc, expr$abundance, xlab=expr.property("ZC"), ylim=c(0, 90), ylab="emPAI",
main="Proteins in E. coli cytosol
Abundance vs oxidation state of carbon")
legend("topleft", pch=1, legend="Ishihama et al., 2008")
# what if we just want kinases?
# "description" is the name of the column where we search for "kinase"
expr.kinase <- read.expr(file, "ID", "emPAI", list(description="kinase"))
## read.expr using a different data file,
## from Anderson and Anderson, 2003
file <- system.file("extdata/abundance/AA03.csv", package="CHNOSZ")
# look for proteins described as "Complement"
read.expr(file, "name", "log10(pg/ml)", list(description="Complement"))
## speciation diagram for ER.to.Golgi proteins (COPII coat
## proteins) as a function of logfO2, after Dick, 2009
# add old parameters for [Met] sidechain to database
add.obigt()
y <- yeastgfp("ER.to.Golgi")
# don't use those with NA abundance
ina <- is.na(y$abundance)
# get the amino acid compositions of the proteins
aa <- more.aa(y$protein[!ina], "Sce")
# add proteins to thermo$protein
ip <- add.protein(aa)
# use logarithms of activities of proteins such
# that total activity of residues is unity
pl <- protein.length(ip)
logact <- unitize(rep(1, length(ip)), pl)
# load the proteins
basis("CHNOS+")
a <- affinity(O2=c(-80, -73), iprotein=ip, loga.protein=logact)
# make a speciation diagram
e <- equilibrate(a, normalize=TRUE)
diagram(e, ylim=c(-4.9, -2.9))
# where we are closest to experimental log activity
logfO2 <- rep(-78, length(ip))
abline(v=logfO2[1], lty=3)
# scale experimental abundances such that
# total activity of residues is unity
logact.expt <- unitize(log10(y$abundance[!ina]), pl)
# plot experimental log activity
points(logfO2, logact.expt, pch=16)
text(logfO2+0.5, logact.expt, y$protein[!ina])
# add title
title(main=paste("ER.to.Golgi; points - relative abundances",
"from YeastGFP. Figure after Dick, 2009",sep="\n"))
# restore default thermodynamic database
data(thermo)
#############################
## examples using stress() ##
#############################
## predominance fields for overall protein compositions
## induced and repressed in an/aerobic carbon limitation
## (experiments of Tai et al., 2005)
# the activities of ammonium and sulfate
# are similar to the non-growth-limiting concentrations
# used by Boer et al., 2003
basis(c("glucose", "H2O", "NH4+", "hydrogen", "SO4-2", "H+"),
c(-1, 0, -1.3, 999, -1.4, -7))
# the names of the experiments in thermo$stress
expt <- c("Clim.aerobic.down", "Clim.aerobic.up",
"Clim.anaerobic.down", "Clim.anaerobic.up")
# here we use abundance to indicate that the protein
# compositions should be summed together in equal amounts
for(i in 1:length(expt)) {
p <- stress(expt[i], "Sce")
aa <- more.aa(p$protein, "Sce")
aa <- aasum(aa, average=TRUE, protein=expt[i])
add.protein(aa)
}
species(expt, "Sce")
a <- affinity(C6H12O6=c(-30, 0), H2=c(-20, 0))
d <- diagram(a, normalize=TRUE, fill=NULL)
title(main=paste("Relative stabilities of proteins observed in\n",
"an/aerobic carbon limitation in yeast"))
# the equilibrium distribution favors the proteins upregulated
# by carbon limitation at low chemical potentials of C6H12O6 ...
stopifnot(c(d$predominant[1,1], d$predominant[1,128])==grep("up", expt))
# ... and favors proteins downregulated by aerobic conditions
# at high hydrogen fugacities
stopifnot(c(d$predominant[128, 128], d$predominant[128, 1])==grep("down", expt))
#################################
## examples using yeastgfp() ##
#################################
## Oxygen fugacity - activity of H2O predominance
## diagrams for proteologs for 23 YeastGFP localizations
# need old properties of [Met] to reproduce this example
data(thermo)
add.obigt()
# arranged by decreasing metastability:
# order of this list of locations is based on the
# (dis)appearance of species on the current set of diagrams
names <- c("vacuole", "early.Golgi", "ER", "lipid.particle",
"cell.periphery", "ambiguous", "Golgi", "mitochondrion",
"bud", "actin", "cytoplasm", "late.Golgi",
"endosome", "nucleus", "vacuolar.membrane", "punctate.composite",
"peroxisome", "ER.to.Golgi", "nucleolus", "spindle.pole",
"nuclear.periphery", "bud.neck", "microtubule")
nloc <- c(4, 5, 3, 4, 4, 3)
# define the system
basis("CHNOS+")
# get protein names and abundances in each location
gfp <- yeastgfp(names)
# get amino acid compositions of proteins
aa <- more.aa(gfp$protein, "Sce")
# calculate average amino acid compositions
for(i in 1:length(names)) {
avgaa <- aasum(aa[[i]], gfp$abundance[[i]], average=TRUE, protein=names[i])
add.protein(avgaa)
}
species(names, "Sce")
a <- affinity(H2O=c(-5, 0, 256), O2=c(-80, -66, 256))
# setup the plot
layout(matrix(c(1, 1,2:7), byrow=TRUE, nrow=4), heights=c(0.7, 3, 3, 3))
par(mar=c(0, 0, 0, 0))
plot.new()
text(0.5, 0.5, paste("Subcellular proteins of S. cerevisiae,",
"after Dick, 2009\n"), cex=1.5)
text(0.5, 0.2, describe.basis(ibasis=c(1, 3, 4, 6), oneline=TRUE), cex=1.5)
opar <- par(mar=c(3, 4, 1, 1), xpd=TRUE)
fill <- heat.colors(length(names))
inames <- 1:length(names)
for(i in 1:length(nloc)) {
diagram(a, normalize=TRUE, names=names[inames], groups=as.list(inames),
fill=fill[inames], cex.axis=0.75, cex.names=1)
label.plot(letters[i])
title(main=paste(length(inames), "locations"))
# take out the stable species
inames <- inames[-(1:nloc[i])]
}
# return to plot defaults
layout(matrix(1))
par(opar)
# reset thermodynamic database
data(thermo)
Run the code above in your browser using DataLab