c2s(x, sep = " ")
s2c(x, sep = NULL, keep.sep = TRUE, n = NULL, move.sep = FALSE)
GHS(species = NULL, DG = NA, DH = NA, S = NA, T = thermo$opt$Tr)
can.be.numeric(x)
expand.formula(elements, makeup)
ZC(x)
eos.args(eos, property = NULL, T = NULL, P = NULL)
TP.args(T = NULL, P = NULL)
state.args(state = NULL)
protein.length(protein)
aminoacids(seq, nchar=1)
nucleicacids(seq, type = "DNA", comp = NULL, comp2 = NULL)
MP90.cp(T, protein)
dPdTtr(x)
Ttr(x, P = 1, dPdT = NULL)
thermo.plot.new(xlim, ylim, xlab, ylab, cex = par("cex"),
mar = NULL, lwd = par("lwd"), side = c(1,2,3,4),
mgp = c(1.2, 0.3, 0), cex.axis = par("cex"), col = par("col"),
yline = NULL, axs = "i", do.box = TRUE, ticks = NULL)
thermo.postscript(file, family = "Helvetica", width = 8,
height = 6, horizontal = FALSE)
thermo.axis(lab = "x-axis", side = 1, line = 1.5, cex = par("cex"),
lwd = par("lwd"), T = NULL, col = par("col"))
axis.label(x, opt = NULL, do.state = TRUE, oldstyle = FALSE,
do.upper = FALSE, mol = "mol")
describe(x = NULL, T = NULL, P = NULL, use.name = FALSE,
as.reaction = NULL, digits = 1)
basis.comp(basis)
label.plot(x, xfrac = 0.95, yfrac = 0.9, cex = 1, paren = TRUE,
adj = 1)
water.lines(xaxis = "pH", yaxis = "Eh", T = 298.15, P = "Psat",
which = c("oxidation","reduction"), logaH2O = 0, lty = 2,
col = par("fg"), xpoints = NULL)
element(compound, property = c("mass","entropy"))
mod.obigt(species, ..., missingvalues = NA)
which.pmax(elts, na.rm = FALSE, pmin = FALSE)
nonideal(species, proptable, IS, T)
change(name, ...)
add.protein(file="protein.csv")
add.obigt(file="obigt.csv")
which.balance(species)
lsub(x, y)
lsum(x, y)
pprod(x, y)
psum(x)
mylapply(X, FUN, ...)
spearman(a, b)
unitize(logact = NULL, length = NULL, logact.tot = 0)
mtitle(main, ...)
grep.file(file, x = "", y = NULL, ignore.case = TRUE, startswith = ">",
lines = NULL, grep = "grep")
read.fasta(file, i = NULL, ret = "count", lines = NULL, ihead = NULL)
examples(do.png = FALSE, long = TRUE)
longex(which)
s2c
, c2s
, axis.label
), or object to be tested (can.be.numeric
), or numeric index of a mineral phase (dPdTtr
, Ttr
), or character object representing c2s
, s2c
).s2c
).s2c
.GHS
), or names of species to modify or add to the thermodynamic database (mod.obigt
), or names or indices of species for which to calculate nonidTP.args
, water.lines
, describe
, MP90.cp
, nonideal
, GHS
).TP.args
).eos.args
, element
).GHS
).expand.formula
).makeup
.protein.length
).aminoacids
) or base sequence of a nucleic acid (nucleicacids
).aminoacids
).nucleicacids
).Ttr
).thermo.plot.new
).label.plot
.thermo.plot.new
,thermo.axis
).side
(retained for back-compatibility).thermo.postscript
, add.protein
, add.obigt
, grep.file
, read.fasta
).axis.label
).describe
).basis.comp
).water.lines
).water.lines
), or which examples to run (longex
).water.lines
, thermo.plot.new
, thermo.axis
).label.plot
).thermo.axis
).element
).mod.obigt
), or arguments to change
that are passed to mod.obigt
or mod.buffer
, or additional arguments for lappa
which.pmax
).nonideal
).change
).lapply
or mclapply
.lapply
or mclapply
.lsub
, lsum
), or numeric (pprod
), or character, term to exclude in searching sequence headers (grep.file
).grep.file
)grep
command.read.fasta
).spearman
).spearman
).unitize
).s2c
, c2s
and axis.label
return character values. Numeric returns are made by GHS
, protein.length
, dPdTtr
, Ttr
, ZC
, MP90.cp
, spearman
, mod.obigt
and grep.file
. A list is returned by eos.args
and TP.args
, and character is returned by state.args
. can.be.numeric
returns logical. aminoacids
and nucleicacids
return character or dataframe. lsub
, lsum
and pprod
return lists. read.fasta
returns a list of sequences or lines (for ret
equal to seq or fas, respectively), or a data frame with amino acid compositions of proteins (for ret
equal to count) with columns corresponding to those in thermo$protein
. Functions with no (or unspecified) returns are thermo.plot.new
, thermo.postscript
, label.plot
and water.lines
.c2s
joins the elements of a character object into a character object of length $1$ (a string), and s2c
splits a string into elements of a character object of length $n+1$, where $n$ stands for the number of separators in the string. sep
gives the separator to insert between successive items (in c2s
) or the separator(s) to find in a string (in s2c
). The default value of sep
is a space (" ") in c2s
. The default value for sep
is NULL in s2c
, indicating a separator at every position of x
(the result in this case has length equal to nchar(x)
). Argument keep.sep
if TRUE
(the default) instructs s2c
to keep the separating values in the output, and move.sep
if TRUE
instructs s2c
to append the kept separators to the preceding items instead of prepending them to the following ones. The maximum length of the object returned by s2c
is determined by n
; the default value of NULL
indicates an unrestricted length. The *.args
functions are used to normalize user-input arguments, which are case-insensitive. eos.args
returns a list with elements named props
, for all the properties available for the specified equations-of-state, prop
for the lower-case version of property
, and Prop
, for the upper-case (of first letter) version of property
. eos.args
produces an error if one of the property
s is not in the list of available properties. (See water
and subcrt
for the available properties for different species.) TP.args
forces T
and P
to equal length. This function also looks for the keyword Psat in the value of P
and substitutes calculated values of the saturation vapor pressure (see water
). state.args
makes its argument lowercase, then transforms a, c, g, and l to aq, cr, gas, and liq, respectively.
GHS
computes one of the standard molal Gibbs energy or enthalpy of formation from the elements (DG
, DH
) or entropy (S
) at 298.15 K and 1 bar from values of the other two. If the species
argument is present, it is used to calculate the entropies of the elements (Se
) using element
, otherwise Se
is set to zero. The equation in effect can be written as ${\Delta}G^{\circ}={\Delta}H^{\circ}-T{\Delta}S^{\circ}$, where ${\Delta}S^{\circ}=S-S_e$ and $T$ denotes the reference temperature of 298.15 K. If two of DG
, DH
, and S
are provided, the value of the third is returned. If three are provided, the value of DG
in the arguments is ignored and the calculated value of DG
is returned. If none of DG
, DH
or S
are provided, the value of Se
is returned. If only one of the values is provided, an error results. Units of cal mol$^{-1}$ (DG
, DH
) and cal K$^{-1}$ mol$^{-1}$ (S
) are assumed. If T
is provided, it is used instead of the reference temperature.
can.be.numeric
returns a value of TRUE or FALSE for each element of x
.
expand.formula
converts a 1-column dataframe representing the elemental composition of a compound (see makeup
) to a numeric vector, each value of which is the coefficient of the elements
given in the argument. If any of these is not present in the makeup dataframe, its coefficient is set to zero. A non-zero coefficient of an element in the makeup dataframe does not appear in the output if that element is not one of elements
.
ZC
returns the nominal carbon oxidation state for the chemical formula represented by x
. (For discussion of nominal carbon oxidation state, see Hendrickson et al., 1970; Buvet, 1983.) If carbon is not present in the formula the result is NaN
.
The argument of protein.length
, if it is character, refers to the name of protein(s) (e.g., LYSC_CHICK) for which to calculate the length (number of amino acid residues). If the argument is numeric, it refers to the species index of a protein (value in thermo$species$ispecies
). For a positive numeric argument to work, the protein information must have been previously loaded into the species list (using info
). If the numeric value is negative, it refers to the rownumber of the protein in thermo$protein
.
aminoacids
takes a character argument containing a protein sequence and counts the number of occurrences of each type of amino acid. The output is a dataframe with 20 columns, each corresponding to an amino acid, ordered in the same way as thermo$protein
. If the first argument is NULL, the function returns the one-letter abbreviations (for nchar
equal to 1) or the three-letter ones (if nchar
is equal to 3) or the names of the amino acids (if nchar
is NA) of twenty amino acids in the order used in thermo$protein
.
nucleicacids
takes a DNA or RNA sequence and counts the numbers of bases of each type. Whether the sequence is DNA or RNA is specified by type
. Setting comp
to DNA or RNA tells the function to compute the base composition of that type of complement of the sequence. If comp2
is specified, another complement is taken. The two rounds of complementing can be used in a single function call e.g. to go from a sequence on DNA minus strand (given in seq
) to the plus strand (with comp="DNA"
) and then from the DNA plus strand to RNA (with comp2="RNA"
). The value returned by the function is a dataframe of base composition, which can be passed back to the function to obtain the overall chemical formula for the bases.
MP90.cp
takes T
(one or more temperatures in $^{\circ}$C) and protein
(name of protein) and returns the heat capacity of the unfolded protein using values of heat capacities of the residues taken from Makhatadze and Privalov, 1990. Those authors provided values of heat capacity at six points between 5 and 125 $^{\circ}$C; this function interpolates (using splinefun
) values at other temperatures.
dPdTtr
returns values of $(dP/dT)_{Ttr}$, where $Ttr$ represents the transition temperature, of the phase transition at the high-$T$ stability limit of the x
th species in thermo$obigt
(no checking is done to verify that the species represents in fact one phase of a mineral with phase transitions). dPdTtr
takes account of the Clapeyron equation, $(dP/dT)_{Ttr}={\Delta}S/{\Delta}V$, where ${\Delta}S$ and ${\Delta}V$ represent the changes in entropy and volume of phase transition, and are calculated using subcrt
at Ttr from the standard molal entropies and volumes of the two phases involved. Using values of dPdT
calculated using dPdTtr
or supplied in the arguments, Ttr
returns as a function of P
values of the upper transition temperature of the mineral phase represented by the x
th species.
thermo.plot.new
sets parameters for a new plot, creates a new plot using plot.new
, and adds axes and major and minor ticks to the plot. Plot parameters (see par
) including cex
, mar
, lwd
, mgp
and axs
can be given, as well as a numeric vector in ticks
identifying which sides of the plot receive tick marks. yline
, if present, denotes the margin line (default par('mgp')[1]
) where the y-axis name is plotted. thermo.postscript
calls postscript
with some custom parameters.
axis.label
returns an expression
to be used for plotting an axis label, which may be the symbol for a thermodynamic property, chemical activity or fugacity, or one of T, P, Eh, pH, pe or logK. An expression for chemical activity or fugacity is returned if the first argument is the name of one of the basis species (e.g., O2). The expression in this case includes italic and subscripted symbols, unless oldstyle
is TRUE
, when labels with a simpler format (e.g. O2 (log f)) are returned. The default value of NULL of opt
means to use the state this basis species is in, or if this basis species is not present to use the value in thermo$opt$state
. Likewise, if x
is T or P the units of temperature or pressure are determined using nuts
(which also refers to thermo$opt
). do.upper
, if TRUE
, tells the function to print the label using uppercase letters. Labels for properties can be generated by using e.g. DGf or DG0r as arguments. mol
(default: mol) refers to the denominator of the units (default: molality); this can be changed to represent e.g. specific units, by setting mol
to g. opt
when generating labels for properties indicates the prefix to place on the units.
water.lines
plots lines representing the oxidation and reduction stability limits of water on yaxis
-xaxis
diagrams, where yaxis
can be Eh or O2, and xaxis
can be pH or T. which
controls which lines (oxidation, reduction, or both (the default)) are drawn, logaH2O
(default 0) denotes the logarithm of the activity of water, lty
(default 2) the line type, col
(default par('fg')
, the foreground color), and xpoints
an optional list of points on the x axis to which to restrict the plotting (default of NULL
refers to the axis limits).
label.plot
adds identifying text to the plot; the value given for x
is made into a label like $(a)$. The location of the label is controlled by xfrac
and yfrac
(the fractional locations along the respective axes) as well as adj
(the text alignment parameter, see text
).
thermo.axis
is used to add axes and axis labels to plots, with some default style settings (rotation of numeric labels) and conversions between oxidation-reduction scales (called by thermo.plot.new
). It also adds minor tick marks.
describe
generates a textual representation of the temperature, pressure, and logarithms of activities of the basis species, given in the arguments by x
(i.e. the dataframe in thermo$basis
) and T
and P
(given in Kelvin and bar and converted by the function to those specified by nuts
). The digits
argument tells to what decimal place the logarithms of activities should be rounded. If any of the supplied arguments is NULL its specification is not printed in the output; T and P, if present, are prepended to the basis summary. If x
instead is a dataframe representing a chemical reaction (as output by subcrt
and identified by having a column named coeff
), the function returns a textual summary of that reaction (i.e., showing reactants on the left, an equal sign, and products on the right; reactants and products are preceded by their reaction coefficient unless it is $1$). However, if only reactants or products can be found, or as.reaction
is set to FALSE, the names or formulas of the species are printed with their coefficients and interceding plus or minus signs, as appropriate. Whether the names or formulas are printed is controlled by use.name
(FALSE
by default), a logical vector the length of which corresponds to the number of rows in x
(but is expanded to the right length if needed).
element
returns a dataframe of the mass and entropy of one or more elements or formulas given in compound
. The property
can be mass and/or entropy.
mod.obigt
changes one or more of the properties of one or more species or adds species to the thermodynamic database. These changes are lost if you reload the database by calling data(thermo)
or if you quit the R session without saving it. To modify the properties of species, give the names in the species
argument and supply other arguments: if one of these arguments is state
, species in those states will be updated. Additional arguments refer to the name of the property(s) to be updated and correspond to the column names of thermo$obigt
(the names of the properties are matched to any part of compound column names, such as z.T). The values provided should be in the units specifed in the documentation for the thermo
data object. To add species, supply the new names in species
and provide an argument named formula
with the corresponding chemical formulas. Additional arguments refer to any of the properties you wish to specify. Properties that are not specified are assigned the value of missingvalues
which is NA
by default (however if state
is missing it is set to the value of thermo$opt$state
). The values returned (invisible
-y) by mod.obigt
are the rownumbers of the affected species.
which.pmax
takes a list of equal-length numeric vectors (or objects that can be coerced to numeric) in elts
and returns the index of the vector holding the maximum value at each position. If na.rm
is TRUE
, values of NA
are removed; if pmin
is TRUE
the function finds locations of the minimum values instead.
nonideal
takes a list of dataframes (in proptable
) containing the standard molal properties of the identified species
. For those species whose charge (determined by the number of Z in their makeup
) is not equal to zero, the values of IS
are combined with Alberty's (2003) equation 3.6-1 (Debye-Huckel equation) and its derivatives, to calculate apparent molal properties at the specified ionic strength(s) and temperature(s). The lengths of IS
and T
supplied in the arguments should be equal to the number of rows of each dataframe in proptable
, or one to use single values throughout. The apparent molal properties that can be calculated include G
, H
, S
and Cp
; any columns in the dataframes of proptable
with other names are left untouched. A column named loggam
(logarithm of gamma, the activity coefficient) is appended to the output dataframe of species properties.
change
is a wrapper function to mod.obigt
and mod.buffer
. The name
provided in the argument refers to the name or numeric index of the species to update or add using mod.obigt
, unless the name
begins with an underscore character, in which case the remaining part of the name (after the underscore) is passed to mod.buffer
. The arguments in ...
are sent without change to the subordinate function.
add.protein
and add.obigt
read data from the specified file
and add it to either thermo$protein
or thermo$obigt
, as appropriate. Both of these functions are run, with the default file names, when
which.balance
returns, in order, which column(s) of species
all have non-zero values. It is used by diagram
and transfer
to determine a conservant (i.e. basis species that are conserved in transformation reactions) if none is supplied by the user.
lsub
subtracts the elements of list y
from the respective ones in list x
. lsum
sums the respective elements of lists x
and y
. pprod
multiplies each element of list x
by the respective numeric value in y
. psum
sums all elements of the list x
.
mylapply
passes the given arguments to lapply
, or to mclapply
if the length of X
is greater than 20. mylapply
is used in affinity
(in calculations for proteins activated by the iprotein
argument), abundance.new
(in parallel operations on list elements), and aminoacids
and protein.length
(in counting amino acids in sequences and determining lengths of proteins).
grep.file
is used to search for entries in a FASTA file. It returns the line numbers of the matching FASTA headers. It takes a search term in x
and optionally a term to exclude in y
. The ignore.case
option is passed to grep
, which does the work of finding lines that match. Only lines that start with the expression in startswith
are searched; the default setting reflects the format of the header line for each sequence in a FASTA file. If y
is NULL and a supported operating system is identified (right now, only Linux), the operating system's grep function (or other specified in the grep
argument) is applied directly to the file instead of R's grep
; this mitigates the potential speed penalty of having to read the file into R using readLines
. If the lines from the file were obtained in a preceding operation, they can be supplied to this function in the lines
argument (this overrides the use of the OS's egrep).
read.fasta
is used to retrieve entries from a FASTA file. The line numbers for the headers of the desired sequences are passed to the function in i
(they can be generated using grep.file
). The function returns various formats depending on the value of ret
; the default count returns a dataframe of amino acid counts (the dataframe can be given to add.protein
in order to add the proteins to thermo$protein
), seq returns a list of sequences, and fas returns a list of lines extracted from the FASTA file, including the headers (this can be used e.g. to generate a new FASTA file with only the selected sequences). Similarly to grep.file
, this function utilizes the OS's grep on supported operating systems in order to identify the header lines as well as cat to read the file, otherwise readLines
and R's substr
are used to read the file and locate the header lines. lines
, if it is given, bypasses the reading of the file and also overrides the use of the OS's tools. If the line numbers of the header lines were previously determined, they can be supplied in ihead
. When computing relative abundances of many proteins that might be found with grep.file
, consider using the iprotein
argument of affinity
to speed things up. Examples of these operations can be found in the documentation for revisit
.
spearman
calculates Spearman's rank correlation coefficient for a
and b
.
unitize
scales the logarithms of activities given in logact
so that the logarithm of total activity of residues is equal to zero (i.e. total activity of residues is one), or to some other value set in logact.tot
. length
indicates the number of residues in each species. If logact
is NULL, the function takes the logarithms of activities from the current species definition. If any of those species are proteins, the function gets their lengths using protein.length
.
mtitle
can be used to add a multi-line title to a plot. It loops over each element of main
and places it on a separate margin line using mtext
. This function exists to facilitate using expression
s in multiline titles (see revisit
for an example.)
examples
runs all the examples in the package using example
(with ask
set to FALSE
). If do.png
is TRUE, the plots in the examples are saved as png
files having names beginning with the name of each of the help topics. If long
is TRUE (the default), additional examples are run using longex
. longex
contains the text of many of the dontrun examples in the documentation, which are marked as such in order to avoid long R CMD check timings.
Buvet, R., 1983. General criteria for the fulfillment of redox reactions, in Bioelectrochemistry I: Biological Redox Reactions, Milazzo, G. and Blank, M., eds., Plenum Press, New York, p. 15-50.
Makhatadze, G. I. and Privalov, P. L., 1990. Heat capacity of proteins. 1. Partial molar heat capacity of individual amino acid residues in aqueous solution: Hydration effect. J. Mol. Biol., 213, 375-384.
paste
, substr
, tolower
, par
and text
.data(thermo)
## string to character
s2c("hello world")
s2c("hello world",sep="",keep.sep=FALSE)
s2c("3.141592",sep=c(".","9"))
s2c("3.141592",sep=c(".","9"),move.sep=TRUE)
# character to string
c2s(aminoacids())
c2s(aminoacids(),sep=".")
## Spearman's rho
spearman(c(1,2,3),c(2,3,4)) # 1
spearman(c(1,2,3),c(4,3,2)) # -1
## argument processing
eos.args("hkf",c("g","H","S","cP","V","kT","e"))
## produces an error because "Q" is not allowed in water.SUPCRT92
eos.args("hkf",c("G","H","S","Cp","V","kT","E","Q"))
thermo$opt$water <- "IAPWS" # needed for p and n in next line
eos.args("water",c("p","u","cv","psat","rho","n","q","x","y","epsilon"))
TP.args(c(273.15,373.15))
TP.args(c(273.15,373.15),"Psat")
TP.args(c(273.15,373.15),c(100,100,200,200))
state.args(c("AQ","GAS"))
state.args(c("a","l","liq"))
## converting among Gibbs, enthalpy, entropy
GHS("H") # entropy of H (element)
# calculate enthalpy of formation of arsenopyrite
GHS("FeAsS",DG=-33843,S=68.5)
# return the value of DG calculated from DH and S
# cf. -56687.71 from subcrt("water")
GHS("H2O",DH=-68316.76,S=16.7123)
## count selected elements in a formula
t <- makeup("H2O")
expand.formula(c("H","O"),t)
expand.formula(c("C","H","S"),t)
## count amino acids in a sequence
aminoacids("GGSGG")
aminoacids("WhatAmIMadeOf?")
## count nucleobases in a sequence
nucleicacids("ACCGGGTTT")
# the DNA complement of that sequence
nucleicacids("ACCGGGTTT",comp="DNA")
# the RNA complement of the DNA complement
n <- nucleicacids("ACCGGGTTT",comp="DNA",comp2="RNA")
# the formula of the RNA complement
nucleicacids(n,type="RNA")
## calculate protein length
protein.length("LYSC_CHICK")
# another way to do it
basis("CHNOS")
species("LYSC_CHICK")
protein.length(species()$ispecies)
# another way to do it
ip <- protein("LYSC","CHICK")
protein.length(-ip)
## heat capacity as a function of temperature
## (Makhatadze & Privalov, 1990) units: J mol-1
MP90.cp(c(5,25,50,75,100,125),"LYSC_CHICK")
## properties of phase transitions
t <- info("enstatite")
# (dP/dT) of transitions
dPdTtr(t) # first transition
dPdTtr(t+1) # second transition
# temperature of transitions (Ttr) as a function of P
Ttr(t,P=c(1,10,100,1000))
Ttr(t,P=c(1,10,100,1000))
## nominal carbon oxidation states
ZC("CHNOSZ")
t <- info(info("LYSC_CHICK"))
ZC(t$formula)
## the basis stoichiometry of a made-up species
# warns because P isn't in our basis
basis("CHNOS")
basis.comp("SPONCH")
## describing the basis species
basis("CHNOSe")
describe(thermo$basis)
describe(thermo$basis,T=NULL,P=NULL)
## mass and entropy of compounds of elements
element("CH4")
element(c("CH4","H2O"),"mass")
element("Z") # charge
# same mass, opposite entropy as charge
element("Z-1") # i.e., electron
## scale logarithms of activity
## suppose we have two proteins whose lengths
## are 100 and 200; what are the logarithms
## of activity of the proteins that are equal to
## each other and that give a total activity of
## residues equal to unity?
logact <- c(-3,-3) # could be any two equal numbers
length <- c(100,200)
logact.tot <- 0
loga <- unitize(logact,length,logact.tot)
# the proteins have equal activity
stopifnot(identical(loga[1],loga[2]))
# the sum of activity of the residues is unity
stopifnot(isTRUE(all.equal(sum(10^loga * length),1)))
## now, what if the activity of protein 2 is ten
## times that of protein 1?
logact <- c(-3,-2)
loga <- unitize(logact,length,logact.tot)
# the proteins have unequal activity
stopifnot(isTRUE(all.equal(loga[2]-loga[1],1)))
# but the activities of residues still add up to one
stopifnot(isTRUE(all.equal(sum(10^loga * length),1)))
## modify/add species
info(t <- info("alanine","cr"))
mod.obigt("alanine",state="cr",G=0,H=0,S=0)
# now the values of G, H, and S are inconsistent
# with the elemental composition of alanine
info(t)
# add a species
mod.obigt("myname",formula="CHNOSZ",G=0,H=0)
info(t <- info("myname"))
Run the code above in your browser using DataLab