utilities: Utility and Miscellaneous Functions

Description

Provide various utilities for the user and for other functions in CHNOSZ. Convert between strings and character objects, calculate one of Gibbs energy, enthalpy or entropy from the other two, test for ability to become numeric, write and extract parts of chemical formulas and calculate nominal carbon oxidation states of formulas, handle arguments referring to temperature, pressure, states, and equations of state, calculate protein length, count amino acids in protein sequences, calculate $dP/dT$ and temperature of phase transitions, calculate heat capacities of unfolded proteins using an equation from the literature, initialize a new plot window using preset parameters, open a postscript file for plotting, add an axis or title to a plot, generate labels for plot axes and for identification of subplots and physical and chemical conditions, add stability lines for water to a diagram, add or alter properties of species in the thermodynamic database, calculate non-ideal contributions to apparent standard molal properties, identify a conserved basis species, perform arithmetic on lists, execute multicore calculations, calculate Spearman's rank correlation coefficient, scale logarithms of activity to a desired total activity, search a FASTA file for protein sequences, read protein sequences from a file and count numbers of amino acids in each sequence, and run all the examples provided in the package.

Usage

c2s(x, sep = " ")
  s2c(x, sep = NULL, keep.sep = TRUE, n = NULL, move.sep = FALSE)
  GHS(species = NULL, DG = NA, DH = NA, S = NA, T = thermo$opt$Tr)
  can.be.numeric(x)
  expand.formula(elements, makeup)
  ZC(x)
  eos.args(eos, property = NULL, T = NULL, P = NULL)
  TP.args(T = NULL, P = NULL)
  state.args(state = NULL)
  protein.length(protein)
  aminoacids(seq, nchar=1)
  nucleicacids(seq, type = "DNA", comp = NULL, comp2 = NULL)
  MP90.cp(T, protein)
  dPdTtr(x)
  Ttr(x, P = 1, dPdT = NULL)
  thermo.plot.new(xlim, ylim, xlab, ylab, cex = par("cex"),
    mar = NULL, lwd = par("lwd"), side = c(1,2,3,4), 
    mgp = c(1.2, 0.3, 0), cex.axis = par("cex"), col = par("col"),
    yline = NULL, axs = "i", do.box = TRUE, ticks = NULL)
  thermo.postscript(file, family = "Helvetica", width = 8, 
    height = 6, horizontal = FALSE)
  thermo.axis(lab = "x-axis", side = 1, line = 1.5, cex = par("cex"),
    lwd = par("lwd"), T = NULL, col = par("col"))
  axis.label(x, opt = NULL, do.state = TRUE, oldstyle = FALSE,
    do.upper = FALSE, mol = "mol")
  describe(x = NULL, T = NULL, P = NULL, use.name = FALSE, 
    as.reaction = NULL, digits = 1)
  basis.comp(basis)
  label.plot(x, xfrac = 0.95, yfrac = 0.9, cex = 1, paren = TRUE, 
    adj = 1)
  water.lines(xaxis = "pH", yaxis = "Eh", T = 298.15, P = "Psat", 
    which = c("oxidation","reduction"), logaH2O = 0, lty = 2, 
    col = par("fg"), xpoints = NULL)
  element(compound, property = c("mass","entropy"))
  mod.obigt(species, ..., missingvalues = NA)
  which.pmax(elts, na.rm = FALSE, pmin = FALSE)
  nonideal(species, proptable, IS, T)
  change(name, ...)
  add.protein(file="protein.csv")
  add.obigt(file="obigt.csv")
  which.balance(species)
  lsub(x, y)
  lsum(x, y)
  pprod(x, y)
  psum(x)
  mylapply(X, FUN, ...)
  spearman(a, b)
  unitize(logact = NULL, length = NULL, logact.tot = 0)
  mtitle(main, ...)
  grep.file(file, x = "", y = NULL, ignore.case = TRUE, startswith = ">", 
    lines = NULL, grep = "grep")
  read.fasta(file, i = NULL, ret = "count", lines = NULL, ihead = NULL)
  examples(do.png = FALSE, long = TRUE)
  longex(which)

Arguments

x

character object to convert (s2c, c2s, axis.label), or object to be tested (can.be.numeric), or numeric index of a mineral phase (dPdTtr, Ttr), or character object representing a chemical formula (ZC); see Details for the meaning of x in the other functions.

sep

character, the separator to insert or separator(s) to match (c2s, s2c).

keep.sep

logical, retain the separator in the output (TRUE) or discard it (FALSE) (s2c).

n

numeric, maximum number of items in the character object returned by s2c.

move.sep

logical, move the kept separator to the end of the preceding item.

species

character, formula of a compound from which to calculate entropies of the elements (GHS), or names of species to modify or add to the thermodynamic database (mod.obigt), or names or indices of species for which to calculate nonideal properties (nonideal).

T

numeric, temperature (K) (TP.args, water.lines, describe, MP90.cp, nonideal, GHS).

P

numeric, pressure (bar) (can also be character, Psat in TP.args).

eos

character, name of equation of state (one of hkf, cgl, water).

property

character, name(s) of thermodynamic properties (eos.args, element).

state

character, name(s) of states (e.g., cr, aq).

DG

numeric, standard molal Gibbs energy of formation (GHS).

DH

numeric, standard molal enthalpy of formation.

S

numeric, standard molal entropy.

elements

character, name(s) of elements (expand.formula).

makeup

dataframe, elemental composition of a compound returned by makeup.

protein

character, name of protein species; numeric, species index of protein (protein.length).

seq

character, amino acid sequence of a protein (aminoacids) or base sequence of a nucleic acid (nucleicacids).

nchar

numeric, $1$ to return one-letter, $3$ to return three-letter abbreviations for amino acids (aminoacids).

type

character, type of nucleic acid sequence (DNA or RNA) (nucleicacids).

comp

character, type of complement sequence.

comp2

character, type of second complement sequence.

dPdT

numeric, values of ($dP/dT$) of phase transitions (Ttr).

xlim

numeric, limits of the $x$-axis (thermo.plot.new).

ylim

numeric, limits of the $y$-axis.

xlab

character, $x$-axis label.

ylab

character, $y$-axis label.

cex

numeric, character expansion factor for labels, also in label.plot.

mar

numeric, width (number of lines) of margins on each side of plot.

lwd

numeric, line width.

side

numeric, which sides of plot to draw axes (thermo.plot.new,thermo.axis).

mgp

numeric, sizes of margins of plot.

cex.axis

numeric, character expansion factor for names of axes.

yline

numeric, margin line on which to plot $y$-axis name.

axs

character, setting for axis limit calculation.

do.box

logical, draw a box around the plot?

ticks

numeric, same effect as side (retained for back-compatibility).

file

character, path to a file (thermo.postscript, add.protein, add.obigt, grep.file, read.fasta).

family

character, font family.

width

numeric, width of plot.

height

numeric, height of plot.

horizontal

logical, create plot in landscape mode?

opt

character or numeric, options for axis labels (axis.label).

oldstyle

logical, use old style of axis labels?

do.state

logical, append state abbreviation to label?

do.upper

logical, use uppercase letters in axis label?

mol

character, string to use as the denominator of units in axis label.

use.name

logical, write names instead of formulas? (describe).

as.reaction

logical, interpret input as a reaction?

digits

numeric, how many digits to round logarithms of activities.

basis

numeric or character, species number or formula (basis.comp).

xaxis

character, description of $x$-axis (water.lines).

yaxis

character, description of $y$-axis.

which

character, which of oxidation/reduction lines to plot (water.lines), or which examples to run (longex).

logaH2O

numeric, logarithm of the activity of $\mathrm{H_2O}$.

lty

numeric, line type.

col

character, line color (water.lines, thermo.plot.new, thermo.axis).

xpoints

numeric, points to plot on $x$ axis.

xfrac

numeric, fractional location on $x$-axis for placement of label (label.plot).

yfrac

numeric, fractional location on $y$-axis for placement of label.

paren

logical, add parentheses around label text?

adj

numeric, parameter for text alignment.

lab

character, axis label (thermo.axis).

line

numeric, line (distance from axis) to place axis label.

compound

character, name of element(s) or compound(s) (element).

...

character or numeric, properties of species to modify in the thermodynamic database (mod.obigt), or arguments to change that are passed to mod.obigt or mod.buffer, or additional arguments for lapply or mclapply (mylapply).

missingvalues

numeric, values to assign to undefined properties.

elts

list, numeric vectors for which to find maximum values (in parallel) (which.pmax).

na.rm

logical, remove missing values?

pmin

logical, find minimum values instead of maximum ones?

proptable

list of dataframes of species properties (nonideal).

IS

numeric, ionic strength(s) used in nonideal calculations, mol kg$^{-1}$.

name

character or numeric, name (or numeric index) of species or name of buffer to be modified (change).

X

vector, argument for lapply or mclapply.

FUN

function, argument for lapply or mclapply.

y

list (lsub, lsum), or numeric (pprod), or character, term to exclude in searching sequence headers (grep.file).

ignore.case

logical, ignore differences between upper- and lower-case? (grep.file)

startswith

character, only lines starting with this expression are matched.

lines

list of character, supply the lines here instead of reading them from file.

grep

character, name of system grep command.

i

numeric, line numbers of sequence headers to read (read.fasta).

ret

character, specification for type of return (count, sequence, or FASTA format).

ihead

numeric, which lines are headers.

a, b

numeric values (spearman).

logact

numeric, logarithms of activity (unitize).

length

numeric, numbers of residues.

logact.tot

numeric, logarithm of total activity.

main

character, text for plot title.

do.png

logical, generate PNG plot files?

long

logical, run additional examples?

Value

s2c, c2s and axis.label return character values. Numeric returns are made by GHS, protein.length, dPdTtr, Ttr, ZC, MP90.cp, spearman, mod.obigt and grep.file. A list is returned by eos.args and TP.args, and character is returned by state.args. can.be.numeric returns logical. aminoacids and nucleicacids return character or dataframe. lsub, lsum and pprod return lists. read.fasta returns a list of sequences or lines (for ret equal to seq or fas, respectively), or a data frame with amino acid compositions of proteins (for ret equal to count) with columns corresponding to those in thermo$protein. Functions with no (or unspecified) returns are thermo.plot.new, thermo.postscript, label.plot and water.lines.

Details

c2s joins the elements of a character object into a character object of length $1$ (a string), and s2c splits a string into elements of a character object of length $n+1$, where $n$ stands for the number of separators in the string. sep gives the separator to insert between successive items (in c2s) or the separator(s) to find in a string (in s2c). The default value of sep is a space (" ") in c2s. The default value for sep is NULL in s2c, indicating a separator at every position of x (the result in this case has length equal to nchar(x)). Argument keep.sep if TRUE (the default) instructs s2c to keep the separating values in the output, and move.sep if TRUE instructs s2c to append the kept separators to the preceding items instead of prepending them to the following ones. The maximum length of the object returned by s2c is determined by n; the default value of NULL indicates an unrestricted length.

The *.args functions are used to normalize user-input arguments, which are case-insensitive. eos.args returns a list with elements named props, for all the properties available for the specified equations-of-state, prop for the lower-case version of property, and Prop, for the upper-case (of first letter) version of property. eos.args produces an error if any of the values in property is not in the list of available properties. (See water and subcrt for the available properties for different species.) TP.args forces T and P to equal length. This function also looks for the keyword Psat in the value of P and substitutes calculated values of the saturation vapor pressure (see water). state.args makes its argument lowercase, then transforms a, c, g, and l to aq, cr, gas, and liq, respectively.

GHS computes one of the standard molal Gibbs energy or enthalpy of formation from the elements (DG, DH) or entropy (S) at 298.15 K and 1 bar from values of the other two. If the species argument is present, it is used to calculate the entropies of the elements (Se) using element, otherwise Se is set to zero. The equation in effect can be written as ${\Delta}G^{\circ}={\Delta}H^{\circ}-T{\Delta}S^{\circ}$, where ${\Delta}S^{\circ}=S-S_e$ and $T$ denotes the reference temperature of 298.15 K. If two of DG, DH, and S are provided, the value of the third is returned. If three are provided, the value of DG in the arguments is ignored and the calculated value of DG is returned. If none of DG, DH or S are provided, the value of Se is returned. If only one of the values is provided, an error results. Units of cal mol$^{-1}$ (DG, DH) and cal K$^{-1}$ mol$^{-1}$ (S) are assumed. If T is provided, it is used instead of the reference temperature.

can.be.numeric returns a value of TRUE or FALSE for each element of x.

expand.formula converts a 1-column dataframe representing the elemental composition of a compound (see makeup) to a numeric vector, each value of which is the coefficient of the elements given in the argument. If any of these is not present in the makeup dataframe, its coefficient is set to zero. A non-zero coefficient of an element in the makeup dataframe does not appear in the output if that element is not one of elements.

ZC returns the nominal carbon oxidation state for the chemical formula represented by x. (For discussion of nominal carbon oxidation state, see Hendrickson et al., 1970; Buvet, 1983.) If carbon is not present in the formula the result is NaN.

The argument of protein.length, if it is character, refers to the name of protein(s) (e.g., LYSC_CHICK) for which to calculate the length (number of amino acid residues). If the argument is numeric, it refers to the species index of a protein (value in thermo$species$ispecies). For a positive numeric argument to work, the protein information must have been previously loaded into the species list (using info). If the numeric value is negative, it refers to the rownumber of the protein in thermo$protein. aminoacids takes a character argument containing a protein sequence and counts the number of occurrences of each type of amino acid. The output is a dataframe with 20 columns, each corresponding to an amino acid, ordered in the same way as thermo$protein. If the first argument is NULL, the function returns the one-letter abbreviations (for nchar equal to 1) or the three-letter ones (if nchar is equal to 3) or the names of the amino acids (if nchar is NA) of twenty amino acids in the order used in thermo$protein.

nucleicacids takes a DNA or RNA sequence and counts the numbers of bases of each type. Whether the sequence is DNA or RNA is specified by type. Setting comp to DNA or RNA tells the function to compute the base composition of that type of complement of the sequence. If comp2 is specified, another complement is taken. The two rounds of complementing can be used in a single function call e.g. to go from a sequence on DNA minus strand (given in seq) to the plus strand (with comp="DNA") and then from the DNA plus strand to RNA (with comp2="RNA"). The value returned by the function is a dataframe of base composition, which can be passed back to the function to obtain the overall chemical formula for the bases.

MP90.cp takes T (one or more temperatures in $^{\circ}$C) and protein (name of protein) and returns the heat capacity of the unfolded protein using values of heat capacities of the residues taken from Makhatadze and Privalov, 1990. Those authors provided values of heat capacity at six points between 5 and 125 $^{\circ}$C; this function interpolates (using splinefun) values at other temperatures.

dPdTtr returns values of $(dP/dT)_{Ttr}$, where $Ttr$ represents the transition temperature, of the phase transition at the high-$T$ stability limit of the xth species in thermo$obigt (no checking is done to verify that the species represents in fact one phase of a mineral with phase transitions). dPdTtr takes account of the Clapeyron equation, $(dP/dT)_{Ttr}$=${\Delta}S/{\Delta}V$, where ${\Delta}S$ and ${\Delta}V$ represent the changes in entropy and volume of phase transition, and are calculated using subcrt at Ttr from the standard molal entropies and volumes of the two phases involved. Using values of dPdT calculated using dPdTtr or supplied in the arguments, Ttr returns as a function of P values of the upper transition temperature of the mineral phase represented by the xth species.

thermo.plot.new sets parameters for a new plot, creates a new plot using plot.new, and adds axes and major and minor ticks to the plot. Plot parameters (see par) including cex, mar, lwd, mgp and axs can be given, as well as a numeric vector in ticks identifying which sides of the plot receive tick marks. yline, if present, denotes the margin line (default par('mgp')[1]) where the y-axis name is plotted. thermo.postscript calls postscript with some custom parameters.

axis.label returns an expression to be used for plotting an axis label, which may be the symbol for a thermodynamic property, chemical activity or fugacity, or one of T, P, Eh, pH, pe or logK. An expression for chemical activity or fugacity is returned if the first argument is the name of one of the basis species (e.g., O2). The expression in this case includes italic and subscripted symbols, unless oldstyle is TRUE, when labels with a simpler format (e.g. O2 (log f)) are returned. The default value of NULL of opt means to use the state this basis species is in, or if this basis species is not present to use the value in thermo$opt$state. Likewise, if x is T or P the units of temperature or pressure are determined using nuts (which also refers to thermo$opt). do.upper, if TRUE, tells the function to print the label using uppercase letters. Labels for properties can be generated by using e.g. DGf or DG0r as arguments. mol (default: mol) refers to the denominator of the units (default: molality); this can be changed to represent e.g. specific units, by setting mol to g. opt when generating labels for properties indicates the prefix to place on the units.

water.lines plots lines representing the oxidation and reduction stability limits of water on yaxis-xaxis diagrams, where yaxis can be Eh or O2, and xaxis can be pH or T. which controls which lines (oxidation, reduction, or both (the default)) are drawn, logaH2O (default 0) denotes the logarithm of the activity of water, lty (default 2) the line type, col (default par('fg'), the foreground color), and xpoints an optional list of points on the x axis to which to restrict the plotting (default of NULL refers to the axis limits).

label.plot adds identifying text to the plot; the value given for x is made into a label like $(a)$. The location of the label is controlled by xfrac and yfrac (the fractional locations along the respective axes) as well as adj (the text alignment parameter, see text).

thermo.axis is used to add axes and axis labels to plots, with some default style settings (rotation of numeric labels) and conversions between oxidation-reduction scales (called by thermo.plot.new). It also adds minor tick marks.

describe generates a textual representation of the temperature, pressure, and logarithms of activities of the basis species, given in the arguments by x (i.e. the dataframe in thermo$basis) and T and P (given in Kelvin and bar and converted by the function to those specified by nuts). The digits argument tells to what decimal place the logarithms of activities should be rounded. If any of the supplied arguments is NULL its specification is not printed in the output; T and P, if present, are prepended to the basis summary. If x instead is a dataframe representing a chemical reaction (as output by subcrt and identified by having a column named coeff), the function returns a textual summary of that reaction (i.e., showing reactants on the left, an equal sign, and products on the right; reactants and products are preceded by their reaction coefficient unless it is $1$). However, if only reactants or products can be found, or as.reaction is set to FALSE, the names or formulas of the species are printed with their coefficients and interceding plus or minus signs, as appropriate. Whether the names or formulas are printed is controlled by use.name (FALSE by default), a logical vector the length of which corresponds to the number of rows in x (but is expanded to the right length if needed).

element returns a dataframe of the mass and entropy of one or more elements or formulas given in compound. The property can be mass and/or entropy. mod.obigt changes one or more of the properties of one or more species or adds species to the thermodynamic database. These changes are lost if you reload the database by calling data(thermo) or if you quit the R session without saving it. To modify the properties of species, give the names in the species argument and supply other arguments: if one of these arguments is state, species in those states will be updated. Additional arguments refer to the name of the property(s) to be updated and correspond to the column names of thermo$obigt (the names of the properties are matched to any part of compound column names, such as z.T). The values provided should be in the units specified in the documentation for the thermo data object. To add species, supply the new names in species and provide an argument named formula with the corresponding chemical formulas. Additional arguments refer to any of the properties you wish to specify. Properties that are not specified are assigned the value of missingvalues which is NA by default (however if state is missing it is set to the value of thermo$opt$state). The values returned (invisibly) by mod.obigt are the rownumbers of the affected species.

which.pmax takes a list of equal-length numeric vectors (or objects that can be coerced to numeric) in elts and returns the index of the vector holding the maximum value at each position. If na.rm is TRUE, values of NA are removed; if pmin is TRUE the function finds locations of the minimum values instead.

nonideal takes a list of dataframes (in proptable) containing the standard molal properties of the identified species. For those species whose charge (determined by the number of Z in their makeup) is not equal to zero, the values of IS are combined with Alberty's (2003) equation 3.6-1 (Debye-Huckel equation) and its derivatives, to calculate apparent molal properties at the specified ionic strength(s) and temperature(s). The lengths of IS and T supplied in the arguments should be equal to the number of rows of each dataframe in proptable, or one to use single values throughout. The apparent molal properties that can be calculated include G, H, S and Cp; any columns in the dataframes of proptable with other names are left untouched. A column named loggam (logarithm of gamma, the activity coefficient) is appended to the output dataframe of species properties.

change is a wrapper function to mod.obigt and mod.buffer. The name provided in the argument refers to the name or numeric index of the species to update or add using mod.obigt, unless the name begins with an underscore character, in which case the remaining part of the name (after the underscore) is passed to mod.buffer. The arguments in ... are sent without change to the subordinate function.

add.protein and add.obigt read data from the specified file and add it to either thermo$protein or thermo$obigt, as appropriate. Both of these functions are run, with the default file names, when CHNOSZ is first loaded.

which.balance returns, in order, which column(s) of species all have non-zero values. It is used by diagram and transfer to determine a conservant (i.e. basis species that are conserved in transformation reactions) if none is supplied by the user.

lsub subtracts the elements of list y from the respective ones in list x. lsum sums the respective elements of lists x and y. pprod multiplies each element of list x by the respective numeric value in y. psum sums all elements of the list x.

mylapply passes the given arguments to lapply, or to mclapply if the multicore package is loaded and the length of X is greater than 20. mylapply is used in affinity (in calculations for proteins activated by the iprotein argument), abundance.new (in parallel operations on list elements), and aminoacids and protein.length (in counting amino acids in sequences and determining lengths of proteins).

grep.file is used to search for entries in a FASTA file. It returns the line numbers of the matching FASTA headers. It takes a search term in x and optionally a term to exclude in y. The ignore.case option is passed to grep, which does the work of finding lines that match. Only lines that start with the expression in startswith are searched; the default setting reflects the format of the header line for each sequence in a FASTA file. If y is NULL and a supported operating system is identified (right now, only Linux), the operating system's grep function (or other specified in the grep argument) is applied directly to the file instead of R's grep; this mitigates the potential speed penalty of having to read the file into R using readLines. If the lines from the file were obtained in a preceding operation, they can be supplied to this function in the lines argument (this overrides the use of the OS's grep).

read.fasta is used to retrieve entries from a FASTA file. The line numbers for the headers of the desired sequences are passed to the function in i (they can be generated using grep.file). The function returns various formats depending on the value of ret; the default count returns a dataframe of amino acid counts (the dataframe can be given to add.protein in order to add the proteins to thermo$protein), seq returns a list of sequences, and fas returns a list of lines extracted from the FASTA file, including the headers (this can be used e.g. to generate a new FASTA file with only the selected sequences). Similarly to grep.file, this function utilizes the OS's grep on supported operating systems in order to identify the header lines as well as cat to read the file, otherwise readLines and R's substr are used to read the file and locate the header lines. lines, if it is given, bypasses the reading of the file and also overrides the use of the OS's tools. If the line numbers of the header lines were previously determined, they can be supplied in ihead. When computing relative abundances of many proteins that might be found with grep.file, consider using the iprotein argument of affinity to speed things up. Examples of these operations can be found in the documentation for revisit.

spearman calculates Spearman's rank correlation coefficient for a and b.

unitize scales the logarithms of activities given in logact so that the logarithm of total activity of residues is equal to zero (i.e. total activity of residues is one), or to some other value set in logact.tot. length indicates the number of residues in each species. If logact is NULL, the function takes the logarithms of activities from the current species definition. If any of those species are proteins, the function gets their lengths using protein.length.

mtitle can be used to add a multi-line title to a plot. It loops over each element of main and places it on a separate margin line using mtext. This function exists to facilitate using expressions in multiline titles (see revisit for an example).

examples runs all the examples in the package using example (with ask set to FALSE). If do.png is TRUE, the plots in the examples are saved as png files having names beginning with the name of each of the help topics. If long is TRUE (the default), additional examples are run using longex. longex contains the text of many of the dontrun examples in the documentation, which are marked as such in order to avoid long R CMD check timings.

References

Alberty, R. A., 2003. Thermodynamics of Biochemical Reactions, John Wiley & Sons, Hoboken, New Jersey, 397 p. http://www.worldcat.org/oclc/51242181

Buvet, R., 1983. General criteria for the fulfillment of redox reactions, in Bioelectrochemistry I: Biological Redox Reactions, Milazzo, G. and Blank, M., eds., Plenum Press, New York, p. 15-50. http://www.worldcat.org/oclc/9282370

Hendrickson, J. B., Cram, D. J., and Hammond, G. S., 1970. Organic Chemistry, 3rd ed., McGraw-Hill, New York, 1279 p. http://www.worldcat.org/oclc/78308

Makhatadze, G. I. and Privalov, P. L., 1990. Heat capacity of proteins. 1. Partial molar heat capacity of individual amino acid residues in aqueous solution: Hydration effect. J. Mol. Biol., 213, 375-384. http://dx.doi.org/10.1016/S0022-2836(05)80197-4

Examples

Run this code

data(thermo)
  ## string to character
  s2c("hello world")
  s2c("hello world",sep="",keep.sep=FALSE)
  s2c("3.141592",sep=c(".","9"))
  s2c("3.141592",sep=c(".","9"),move.sep=TRUE)
  # character to string
  c2s(aminoacids())
  c2s(aminoacids(),sep=".")

  ## Spearman's rho
  spearman(c(1,2,3),c(2,3,4))  # 1
  spearman(c(1,2,3),c(4,3,2))  # -1

  ## argument processing
  eos.args("hkf",c("g","H","S","cP","V","kT","e"))
  ## produces an error because "Q" is not allowed in water.SUPCRT92
  eos.args("hkf",c("G","H","S","Cp","V","kT","E","Q"))
  thermo$opt$water <- "IAPWS"  # needed for p and n in next line
  eos.args("water",c("p","u","cv","psat","rho","n","q","x","y","epsilon"))
  TP.args(c(273.15,373.15))
  TP.args(c(273.15,373.15),"Psat")
  TP.args(c(273.15,373.15),c(100,100,200,200))
  state.args(c("AQ","GAS"))
  state.args(c("a","l","liq"))

  ## converting among Gibbs, enthalpy, entropy
  GHS("H") # entropy of H (element)
  # calculate enthalpy of formation of arsenopyrite 
  GHS("FeAsS",DG=-33843,S=68.5) 
  # return the value of DG calculated from DH and S
  # cf. -56687.71 from subcrt("water")
  GHS("H2O",DH=-68316.76,S=16.7123)  
 
  ## count selected elements in a formula
  t <- makeup("H2O")
  expand.formula(c("H","O"),t)
  expand.formula(c("C","H","S"),t)

  ## count amino acids in a sequence
  aminoacids("GGSGG")
  aminoacids("WhatAmIMadeOf?")

  ## count nucleobases in a sequence
  nucleicacids("ACCGGGTTT")
  # the DNA complement of that sequence
  nucleicacids("ACCGGGTTT",comp="DNA")
  # the RNA complement of the DNA complement
  n <- nucleicacids("ACCGGGTTT",comp="DNA",comp2="RNA")
  # the formula of the RNA complement
  nucleicacids(n,type="RNA")

  ## calculate protein length
  protein.length("LYSC_CHICK")
  # another way to do it
  basis("CHNOS")
  species("LYSC_CHICK")
  protein.length(species()$ispecies)
  # another way to do it
  ip <- protein("LYSC","CHICK")
  protein.length(-ip)

  ## heat capacity as a function of temperature
  ## (Makhatadze & Privalov, 1990) units: J mol-1
  MP90.cp(c(5,25,50,75,100,125),"LYSC_CHICK")

  ## properties of phase transitions
  t <- info("enstatite")
  # (dP/dT) of transitions
  dPdTtr(t)  # first transition
  dPdTtr(t+1) # second transition
  # temperature of transitions (Ttr) as a function of P
  Ttr(t,P=c(1,10,100,1000))
  Ttr(t,P=c(1,10,100,1000))
  
  ## nominal carbon oxidation states
  ZC("CHNOSZ")
  t <- info(info("LYSC_CHICK"))
  ZC(t$formula)

  ## the basis stoichiometry of a made-up species
  # warns because P isn't in our basis
  basis("CHNOS")
  basis.comp("SPONCH")

  ## describing the basis species
  basis("CHNOSe")
  describe(thermo$basis)
  describe(thermo$basis,T=NULL,P=NULL)

  ## mass and entropy of compounds of elements
  element("CH4")
  element(c("CH4","H2O"),"mass")
  element("Z")   # charge
  # same mass, opposite entropy as charge
  element("Z-1") # i.e., electron

  ## scale logarithms of activity
  ## suppose we have two proteins whose lengths
  ## are 100 and 200; what are the logarithms
  ## of activity of the proteins that are equal to
  ## each other and that give a total activity of
  ## residues equal to unity?
  logact <- c(-3,-3)  # could be any two equal numbers
  length <- c(100,200)
  logact.tot <- 0
  loga <- unitize(logact,length,logact.tot)
  # the proteins have equal activity
  stopifnot(identical(loga[1],loga[2]))
  # the sum of activity of the residues is unity
  stopifnot(isTRUE(all.equal(sum(10^loga * length),1)))
  ## now, what if the activity of protein 2 is ten
  ## times that of protein 1?
  logact <- c(-3,-2)
  loga <- unitize(logact,length,logact.tot)
  # the proteins have unequal activity
  stopifnot(isTRUE(all.equal(loga[2]-loga[1],1)))
  # but the activities of residues still add up to one
  stopifnot(isTRUE(all.equal(sum(10^loga * length),1)))

  ## modify/add species
  info(t <- info("alanine","cr"))
  mod.obigt("alanine",state="cr",G=0,H=0,S=0)
  # now the values of G, H, and S are inconsistent
  # with the elemental composition of alanine
  info(t)
  # add a species
  mod.obigt("myname",formula="CHNOSZ",G=0,H=0)
  info(t <- info("myname"))

Run the code above in your browser using DataLab