# NOT RUN {
## Example: read a protein FASTA file
# Locate the example alignment file inside the installed CHNOSZ package
file <- system.file("extdata", "protein", "EF-Tu.aln", package = "CHNOSZ")
# Read the sequences themselves and show only the first one
read.fasta(file, ret = "seq")[[1]]
# Read the same file again, this time keeping the default return value
# (amino acid counts for the sequences)
aa <- read.fasta(file)
# Lengths of the proteins, i.e. their numbers of amino acids
protein.length(aa)
# }
# NOT RUN {
## Example: fetch a protein's amino acid composition from UniProt
# Begin at position 2 so the initiator methionine is left out
aa <- uniprot.aa("ALAT1_HUMAN", start = 2)
# Rename the protein from "sp|P24298" to "ALAT1" (original note stamped 20201110)
aa[["protein"]] <- "ALAT1"
# Register the composition in thermo()$protein
ip <- add.protein(aa)
# With the protein registered, some of its properties can be calculated
protein.length(ip)
protein.formula(ip)
subcrt("ALAT1_HUMAN", c("cr", "aq"), c(-1, 1))
# Save the amino acid composition to a CSV file for later use
write.csv(aa, file = "saved.aa.csv", row.names = FALSE)
# A different R session can then load the protein without calling uniprot.aa()
aa <- read.csv(file = "saved.aa.csv", as.is = TRUE)
add.protein(aa)
## Example: count the amino acids in a sequence
count.aa("GGSGG")
# Characters that are not recognized trigger warnings
atest <- count.aa("WhatAmIMadeOf?")
# The count for "A" (alanine) is 3
atest[, "A"]
# }
# Run the code above in your browser using DataLab