# process some of the BLAST output for proteins
# from Bison Pool metagenome (JGI, 2007)
# read the file that connects taxids with the sequence identifier
tfile <- system.file("extdata/gi.taxid.txt",package="CHNOSZ")
gi.taxid <- scan(tfile,what=as.list(character(2)),flush=TRUE)
# read the file that connects names with the taxids
nfile <- system.file("extdata/taxid.phylum.csv",package="CHNOSZ")
taxid.phylum <- read.csv(nfile)
# the BLAST files
bfile <- paste("extdata/bison",c("N","R","P"),"_vs_refseq39.blastp",sep="")
for(i in 1:3) {
file <- system.file(bfile[i],package="CHNOSZ")
# process the blast file -- get taxon names
b <- count.taxa(file,gi.taxid,taxid.phylum,min.taxon=2^-6)
# count each of the phyla
bd <- as.matrix(sapply(unique(b$phylum),function(x) (length(which(x==b$phylum)))))
# make a matrix -- each column for a different file
if(i==1) bardata <- bd else {
bardata <- merge(bardata,bd,all=TRUE,by="row.names")
rownames(bardata) <- bardata$Row.names
bardata <- bardata[,-1]
}
}
# normalize the counts
bardata[is.na(bardata)] <- 0
bardata <- t(t(bardata)/colSums(bardata))
colnames(bardata) <- c("N","R","P")
# make a bar chart
bp <- barplot(as.matrix(bardata),col=rainbow(nrow(bardata)),
xlab="location",ylab="fractional abundance")
# add labels to the bars
names <- substr(row.names(bardata),1,3)
for(i in 1:3) {
bd <- bardata[,i]
ib <- bd!=0
y <- (cumsum(bd) - bd/2)[ib]
text(bp[i],y,names[ib])
}
title(main=paste("Most Abundant Phyla in a Portion",
"of the Bison Pool Metagenome",sep=""))
Run the code above in your browser using DataLab