data(Hadza)
data(USiCGs)
### Let's look at the Universal Single Copy Gene distribution in our samples.
KEGG.tpm = Hadza$functions$KEGG$tpm
all(USiCGs %in% rownames(KEGG.tpm)) # Are all the USiCGs present in our dataset?
# Plot a boxplot of USiCGs tpms and calculate median USiCGs tpm.
# This looks weird in the test dataset because it contains only a small subset of the metagenomes.
# In a set of complete metagenomes USiCGs should have fairly similar TPM averages
# and low dispersion across samples.
boxplot(t(KEGG.tpm[USiCGs,]), names=USiCGs, ylab="TPM", col="slateblue2")
### Now let's calculate the average copy numbers of each function.
# We do it for KEGG annotations here, but we could also do it for COGs or PFAMs.
USiCGs.cov = apply(Hadza$functions$KEGG$cov[USiCGs,], 2, median)
# Sample-wise division by the median USiCG coverage.
KEGG.copynumber = t(t(Hadza$functions$KEGG$cov) / USiCGs.cov)
Run the code above in your browser using DataLab