cat("H1 H2 H3 H4 H5",
"Population1 1 2 1 0 0",
"Population2 0 0 0 4 1",
"Population3 0 1 0 0 3",
file = "4_Example3_HapPerPop_Weighted.txt", sep = "")
cat("H1 H2 H3 H4 H5",
"H1 0 1 2 3 1",
"H2 1 0 3 4 2",
"H3 2 3 0 1 1",
"H4 3 4 1 0 2",
"H5 1 2 1 2 0",
file = "4_Example3_IndelDistanceMatrixMullerMod.txt", sep = "")
example3_2 <- read.table("4_Example3_IndelDistanceMatrixMullerMod.txt",header=TRUE)
cat(">Population1_sequence1",
"GGGGAGCTGTCGGGCTAGTAGCTGTATCAGTCGTACGTAGTAGTCGTGTCGATCGATGGCGCGGCGCATC--------------------TAGCGCTAGCTGATGCTAGTAGCGTAGAGTATG",
">Population1_sequence2",
"GGGGAGCTGTCGGGCTAGTAGCTGTATCAGTCGTACGTAGTAGTCGTGTCGATCGATGGCGCGGCGCATC--------------------TAGCGCTAGCTGATGCTAGTAGCGTAGAGTATG",
">Population1_sequence3",
"GGGGAGCTGTCGGGCTAGTAGCTGTATCAGTCGTACGTAGTAGTCGTGTCGATCGATGGCGCGGCGCATC--------------------TAGCGCTAGCTGATGCTAGTAGCGTAGAGTATG",
">Population1_sequence4",
"GGGGAGCTGTCGGGCTAGTAGCTGTATCAGTCGTACGTAGTAGTCGTGTCGATCGATGGCGCGGCGCATC--------------------TAGCGCTAGCTGATGCTAGTAGCGTAGAGTATG",
">Population2_sequence1",
"TTATAGCTGTCGGGCTAGTAGCTGTATCAGTC--------------------TCGATGGCGCGGCGCATCAATATTATATCGGCGATGCGTAGCGCTAGCTGATGCTAGTAGCGTAGAGTATG",
">Population2_sequence2",
"TTATAGCTGTCGGGCTAGTAGCTGTATCAGTC--------------------TCGATGGCGCGGCGCATCAATATTATATCGGCGATGCGTAGCGCTAGCTGA----------GTAGAGTATG",
">Population2_sequence3",
"TTATAGCTGTCGGGCTAGTAGCTGTATCAGTC--------------------TCGATGGCGCGGCGCATCAATATTATATCGGCGATGCGTAGCGCTAGCTGATGCTAGTAGCGTAGAAAAAA",
">Population2_sequence4",
"TTATAGCTGTCGGGCTAGTAGCTGTATCAGTC--------------------TCGATGGCGCGGCGCATCAATATTATATCGGCGATGCGTAGCGCTAGCTGATGCTAGTAGCGTAGAGTATG",
">Population3_sequence1",
"GGGGAGCTGTCGGGCTAGTAGCTGTATCAGTCGTACGTAGTAGTCGTGTCGATCGATGGCGCGGCGCATC--------------------TAGCGCTAGCTGATGCTAGTAGCGTAGAGTATG",
">Population3_sequence2",
"GGGGAGCTGTCGGGCTAGTAGCTGTATCAGTCGTACGTAGTAGTCGTGTCGATCGATGGCGCGGCGCATC--------------------TAGCGCTAGCTGATGCTAGTAGCGTAGAGTATG",
">Population3_sequence3",
"GGGGAGCTGTCGGGCTAGTAGCTGTATCAGTCGTACGTAGTAGTCGTGTCGATCGATGGCGCGGCGCATC--------------------TAGCGCTAGCTGATGCTAGTAGCGTAGAGTATG",
">Population3_sequence4",
"GGGGAGCTGTCGGGCTAGTAGCTGTATCAGTCGTACGTAGTAGTCGTGTCGATCGATGGCGCGGCGCATC--------------------TAGCGCTAGCTGATGCTAGTAGCGTAGAGTATG",
file = "5_Example4.fas", sep = "")
example4 <- read.table("5_Example4.fas",header=TRUE)
# Reading files. Distance matrix must contain haplotype names. Abundance matrix must contain both, haplotype and population names:
pop.dist (DistFile=TRUE, inputDist="4_Example3_IndelDistanceMatrixMullerMod.txt", HaploFile=TRUE, inputHaplo="4_Example3_HapPerPop_Weighted.txt", outType="O", logfile=FALSE, saveFile=FALSE)
# It may be convenient to manually modify files to get the appropriate names. However, an automated example from fasta sequence names (using the substr function) is shown below:
# Estimating distances between unique haplotypes
uniqueHaplo<-GetHaplo(input="5_Example4.fas",saveFile=FALSE)
distGap<-MCIC(readfile=FALSE,align=uniqueHaplo,saveFile=FALSE)
dist.nt <-dist.dna(uniqueHaplo,model="raw",pairwise.deletion=TRUE)
DISTnt<-as.matrix(dist.nt)
# Replacing sequence names by haplotype names in both distance matrices
for (Hi in 1:length(colnames(distGap)))
colnames(distGap)[Hi]<-FindHaplo(input="5_Example4.fas",saveFile=FALSE)[which(colnames(distGap)[Hi]==FindHaplo(input="5_Example4.fas",saveFile=FALSE)[,1]),2]
row.names(distGap)<-colnames(distGap)
for (Hi in 1:length(colnames(DISTnt)))
colnames(DISTnt)[Hi]<-FindHaplo(input="5_Example4.fas",saveFile=FALSE)[which(colnames(DISTnt)[Hi]==FindHaplo(input="5_Example4.fas",saveFile=FALSE)[,1]),2]
row.names(DISTnt)<-colnames(DISTnt)
#Combining distance matrices and setting haplotype names
CombinedDistance<-as.data.frame(nt.gap.comb(DISTgap=distGap, range=0.5, method="Corrected", saveFile=FALSE, DISTnuc=DISTnt)[[2]])
colnames(CombinedDistance)<-row.names(CombinedDistance)
# Estimating haplotype abundance per population and setting population names:
Haplotypes<-FindHaplo(input="5_Example4.fas",saveFile=FALSE)
Haplotypes[,1]<-substr(Haplotypes[,1],1,11)
Weighted<-as.data.frame(HapPerPop(readfile=FALSE,header=TRUE,input=Haplotypes)[1])
colnames(Weighted)<-substr(colnames(Weighted),10,11)
# Estimating population distances
pop.dist (DistFile=FALSE, distances=CombinedDistance, HaploFile=FALSE, Haplos=Weighted, outType="O", logfile=FALSE, saveFile=FALSE)Run the code above in your browser using DataLab