###################################################
# Example 1: the expected final result of the call below is "OK".
# The only mismatched residue (position 61) is also documented in the CIF
# file, so it is treated as a "reconciled" mismatch. Whether to keep that
# residue in the position sequence or remove it is left to the user.
# NOTE(review): confirm that these http:// URLs still resolve.
CIF <- "http://www.pdb.org/pdb/files/3GFT.cif"
Fasta <- "http://www.uniprot.org/uniprot/P01116-2.fasta"
KRAS.extracted.positions <- get.Positions(CIF, Fasta, "A")
###################################################
###################################################
# Example 2: the expected final result of the call below is "FAILURE".
# For PIK3CA, 10 residues differ between the CIF file and the canonical
# sequence, and none of those differences are explained inside the CIF file
# itself.
# NOTE(review): confirm that these http:// URLs still resolve.
CIF <- "http://www.pdb.org/pdb/files/2RD0.cif"
Fasta <- "http://cancer.sanger.ac.uk/cosmic/sequence?width=700&ln=PIK3CA&type=protein&height=500"
PIK3CA.extracted.positions <- get.Positions(CIF, Fasta, "A")
###################################################
###################################################
# Example 3: the expected final result of the call below is "OK".
# Here the canonical sequence is read from a different location, the UNIPROT
# database. That canonical sequence is slightly different and matches the
# extracted positions exactly. Only 1 isoform is listed on UNIPROT for
# PIK3CA, so we suggest obtaining the mutational data and the canonical
# sequence from the same source. For example, if your mutation data came
# from COSMIC, also use COSMIC for the canonical protein sequence.
# NOTE(review): confirm that these http:// URLs still resolve.
CIF <- "http://www.pdb.org/pdb/files/2RD0.cif"
Fasta <- "http://www.uniprot.org/uniprot/P42336.fasta"
PIK3CA.extracted.positions <- get.Positions(CIF, Fasta, "A")
###################################################
# Run the code above in your browser using DataLab