# -----------------------------------------------------------------------
# Example with Clustal alignment file
# -----------------------------------------------------------------------
# Write a two-block Clustal W alignment (WT plus six mutants) to a temporary
# ".aln" file. The path is kept in `clustal_file` for the plotting call.
clustal_file <- tempfile(fileext = ".aln")
local({
  # Row labels, shared by both alignment blocks (same order in each block)
  seq_ids <- c("WT", paste0("Mutant", 1:6))
  # First alignment block: one aligned segment per id
  block_1 <- c(
    "EQKLISEEDLMKTAYIAKQRQISFVKSHFSRQLERIEKKIEAHFDDLHP",
    "EQKLISEEDLMKTAYIAKQRQISFVKSHFSRQLERIEKKIEAHFDDLHP",
    "EQKLISEEDLMKTAYIAKQRQRSFVKSHFSRQLERIEKKWEAHFDDLHP",
    "EQKLISEEDLMKTAYIAKQRQISFVKSHFSRQLER----IEAHFDDLHP",
    "EQKLISEEDLMKTAYIAKQRQISFVKSHFSRQAERIEKKIEAHFDDLHP",
    "EQKLISEEDLAKTAYIAKQRQISFVKSHFSRQLERIEKKIEAHFDDRHP",
    "EQKLISEEDLMKTAYIAKQRQISFVKSHFSRQLERIEKKIEAHFDDLHP"
  )
  # Second alignment block: continuation of each sequence
  block_2 <- c(
    "DIVALSGHTFGKTHGAGKQSSHHHHHH",
    "DIVALSGHTFGKTHGAGKQSSHHHHHH",
    "DIVALSGHTFGKTHGAGKQSSHHHHHH",
    "DIVALSGHTFGKTHGAGKQSS------",
    "DIVALSGHTFGKTHGAGKQSSHHHHHH",
    "DIVALSGHTFGKTHGAGKQSSHHHHHH",
    "DRVALSGHTFAKTHGAGKQSS------"
  )
  writeLines(
    text = c(
      "CLUSTAL W (1.83) multiple sequence alignment",
      "",
      paste(seq_ids, block_1),
      # Conservation line of the first block
      " *********** ***************** * ******* *******:**",
      "",
      paste(seq_ids, block_2),
      # Conservation line of the second block
      " * ******** ********** "
    ),
    con = clustal_file
  )
})
# Plot Clustal alignment
# The reference is the 76-residue WT sequence (49 + 27 residues), so every
# highlighted or annotated position has to fall within 1..76.
gg_seqdiff(
  clustal = clustal_file,
  ref = paste0("EQKLISEEDLMKTAYIAKQRQISFVKSHFSRQLERIEKKIEAHFDDLHP",
               "DIVALSGHTFGKTHGAGKQSSHHHHHH"),
  color = c(K = "#285bb8", R = "#285bb8",   # Basic
            E = "#a12b20", D = "#a12b20",   # Acidic
            W = "#9b59b6", F = "#9b59b6",   # Aromatic
            H = "#f39c12"),                 # Histidine
  highlight = list(
    # Both tags share one colour, so they are given as a single entry rather
    # than two list elements with the same name. The HHHHHH tag occupies the
    # last six residues, i.e. positions 71:76 (not 72:77, which would run past
    # the end of the 76-residue reference).
    "#94d104" = c(1:10, 71:76),   # N-terminal c-Myc tag, C-terminal His-tag
    "#FFE0B2" = 30:45             # Active site
  ),
  annotate = list(
    list(label = "c-Myc", pos = 5),
    list(label = "Active site", pos = 37),
    list(label = "6xHis", pos = 74)
  ),
  wrap = 60
)
# Clean up: remove the temporary alignment file
unlink(clustal_file)
# -----------------------------------------------------------------------
# Example with DNA sequences - gene structure with regulatory elements
# -----------------------------------------------------------------------
# Reference gene model (130 nt), assembled from its structural elements.
# Element boundaries: 1-6, 7-32, 33-48, 49-74, 75-90, 91-116, 117-130.
dna_ref <- paste0(
  "TATAAA",                       # TATA box (promoter)
  "ATGCGATCGATCGATCGTAGCTAGCT",   # Exon 1
  "GTAAGTATCGATCGAT",             # Intron 1 (GT splice donor at its 5' end)
  "ACGTACGTACGTAGCTAGCTAGCTAC",   # Exon 2
  "GTACGTACGTACGTAC",             # Intron 2
  "GTACGTACGTAGCTAGCTAGCTACGT",   # Exon 3
  "ACGTACGTAAATAA"                # 3'UTR with poly-A signal
)
# One row per variant. Each variant is a single-base substitution, so every
# sequence keeps the length of dna_ref. sub() (first match only) is used
# throughout so that each pattern edits exactly one site.
dna_df <- data.frame(
  sequence = c(
    dna_ref,                                               # wild type
    sub("TATAAA", "TATATA", dna_ref),                      # position 5
    sub("GTAAGT", "ATAAGT", dna_ref),                      # position 33
    # Intron 1 ends in "...CGAT" and exon 2 starts with "ACG", so a pattern
    # of "CGATAG" never matches and would leave this row identical to WT.
    # "CGATACG" spans the intron 1 / exon 2 junction and occurs only once;
    # the replacement changes the G at position 46 to A.
    sub("CGATACG", "CAATACG", dna_ref),                    # position 46
    sub("ATG", "AAG", dna_ref),                            # position 8
    sub("AATAA$", "AACAA", dna_ref),                       # position 128
    sub("GCGATCGATCGATCG", "GCGATCAATCGATCG", dna_ref),    # position 15
    # This motif occurs twice (start of exon 2 and across the intron 2 /
    # exon 3 junction); gsub() would mutate both, sub() only the exon 2 site.
    sub("ACGTACGTACGTAG", "ACGTACATACGTAG", dna_ref)       # position 55
  ),
  id = c("WT", "Promoter_mut", "Splice_donor",
         "Splice_acceptor", "Start_codon", "PolyA_mut",
         "Exon1_missense", "Exon2_frameshift"),
  # Keep sequences as character even on R < 4.0
  stringsAsFactors = FALSE
)
# Highlight gene structure elements
# Background colours (hex code -> positions in dna_ref)
dna_regions <- list(
  "#FFE0B2" = 1:6,                      # TATA box (promoter)
  "#C8E6C9" = c(7:32, 49:74, 91:116),   # Exons
  "#FFCCBC" = 117:130                   # 3'UTR with poly-A
)
# Text labels placed at individual positions; angle/size/color are optional
dna_labels <- list(
  list(label = "TATA", pos = 1, angle = 90),
  list(label = "ATG", pos = 7, angle = 90, color = "red"),
  list(label = "Exon1", pos = 19),
  list(label = "GT", pos = 33, angle = 90, size = 2.5),
  list(label = "GA", pos = 46, angle = 90, size = 2.5),
  list(label = "Exon2", pos = 61),
  list(label = "GT", pos = 75, angle = 90, size = 2.5),
  list(label = "AC", pos = 89, angle = 90, size = 2.5),
  list(label = "Exon3", pos = 103),
  list(label = "AATAAA", pos = 125, angle = 90, color = "blue")
)
# Compare every row of dna_df against the wild-type reference
gg_seqdiff(
  data = dna_df,
  name = "id",
  ref = dna_ref,
  color = c(G = "#4e8fb5", C = "#845cab"),
  highlight = dna_regions,
  annotate = dna_labels,
  wrap = 80
)
# -----------------------------------------------------------------------
# Example with antibody sequences with CDR mutations
# -----------------------------------------------------------------------
# Wild-type antibody sequence (128 residues), written as two chunks
ref_seq <- paste(
  c(
    "QVQLVESGGGLVQAGGSLRLSCAASGRTFSSYAMGWFRQAPGKEREFVAAINSGGSTYYP",
    "DSVKGRFTISRDNAKNTVYLQMNSLKPEDTAVYYCAADLRGTTVKDYWGQGTQVTVSSEQKLISEEDL"
  ),
  collapse = ""
)
# Variant table: one row per construct, columns `sequence` and `id`.
# All sequences must be same length as reference, so every variant is made
# by substituting residues in place.
mutant_df <- local({
  variants <- c(
    # Wild-type
    "WT" = ref_seq,
    # CDR1 (27-33): substitutes the F at position 29
    "CDR1_F30A" = sub("GRTFSSYAMG", "GRTASSYAMG", ref_seq),
    # CDR2 (51-57): substitutes the G at position 54
    "CDR2_G54A" = sub("AINSGGS", "AINSAGS", ref_seq),
    # CDR3 (96-107): substitutes the V at position 104
    "CDR3_V104A" = sub("AADLRGTTVKDY", "AADLRGTTAKDY", ref_seq),
    # Framework: substitutes positions 1 (Q -> E) and 6 (E -> A)
    "FR1_E5A" = sub("QVQLVES", "EVQLVAS", ref_seq),
    # Multiple CDR mutations: positions 27, 29, 32 (CDR1) and 99 (CDR3)
    "CDR1+3_multi" = sub(
      "AADLRGTTVKDY", "AADWRGTTVKDY",
      sub("GRTFSSYAMG", "GYTASSAAMG", ref_seq)
    )
  )
  # Drop the vector names so they do not become row names of the data frame
  data.frame(sequence = unname(variants), id = names(variants))
})
# Highlight CDRs and tags, color key residues
# Background colours (hex code -> positions in ref_seq)
ab_regions <- list(
  "#70bcfa" = 1,                          # N-terminal
  "#ffb4b4" = c(27:33, 51:57, 96:107),    # CDRs
  "#94d104" = 119:128                     # c-Myc tag
)
# Text labels placed at individual positions
ab_labels <- list(
  list(label = "N-term", pos = 1, angle = 90),
  list(label = "CDR1", pos = 30),
  list(label = "CDR2", pos = 54),
  list(label = "CDR3", pos = 102),
  list(label = "c-Myc", pos = 123)
)
# Per-residue letter colours, grouped by side-chain class
ab_colors <- c(
  R = "#285bb8", K = "#285bb8",   # positive
  E = "#a12b20", D = "#a12b20",   # negative
  W = "#9b59b6", F = "#9b59b6"    # aromatic
)
# Compare every row of mutant_df against the wild-type reference
gg_seqdiff(
  data = mutant_df,
  name = "id",
  ref = ref_seq,
  color = ab_colors,
  highlight = ab_regions,
  annotate = ab_labels,
  wrap = 66
)
# Run the code above in your browser using DataLab