if (FALSE) {
  # Step 1: search the public records for the two species of interest.
  seq.data.ids <- bold.public.search(
    taxonomy = list(
      "Oreochromis tanganicae",
      "Oreochromis karongae"
    )
  )

  # Step 2: fetch the data for the ids returned by the search.
  #   - An api key must be obtained from BOLD support before `bold.fetch()`
  #     can be used.
  #   - `bold.apikey()` sets that key in the global environment.
  bold.apikey("apikey")
  seq.data <- bold.fetch(
    get_by = "processid",
    identifiers = seq.data.ids$processid
  )

  # Step 3: align the fetched sequences, naming each one by its `bin_uri`.
  #   - The R packages `msa` and `Biostrings` are required for this function.
  #   - With `align_method = "Muscle"` the `muscle` package is needed too.
  #   - These packages are installed through `BiocManager`.
  seq.align <- bold.analyze.align(
    seq.data,
    cols_for_seq_names = "bin_uri",
    align_method = "ClustalOmega"
  )

  # Step 4: preview the aligned sequences next to their assigned names.
  head(seq.align[, c("aligned_seq", "msa.seq.name")])
}
# Run the code above in your browser using DataLab