Free Access Week-  Data Engineering + BI
Data engineering and BI courses are free!
Free AI Access Week from June 2-8

cobindR (version 1.10.0)

rtfbs: function performs TFBS prediction using the package rtfbs

Description

Performs transcription factor binding site (TFBS) prediction using the package rtfbs.

Usage

"rtfbs"(x, append = F, background_scan = FALSE, n.cpu = NA)

Arguments

x
an object of the class "cobindr", which will hold all necessary information about the sequences and the hits.
append
logical flag, if append=TRUE the binding sites will be appended to already existing results
background_scan
logical flag, if background_scan=TRUE the background sequences will be searched for transcription factor binding sites
n.cpu
number of CPUs to be used for parallelization. Default value is 'NA', in which case the number of available CPUs is checked and then used.

Value

x
an object of the class "cobindr" including the predicted transcription factor binding sites

References

uses the package "rtfbs" (http://cran.r-project.org/web/packages/rtfbs/index.html)

See Also

search.pwm, search.gadem

Examples

Run this code

############################################################
# Example: run rtfbs-based TFBS prediction on simulated sequences.
#
# Steps:
#   1. generate random DNA sequences in two groups with differing base
#      composition,
#   2. plant motif instances sampled from a PFM around two positions,
#   3. write the sequences to a temporary FASTA file,
#   4. configure and run cobindR, predict binding sites with rtfbs(),
#      and plot the position profile.
library(Biostrings)
# Attach cobindR explicitly so the script also runs outside the package's
# own example environment.
library(cobindR)

n <- 400 # number of input sequences (divisible by 4 for the 3:1 split below)
l <- 500 # length of sequences
n.hits <- 250 # number of 'true' binding sites
bases <- c("A", "C", "G", "T") # alphabet

# Draw one random sequence of length l using the given per-base weights.
random_sequence <- function(prob) {
  paste(sample(bases, l, replace = TRUE, prob = prob), collapse = "")
}

# generate random input sequences with two groups with differing GC content
seqs <- vapply(
  seq_len(3 * n / 4),
  function(x) random_sequence(c(.3, .22, .2, .28)),
  character(1)
)
seqs <- append(
  seqs,
  vapply(
    seq_len(n / 4),
    function(x) random_sequence(c(.25, .25, .25, .25)),
    character(1)
  )
)

path <- system.file("extdata/pfms/myod.tfpfm", package = "cobindR")
motif <- read.transfac.pfm(path)[[1]] # get PFM of binding site
motif.width <- ncol(motif) # one column per motif position

# add binding sites with distance specificity
for (position in c(110, 150)) {
  # For every motif column draw n.hits bases, weighted by that column's
  # entries; each row of the result is one motif instance.
  sampled.bases <- apply(
    motif, 2,
    function(x) sample(x = bases, size = n.hits, prob = x, replace = TRUE)
  )
  hits <- apply(sampled.bases, 1, paste, collapse = "")

  # Start positions scatter around `position`; the names record which
  # sequence receives each hit (drawn without replacement, so each
  # sequence gets at most one hit per position).
  pos.hits <- round(rnorm(n.hits, mean = position, sd = 8))
  names(pos.hits) <- sample(seq_len(n), n.hits)

  for (i in seq_len(n.hits)) {
    target <- as.integer(names(pos.hits)[i])
    # The replaced window is exactly motif.width characters wide, hence
    # the "- 1" on the inclusive stop index.
    substr(
      seqs[target],
      start = pos.hits[i],
      stop = pos.hits[i] + motif.width - 1
    ) <- hits[i]
  }
}

# save sample sequences in fasta file
tmp.file <- tempfile(
  pattern = "cobindr_sample_seq",
  tmpdir = tempdir(),
  fileext = ".fasta"
)
writeXStringSet(DNAStringSet(seqs), tmp.file)

# run cobindr
cfg <- cobindRConfiguration()
sequence_type(cfg) <- "fasta"
sequence_source(cfg) <- tmp.file
sequence_origin(cfg) <- "artificial sequences"
pfm_path(cfg) <- system.file("extdata/pfms", package = "cobindR")
pairs(cfg) <- "V$MYOD_01 V$MYOD_01"
fdrThreshold(cfg) <- 0
runObj <- cobindr(cfg, name = "cobindr test using sampled sequences")

# perform tfbs prediction using rtfbs
runObj.bs <- rtfbs(runObj)

# show results
plot.positionprofile(runObj.bs)

# clean up
unlink(tmp.file)

Run the code above in your browser using DataLab