Learn R Programming

sched (version 1.0.3)

Scheduler: Class for scheduling web requests.

Description

Class for scheduling web requests.

Class for scheduling web requests.

Arguments

Methods


Method new()

New instance initializer.

There should be only one Scheduler instance in an application. There is no sense in having two or more instances, since they will ignore each other and break the access frequency rules when they contact the same sites.

Usage

Scheduler$new(
  default_rule = Rule$new(),
  ssl_verifypeer = TRUE,
  nb_max_tries = 10L,
  cache_dir = tools::R_user_dir("sched", which = "cache"),
  user_agent = NULL,
  dwnld_timeout = 3600
)

Arguments

default_rule

The default_rule to use when none has been defined for a site.

ssl_verifypeer

If set to TRUE (default), SSL certificate will be checked, otherwise certificates will be ignored.

nb_max_tries

Maximum number of tries when running a request.

cache_dir

Set the path to the file system cache. Set to NULL to disable the cache system. The cache system will save downloaded content and reuse it later for identical requests.

user_agent

The application name and contact address to send to the contacted web server.

dwnld_timeout

The timeout used by downloadFile() method, in seconds.

Returns

Nothing.

Examples

# Create a scheduler instance with a custom default_rule
scheduler <- sched::Scheduler$new(default_rule=sched::Rule$new(10, 1),
                                  cache_dir = NULL)


Method setRule()

Defines a rule for a site.

Defines a rule for a site. The site is identified by its hostname. Each time a request is made to this host (i.e., the URL contains the defined hostname), the scheduling rule is applied in order to wait (sleep) if needed before sending the request.

If a rule already exists for this hostname, it will be replaced.

Usage

Scheduler$setRule(host, n = 3L, lap = 1)

Arguments

host

The hostname of the site.

n

Number of events during a time lap.

lap

Duration of a time lap, in seconds.

Returns

Nothing.

Examples

# Create a scheduler instance
scheduler <- sched::Scheduler$new(cache_dir = NULL)

# Define a rule with default values
scheduler$setRule('www.ebi.ac.uk')

# Define a rule with custom values
scheduler$setRule('my.other.site', n=10, lap=3)


Method sendRequest()

Sends a request, and retrieves content result.

Usage

Scheduler$sendRequest(request, cache_read = TRUE)

Arguments

request

A sched::Request instance.

cache_read

If set to TRUE and the cache system is enabled, the cache system will be searched for the request and the cached result returned. In any case, if the cache system is enabled and the request is sent, the retrieved content will be stored into the cache.

Returns

The results returned by the contacted server, as a single string value.

Examples

# Create a scheduler instance
scheduler <- sched::Scheduler$new(cache_dir = NULL)

# Define a scheduling rule of 7 requests every 2 seconds
scheduler$setRule('www.ebi.ac.uk', n=7, lap=2)

# Create a request object
u <- 'https://www.ebi.ac.uk/webservices/chebi/2.0/test/getCompleteEntity'
url <- sched::URL$new(url=u, params=c(chebiId=15440))
request <- sched::Request$new(url)

# Send the request and get the content result
content <- scheduler$sendRequest(request)


Method downloadFile()

Downloads the content of a URL and saves it into the specified destination file.

This method works for any URL, even if it has been written with heavy files in mind. Since it uses utils::download.file() which saves the content directly on disk, the cache system is not used.

Usage

Scheduler$downloadFile(url, dest_file, quiet = FALSE, timeout = NULL)

Arguments

url

The URL to access, as a sched::URL object.

dest_file

A path to a destination file.

quiet

The quiet parameter for utils::download.file().

timeout

The timeout in seconds. Defaults to value provided in initializer.

Returns

Nothing.

Examples

# Create a scheduler instance
scheduler <- sched::Scheduler$new(cache_dir = NULL)

# Create a temporary directory
tmp_dir <- tempdir()

# Download a file
u <- sched::URL$new(
    'https://gitlab.com/cnrgh/databases/r-sched/-/raw/main/README.md',
    c(ref_type='heads'))
scheduler$downloadFile(u, file.path(tmp_dir, 'README.md'))

# Remove the temporary directory
unlink(tmp_dir, recursive = TRUE)


Method getUrlString()

Builds a URL string, using a base URL and parameters to be passed.

The provided base URL and parameters are combined into a full URL string.

DEPRECATED. Use the sched::URL class and its method toString() instead.

Usage

Scheduler$getUrlString(url, params = list())

Arguments

url

A URL string.

params

A list of URL parameters.

Returns

The full URL string as a single character value.

Examples

# Create a scheduler instance
scheduler <- sched::Scheduler$new(cache_dir = NULL)

# Create a URL string
url.str <- scheduler$getUrlString(
  'https://www.ebi.ac.uk/webservices/chebi/2.0/test/getCompleteEntity',
  params=c(chebiId=15440))


Method getUrl()

Sends a request and gets the result.

DEPRECATED. Use method sendRequest() instead.

Usage

Scheduler$getUrl(
  url,
  params = list(),
  method = c("get", "post"),
  header = NULL,
  body = NULL,
  encoding = NULL
)

Arguments

url

A URL string.

params

A list of URL parameters.

method

The method to use. Either 'get' or 'post'.

header

The header to send.

body

The body to send.

encoding

The encoding to use.

Returns

The results of the request.

Examples

# Create a scheduler instance
scheduler <- sched::Scheduler$new(cache_dir = NULL)

# Send request
content <- scheduler$getUrl(
  'https://www.ebi.ac.uk/webservices/chebi/2.0/test/getCompleteEntity',
  params=c(chebiId=15440))


Method deleteRules()

Removes all defined rules, including the ones automatically defined using default_rule.

Usage

Scheduler$deleteRules()

Returns

Nothing.

Examples

# Create a scheduler instance
scheduler <- sched::Scheduler$new(cache_dir = NULL)

# Define a rule with custom values
scheduler$setRule('my.other.site', n=10, lap=3)

# Delete all defined rules
scheduler$deleteRules()


Method getNbRules()

Gets the number of defined rules, including the ones automatically defined using default_rule.

Usage

Scheduler$getNbRules()

Returns

The number of rules defined.

Examples

# Create a scheduler instance
scheduler <- sched::Scheduler$new(cache_dir = NULL)

# Get the number of defined rules
print(scheduler$getNbRules())


Method setOffline()

Enables or disables offline mode.

If the offline mode is enabled, an error will be raised when the class attempts to send a request. This mode is mainly useful when debugging the usage of the cache system.

Usage

Scheduler$setOffline(offline)

Arguments

offline

Set to TRUE to enable offline mode, and FALSE otherwise.

Returns

Nothing.

Examples

# Create a scheduler instance
scheduler <- sched::Scheduler$new(cache_dir = NULL)

# Enable offline mode
scheduler$setOffline(TRUE)


Method isOffline()

Tests if offline mode is enabled.

Usage

Scheduler$isOffline()

Returns

TRUE if offline mode is enabled, FALSE otherwise.

Examples

# Create a scheduler instance
scheduler <- sched::Scheduler$new(cache_dir = NULL)

# Test if offline mode is enabled
if (scheduler$isOffline())
  print("Scheduler is offline.")


Method clone()

The objects of this class are cloneable with this method.

Usage

Scheduler$clone(deep = FALSE)

Arguments

deep

Whether to make a deep clone.

Details

The Scheduler class controls the frequency of access to web sites, through the definition of access rules (Rule class). It handles GET and POST requests, as well as file downloading. It can use a cache system to store request results and avoid resending identical requests.

Examples

Run this code
# Create a scheduler instance without cache
scheduler <- sched::Scheduler$new(cache_dir = NULL)

# Define a rule with default values
scheduler$setRule('www.ebi.ac.uk')

# Create a request object
u <- 'https://www.ebi.ac.uk/webservices/chebi/2.0/test/getCompleteEntity'
url <- sched::URL$new(url=u, params=c(chebiId=15440))
request <- sched::Request$new(url)

# Send the request and get the content result
content <- scheduler$sendRequest(request)


## ------------------------------------------------
## Method `Scheduler$new`
## ------------------------------------------------

# Create a scheduler instance with a custom default_rule
scheduler <- sched::Scheduler$new(default_rule=sched::Rule$new(10, 1),
                                  cache_dir = NULL)


## ------------------------------------------------
## Method `Scheduler$setRule`
## ------------------------------------------------

# Create a scheduler instance
scheduler <- sched::Scheduler$new(cache_dir = NULL)

# Define a rule with default values
scheduler$setRule('www.ebi.ac.uk')

# Define a rule with custom values
scheduler$setRule('my.other.site', n=10, lap=3)


## ------------------------------------------------
## Method `Scheduler$sendRequest`
## ------------------------------------------------

# Create a scheduler instance
scheduler <- sched::Scheduler$new(cache_dir = NULL)

# Define a scheduling rule of 7 requests every 2 seconds
scheduler$setRule('www.ebi.ac.uk', n=7, lap=2)

# Create a request object
u <- 'https://www.ebi.ac.uk/webservices/chebi/2.0/test/getCompleteEntity'
url <- sched::URL$new(url=u, params=c(chebiId=15440))
request <- sched::Request$new(url)

# Send the request and get the content result
content <- scheduler$sendRequest(request)


## ------------------------------------------------
## Method `Scheduler$downloadFile`
## ------------------------------------------------

# Create a scheduler instance
scheduler <- sched::Scheduler$new(cache_dir = NULL)

# Create a temporary directory
tmp_dir <- tempdir()

# Download a file
u <- sched::URL$new(
    'https://gitlab.com/cnrgh/databases/r-sched/-/raw/main/README.md',
    c(ref_type='heads'))
scheduler$downloadFile(u, file.path(tmp_dir, 'README.md'))

# Remove the temporary directory
unlink(tmp_dir, recursive = TRUE)


## ------------------------------------------------
## Method `Scheduler$getUrlString`
## ------------------------------------------------

# Create a scheduler instance
scheduler <- sched::Scheduler$new(cache_dir = NULL)

# Create a URL string
url.str <- scheduler$getUrlString(
  'https://www.ebi.ac.uk/webservices/chebi/2.0/test/getCompleteEntity',
  params=c(chebiId=15440))


## ------------------------------------------------
## Method `Scheduler$getUrl`
## ------------------------------------------------

# Create a scheduler instance
scheduler <- sched::Scheduler$new(cache_dir = NULL)

# Send request
content <- scheduler$getUrl(
  'https://www.ebi.ac.uk/webservices/chebi/2.0/test/getCompleteEntity',
  params=c(chebiId=15440))


## ------------------------------------------------
## Method `Scheduler$deleteRules`
## ------------------------------------------------

# Create a scheduler instance
scheduler <- sched::Scheduler$new(cache_dir = NULL)

# Define a rule with custom values
scheduler$setRule('my.other.site', n=10, lap=3)

# Delete all defined rules
scheduler$deleteRules()


## ------------------------------------------------
## Method `Scheduler$getNbRules`
## ------------------------------------------------

# Create a scheduler instance
scheduler <- sched::Scheduler$new(cache_dir = NULL)

# Get the number of defined rules
print(scheduler$getNbRules())


## ------------------------------------------------
## Method `Scheduler$setOffline`
## ------------------------------------------------

# Create a scheduler instance
scheduler <- sched::Scheduler$new(cache_dir = NULL)

# Enable offline mode
scheduler$setOffline(TRUE)


## ------------------------------------------------
## Method `Scheduler$isOffline`
## ------------------------------------------------

# Create a scheduler instance
scheduler <- sched::Scheduler$new(cache_dir = NULL)

# Test if offline mode is enabled
if (scheduler$isOffline())
  print("Scheduler is offline.")

Run the code above in your browser using DataLab