## NOTE: Drop 'dryrun = TRUE' below in order to actually connect. Add
## 'verbose = TRUE' if you run into problems and need to troubleshoot.
## EXAMPLE: Two workers on the local machine
workers <- c("localhost", "localhost")
cl <- makeClusterPSOCK(workers, dryrun = TRUE, quiet = TRUE)
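## NOTE: A minimal usage sketch (assuming 'dryrun = TRUE' has been dropped
## above so that 'cl' is a real cluster): the cluster can be used with the
## 'parallel' package and should be stopped when no longer needed, e.g.
## nodenames <- parallel::clusterCall(cl, function() Sys.info()[["nodename"]])
## parallel::stopCluster(cl)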
## EXAMPLE: Three remote workers
## Three R workers are set up on two remote machines
workers <- c("n1.remote.org", "n2.remote.org", "n1.remote.org")
cl <- makeClusterPSOCK(workers, dryrun = TRUE, quiet = TRUE)
## EXAMPLE: Three workers on two machines on the local network
## Same setup when the two machines are on the local network and
## have identical software setups
cl <- makeClusterPSOCK(
  workers,
  revtunnel = FALSE, homogeneous = TRUE,
  dryrun = TRUE, quiet = TRUE
)
## EXAMPLE: Three remote workers 'n1', 'n2', and 'n3' that can only be
## accessed via jumphost 'login.remote.org'
workers <- c("n1", "n2", "n1")
cl <- makeClusterPSOCK(
  workers,
  rshopts = c("-J", "login.remote.org"),
  homogeneous = FALSE,
  dryrun = TRUE, quiet = TRUE
)
## EXAMPLE: Remote workers with specific setup
## Setup of remote worker with more detailed control on
## authentication and reverse SSH tunneling
cl <- makeClusterPSOCK(
  "remote.server.org", user = "johnny",
  ## Manual configuration of reverse SSH tunneling
  revtunnel = FALSE,
  rshopts = c("-v", "-R 11000:gateway:11942"),
  master = "gateway", port = 11942,
  ## Run Rscript nicely and skip any startup scripts
  rscript = c("nice", "/path/to/Rscript"),
  rscript_args = c("--no-init-file"),
  dryrun = TRUE, quiet = TRUE
)
## EXAMPLE: Two workers running in Docker on the local machine
## Setup of 2 Docker workers running rocker/r-parallel
cl <- makeClusterPSOCK(
  rep("localhost", times = 2L),
  ## Launch Rscript inside Docker container
  rscript = c(
    "docker", "run", "--net=host", "rocker/r-parallel",
    "Rscript"
  ),
  ## IMPORTANT: Because Docker runs inside a virtual machine (VM) on macOS
  ## and Windows (not Linux), when the R worker tries to connect back to
  ## the default 'localhost' it will fail, because the main R session is
  ## not running in the VM, but outside on the host. To reach the host on
  ## macOS and Windows, make sure to use master = "host.docker.internal"
  # master = "host.docker.internal", # <= macOS & Windows
  dryrun = TRUE, quiet = TRUE
)
## EXAMPLE: Two workers running in Singularity on the local machine
## Setup of 2 Singularity workers running rocker/r-parallel
cl <- makeClusterPSOCK(
  rep("localhost", times = 2L),
  ## Launch Rscript inside Linux container
  rscript = c(
    "singularity", "exec", "docker://rocker/r-parallel",
    "Rscript"
  ),
  dryrun = TRUE, quiet = TRUE
)
## EXAMPLE: One worker running in udocker on the local machine
## Setup of a single udocker.py worker running rocker/r-parallel
cl <- makeClusterPSOCK(
  "localhost",
  ## Launch Rscript inside Docker container (using udocker)
  rscript = c(
    "udocker.py", "run", "rocker/r-parallel",
    "Rscript"
  ),
  ## Manually launch parallel workers
  ## (need double shQuote():s because udocker.py drops one level)
  rscript_args = c(
    "-e", shQuote(shQuote("parallel:::.workRSOCK()"))
  ),
  dryrun = TRUE, quiet = TRUE
)
## EXAMPLE: One worker running in Wine for Linux on the local machine
## To install R for MS Windows in Wine, do something like:
## winecfg # In GUI, set 'Windows version' to 'Windows 10'
## wget https://cran.r-project.org/bin/windows/base/R-4.1.2-win.exe
## wine R-4.1.2-win.exe /SILENT
## Prevent packages from being installed to R's system library:
## chmod ugo-w "$HOME/.wine/drive_c/Program Files/R/R-4.1.2/library/"
## Verify it works:
## wine "C:/Program Files/R/R-4.1.2/bin/x64/Rscript.exe" --version
cl <- makeClusterPSOCK(1L,
  rscript = c(
    ## Silence Wine warnings
    "WINEDEBUG=fixme-all",
    ## Don't pass LC_* environment variables from Linux to Wine
    sprintf("%s=", grep("LC_", names(Sys.getenv()), value = TRUE)),
    "wine",
    "C:/Program Files/R/R-4.1.2/bin/x64/Rscript.exe"
  ),
  dryrun = TRUE, quiet = TRUE
)
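## Sketch (assuming 'dryrun = TRUE' is dropped above): the worker runs the
## MS Windows build of R under Wine, which can be verified with, e.g.
## parallel::clusterEvalQ(cl, Sys.info()[["sysname"]])  ## "Windows"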
## EXAMPLE: Launch 124 workers on MS Windows 10, where half are
## running on CPU Group #0 and half on CPU Group #1.
## (https://lovickconsulting.com/2021/11/18/
## running-r-clusters-on-an-amd-threadripper-3990x-in-windows-10-2/)
ncores <- 124
cpu_groups <- c(0, 1)
cl <- lapply(cpu_groups, FUN = function(cpu_group) {
  parallelly::makeClusterPSOCK(ncores %/% length(cpu_groups),
    rscript = I(c(
      Sys.getenv("COMSPEC"), "/c", "start", "/B",
      "/NODE", cpu_group, "/AFFINITY", "0xFFFFFFFFFFFFFFFE",
      "*"
    )),
    dryrun = TRUE, quiet = TRUE
  )
})
## Merge the two 62-node clusters into one with 124 nodes
cl <- do.call(c, cl)
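## Sketch (assuming 'dryrun = TRUE' is dropped above and the 'future'
## package is installed): the merged 124-node cluster can be registered
## with the future framework via, e.g.
## future::plan(future::cluster, workers = cl)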
## EXAMPLE: Remote worker running on AWS
## Launching worker on Amazon AWS EC2 running one of the
## Amazon Machine Images (AMI) provided by RStudio
## (https://www.louisaslett.com/RStudio_AMI/)
public_ip <- "1.2.3.4"
ssh_private_key_file <- "~/.ssh/my-private-aws-key.pem"
cl <- makeClusterPSOCK(
  ## Public IP number of EC2 instance
  public_ip,
  ## User name (always 'ubuntu')
  user = "ubuntu",
  ## Use private SSH key registered with AWS
  rshopts = c(
    "-o", "StrictHostKeyChecking=no",
    "-o", "IdentitiesOnly=yes",
    "-i", ssh_private_key_file
  ),
  ## Set up .libPaths() for the 'ubuntu' user
  ## and then install the future package
  rscript_startup = quote(local({
    p <- Sys.getenv("R_LIBS_USER")
    dir.create(p, recursive = TRUE, showWarnings = FALSE)
    .libPaths(p)
    install.packages("future")
  })),
  dryrun = TRUE, quiet = TRUE
)
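## Sketch (assuming 'dryrun = TRUE' is dropped above): verify that the
## worker's user library was set up and that 'future' was installed, e.g.
## parallel::clusterEvalQ(cl, .libPaths())
## parallel::clusterEvalQ(cl, requireNamespace("future", quietly = TRUE))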
## EXAMPLE: Remote worker running on GCE
## Launching worker on Google Compute Engine (GCE) running a
## container-based VM (with a #cloud-config specification)
public_ip <- "1.2.3.4"
user <- "johnny"
ssh_private_key_file <- "~/.ssh/google_compute_engine"
cl <- makeClusterPSOCK(
  ## Public IP number of GCE instance
  public_ip,
  ## User name (== SSH key label (sic!))
  user = user,
  ## Use private SSH key registered with GCE
  rshopts = c(
    "-o", "StrictHostKeyChecking=no",
    "-o", "IdentitiesOnly=yes",
    "-i", ssh_private_key_file
  ),
  ## Launch Rscript inside Docker container
  rscript = c(
    "docker", "run", "--net=host", "rocker/r-parallel",
    "Rscript"
  ),
  dryrun = TRUE, quiet = TRUE
)
## EXAMPLE: Remote worker running on Linux from Windows machine
## Connect to remote Unix machine 'remote.server.org' on port 2200
## as user 'bob' from a Windows machine with PuTTY installed.
## Using the special value rshcmd = "<putty-plink>" forces
## makeClusterPSOCK() to search for and use the PuTTY plink software,
## preventing it from using other SSH clients on the system search PATH.
cl <- makeClusterPSOCK(
  "remote.server.org", user = "bob",
  rshcmd = "<putty-plink>",
  rshopts = c("-P", 2200, "-i", "C:/Users/bobby/.ssh/putty.ppk"),
  dryrun = TRUE, quiet = TRUE
)
## EXAMPLE: Remote worker running on Linux from RStudio on Windows
## Connect to remote Unix machine 'remote.server.org' on port 2200
## as user 'bob' from a Windows machine via RStudio's SSH client.
## Using the special value rshcmd = "<rstudio-ssh>" forces
## makeClusterPSOCK() to use the SSH client that comes with RStudio,
## preventing it from using other SSH clients on the system search PATH.
cl <- makeClusterPSOCK(
  "remote.server.org", user = "bob", rshcmd = "<rstudio-ssh>",
  dryrun = TRUE, quiet = TRUE
)
## EXAMPLE: The 'Fujitsu Technical Computing Suite' is a high-performance
## compute (HPC) job scheduler where one can request compute resources on
## multiple nodes, each running multiple cores. For example,
##
## pjsub -L vnode=3 -L vnode-core=18 script.sh
##
## reserves 18 cores on three nodes. The job script runs on the first node,
## with environment variables set so that the other nodes can be inferred,
## making availableWorkers() return 3 * 18 = 54 workers. When the HPC environment
## does not support SSH between compute nodes, one can use the 'pjrsh'
## command to launch the parallel workers.
cl <- makeClusterPSOCK(
  availableWorkers(),
  rshcmd = "pjrsh",
  dryrun = TRUE, quiet = TRUE
)
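## Sketch: the worker set implied by the scheduler's environment variables
## can be inspected before setting up the cluster, e.g.
## print(parallelly::availableWorkers())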