#
# BASIC USAGE ####
#
x = c("Romeo", "Juliet")
# {x} inserts the value of x into the string
string_magic("Hello {x}!")
# elements in ... are collapsed with "" (default)
string_magic("Hello {x[1]}, ",
"how is {x[2]} doing?")
# Splitting a comma separated string
# The mechanism is explained later
string_vec("J. Mills, David, Agnes, Dr Strong")
# Note: this is equivalent to the following call (explained later)
string_magic("{', *'S ! J. Mills, David, Agnes, Dr Strong}")
#
# Applying low level operations to strings
#
# Two main syntaxes:
# A) expression evaluation
#    {operation ? x}
#               | |
#               | \-> the expression to be evaluated
#               \-> ? means that the expression will be evaluated
# B) verbatim
#    {operation ! x}
#               | |
#               | \-> the expression taken as verbatim (here 'x')
#               \-> ! means that the expression is taken as verbatim
# operation: usually 'arg'op with op an operation code.
# Example: splitting
x = "hello dear"
string_magic("{' 's ? x}")
# x is split by ' '
string_magic("{' 's ! hello dear}")
# 'hello dear' is split by ' '
# had we used ?, there would have been an error
# There are 50+ string operators
# Operators usually have a default value
# Operations can have options
# Operations can be chained by separating them with a comma
# Example: default of 's' is ' ' + chaining with collapse
string_magic("{s, ' my 'c ! hello dear}")
#
# Nesting
#
#    {operations ! s1{expr}s2}
#                |    |
#                |    \-> expr will be interpolated then added to the string
#                \-> nesting requires verbatim evaluation: '!'
string_magic("The variables are: {C ! x{1:4}}.")
# This one is ugly but it shows triple nesting
string_magic("The variables are: {ws, C ! {2 times ! x{1:4}}{','s, 4 each ! ,_sq}}.")
#
# Splitting
#
# s: split with fixed pattern, default is ' '
string_magic("{s ! a b c}")
string_magic("{' b 's !a b c}")
# S: same as 's' but default is ',[ \t\n]*'
string_magic("{S !a, b, c}")
string_magic("{'[[:punct:] ]+'S ! a! b; c}")
# regex flags can be added in front of the pattern: e.g. fixed search
# (presumably the 'f' before the '/' in the call below is that flag)
string_magic("{'f/.'s ! hi.there}")
#
# Collapsing
#
# c and C do the same thing, only their default differs
# syntax: 's1|s2' with
# - s1 the string used for collapsing
# - s2 (optional) the string used for the last collapse
# c: default is ' '
string_magic("{c ? 1:3}")
# C: default is ', | and '
string_magic("{C ? 1:3}")
# custom collapse using the 's1|s2' syntax
string_magic("{', | or 'c ? 1:4}")
#
# Extraction
#
# extract: extracts patterns (accepts the option 'first')
# x: alias to extract.first
# X: alias to extract
# syntax: 'pattern'x
# Default pattern is '[[:alnum:]]+'
x = "This years is... 2020"
string_magic("{x ? x}") # similar to string_magic("{extract.first ? x}")
string_magic("{X ? x}") # similar to string_magic("{extract ? x}")
# with a custom pattern
string_magic("{'\\d+'x ? x}")
#
# STRING FORMATTING ####
#
#
# upper, lower, title: case conversion
# upper case the first letter
string_magic("{upper.first ! julia mills}")
# title case
string_magic("{title ! julia mills}")
# upper all letters
string_magic("{upper ! julia mills}")
# lower case
string_magic("{lower ! JULIA MILLS}")
#
# q, Q, bq: single, double, back quote
# (each call below splits the names, quotes them, then collapses them)
string_magic("{S, q, C ! Julia, David, Wilkins}")
string_magic("{S, Q, C ! Julia, David, Wilkins}")
string_magic("{S, bq, C ! Julia, David, Wilkins}")
#
# format, Format: formats the strings so that they all have the same length
# format: the right side is filled with blanks
# Format: the left side is filled with blanks, the string is right aligned
score = c(-10, 2050)
nm = c("Wilkins", "David")
string_magic("Monopoly scores:\n{'\n'c ! - {format ? nm}: {Format ? score} US$}")
# OK that example may have been a bit too complex,
# let's make it simple:
string_magic("Scores: {format ? score}")
string_magic("Names: {Format ? nm}")
#
# ws: white space normalization
# ws: trims the leading/trailing white spaces + normalizes successive white spaces
# Add the following options in any order to:
# - punct: remove punctuation
# - digit: remove digits
# - isolated: remove isolated characters
# (the calls below also use the short forms .p, .d and .i of these options)
string_magic("{ws ! The white spaces are now clean. }")
string_magic("{ws.punct ! I, really -- truly; love punctuation!!!}")
string_magic("{ws.digit ! 1, 2, 12, a microphone check!}")
string_magic("{ws.i ! 1, 2, 12, a microphone check!}")
string_magic("{ws.d.i ! 1, 2, 12, a microphone check!}")
string_magic("{ws.p.d.i ! 1, 2, 12, a microphone check!}")
#
# %: applies sprintf formatting
# the format can be given as a regular (quoted) argument
string_magic("pi = {'.2f'% ? pi}")
# or written right after the %
string_magic("pi = {%.2f ? pi}")
#
# paste: appends text on each element
# Accepts the options: right, both, front and back
# It accepts the special values :1:, :i:, :I:, :a:, :A: to create enumerations
# adding '|' on both sides
string_magic("{'|'paste.both, ' + 'c ! x{1:4}}")
# Enumerations
acad = string_vec("you like admin, you enjoy working on weekends, you really love emails")
string_magic("Main reasons to pursue an academic career:\n {':i:) 'paste, C ? acad}.")
# You can also use the enum command
string_magic("Main reasons to pursue an academic career:\n {enum.i ? acad}.")
#
# stopwords: removes basic English stopwords
# the list is from the Snowball project:
# http://snowball.tartarus.org/algorithms/english/stop.txt
# (the operation is invoked as 'stop' in the call below)
string_magic("{stop, ws ! It is a tale told by an idiot, ",
"full of sound and fury, signifying nothing.}")
#
# k: keeps the first n characters
# syntax: nk: keeps the first n characters
# 'n|s'k: same + adds 's' at the end of shortened strings
# 'n||s'k: same but 's' counts in the n characters kept
words = string_vec("short, constitutional")
string_magic("{5k ? words}")
string_magic("{'5|..'k ? words}")
string_magic("{'5||..'k ? words}")
#
# K: keeps the first n elements
# syntax: nK: keeps the first n elements
# 'n|s'K: same + adds the element 's' at the end
# 'n||s'K: same but 's' counts in the n elements kept
#
# The special values :rest: and :REST: give the number of items dropped
bx = string_vec("Pessac Leognan, Saint Emilion, Marguaux, Saint Julien, Pauillac")
string_magic("Bordeaux wines I like: {3K, ', 'C ? bx}.")
string_magic("Bordeaux wines I like: {'3|etc..'K, ', 'C ? bx}.")
string_magic("Bordeaux wines I like: {'3||etc..'K, ', 'C ? bx}.")
string_magic("Bordeaux wines I like: {'3|and at least :REST: others'K, ', 'C ? bx}.")
#
# Ko, KO: special operators which keep the first n elements and add "others"
# syntax: nKo
# KO gives the rest in letters
string_magic("Bordeaux wines I like: {4KO, C ? bx}.")
#
# r, R: string replacement
# syntax: 's'R: deletes the content in 's' (replaces with the empty string)
# 's1 => s2'R replaces s1 with s2
string_magic("{'e'r, ws ! The letter e is deleted}")
# adding a perl look-behind: '(?<! )e' matches an 'e' that is not preceded by a space
string_magic("{'(?<! )e => a'r !The letter e becomes a}")
string_magic("{'([[:alpha:]]{3})[[:alpha:]]+ => \\1.'r ! Trimming the words}")
# Alternative way with simple operations: split, shorten, collapse
string_magic("{s, '3|.'k, c ! Trimming the words}")
#
# times, each: repetition
# They accept the option c to collapse the result with the empty string
string_magic("N{10 times.c ! o}!")
string_magic("{3 times.c ? 1:3}")
string_magic("{3 each.c ? 1:3}")
#
# erase: replaces the items with the empty string
# -> useful in conditions
string_magic("{erase ! I am going to be annihilated}")
#
# ELEMENT MANIPULATION ####
#
#
# rm: removes the elements
# Its (optional) argument is a regular expression giving which elements to remove
# Many options: "empty", "blank", "noalpha", "noalnum", "all"
x = c("Destroy", "All")
string_magic("{'A'rm ? x}")
string_magic("{rm.all ? x}")
x = string_vec("1, 12, 123, 1234, 123456, 1234567")
# we delete the elements whose number of characters is greater than 3
# (the condition below is `.nchar > 3`, and `nuke` is applied when it holds)
# => see later section CONDITIONS
string_magic("{if(.nchar > 3 ; nuke) ? x}")
#
# PLURALIZATION ####
#
# Two ways to enable pluralization:
# {$ command}: means the plural is deduced from the length of the variable
# {# command}: means the plural is deduced from the value of the variable
# Explanatory example
x = c("Eschyle", "Sophocle", "Euripide")
n = 37
string_magic("The author{$s, enum, have ? x} written {#N ? n} play{#s}.")
x = "Laurent Berge"
n = 0
string_magic("The author{$s, enum, have ? x} written {#N ? n} play{#s}.")
# How does it work?
# First is {$s, enum, have ? x}.
# The commands `s`, `enum` and `have` are applied to `x` which must come after a `?`
# => there the plural (whether an s is added and how to conjugate the verb have) depends
# on the **length** of the vector `x`
#
# Second comes {#N ? n}.
# The hash sign (#) means that the command `N` will be applied to the **value** n.
# The value must come after the `?`
#
# Third is {#s}.
# The object to which `s` should be applied is missing (there is no `? n`).
# The default is to apply the command to the previous object. In this case,
# this is `n`.
# Another similar example illustrating that we need not express the object several times:
x = c("Eschyle", "Sophocle", "Euripide")
string_magic("The {Len ? x} classic author{$s, are, enum}.")
#
# ARGUMENTS FROM THE ENVIRONMENT ####
#
# Arguments can be evaluated from the calling environment.
# Simply use backticks instead of quotes.
# (below, `dollar` is the argument of the operation `times`)
dollar = 6
reason = "glory"
string_magic("Why do you develop packages? For {`dollar`times.c ! $}?",
"For money? No... for {upper,''s, c ? reason}!", .sep = "\n")
#
# Alias generation
#
# Let's create a formula filler
# - we use .local_ops to create the ad hoc operation "add" which adds variables
# - we transform the result into a formula afterwards (via .post = as.formula)
fml = string_magic_alias(.post = as.formula, .local_ops = list(add = "' + 'collapse"))
# example with mtcars
lhs = "mpg"
rhs = c("hp", "drat")
fml("{lhs} ~ {add?rhs} + am")
# Run the code above in your browser using DataLab