# NOT RUN {
# Build a deliberately mis-encoded character vector.
# The \u escapes are only used to keep this example portable inside the
# R package; your own data does not need to be written this way.
string <- c(
  "Qu\u00B6ng Tr\u00DE",
  "An \u00A7\u00ABn",
  "Th\u00F5a Thi\u00AAn Hu\u00D5"
)

# Inspect the garbled text: this is how TCVN3 encoded data looks when it
# has been loaded as UTF-8.
string

# Character vector: TCVN3 -> UTF-8
decodeVN(string)
# Same input, this time with diacritics = FALSE
decodeVN(string, diacritics = FALSE)
# Data frame input: convert columns from TCVN3 -> UTF-8
df <- data.frame(
  id = c(1, 2, 3),
  name = string
)
df_decode <- decodeVN(df)
df_decode

# NOTE: when the whole data frame is printed, the R console may display
# some characters as Unicode escapes. Look at the column on its own to
# check that it is correct:
df_decode[, 2]

decodeVN(df, diacritics = FALSE)
# Using the built-in sample data
data(vn_samples)

# TCVN3 -> Unicode
decodeVN(vn_samples$TCVN3)
# TCVN3 -> Unicode (ASCII characters only)
decodeVN(vn_samples$TCVN3, diacritics = FALSE)
# VISCII -> Unicode
decodeVN(vn_samples$VISCII, from = "VISCII")
# Demonstration for sf objects
# library() stops with a clear error when sf is not installed, whereas
# require() would only warn and let the st_*() calls below fail instead.
library(sf)

# Create an sf object (just for demonstration) by attaching point
# geometries to the data frame `df` created earlier in this example.
df_geom <- st_sfc(
  st_point(c(3, 4)),
  st_point(c(10, 11)),
  st_point(c(15, 13))
)
df_spatial <- st_set_geometry(df, df_geom)

# Convert the Vietnamese characters in the sf object
df_spatial_decode <- decodeVN(df_spatial)
df_spatial_decode
# Check the decoded column on its own
df_spatial_decode$name
# }
# Run the code above in your browser using DataLab