## - Download example DOCX file
# URL of the example DOCX file
d <- "https://github.com/ingmarboeschen/tableParser/raw/refs/heads/main/tableExamples.docx"
# Build the destination path once, inside the session's temporary directory
docx_path <- file.path(tempdir(), "tableExamples.docx")
# DOCX is a binary (zip) container: request a binary transfer explicitly,
# because on Windows the default text mode rewrites line endings and
# corrupts the downloaded file
download.file(d, docx_path, mode = "wb")
# Extract tables from example file as matrices
table2matrix(docx_path)
## - Download example HTML file
# URL of the example HTML file
h <- "https://github.com/ingmarboeschen/tableParser/raw/refs/heads/main/tableExamples.html"
# Destination path inside the session's temporary directory
html_path <- file.path(tempdir(), "tableExamples.html")
download.file(url = h, destfile = html_path)
# Extract tables from example file as matrices (called with rm.html = TRUE)
table2matrix(html_path, rm.html = TRUE)
## - Download example PDF file
# URL of the example PDF file
p <- "https://github.com/ingmarboeschen/tableParser/raw/refs/heads/main/tableExamples.pdf"
# Build the destination path once, inside the session's temporary directory
pdf_path <- file.path(tempdir(), "tableExamples.pdf")
# PDF is a binary format: request a binary transfer explicitly so the
# download does not depend on R guessing the mode from the URL extension
# on Windows
download.file(p, pdf_path, mode = "wb")
# Extract tables from example file as matrices
# \donttest{
table2matrix(pdf_path)
# Note: The extraction of tables within PDF documents with
# tabulapdf::extract_tables() does not work properly here.
# Also, the table captions and footnotes cannot be used for decoding
# (e.g., p-values).
tabulapdf::extract_tables(pdf_path)
# }
## Another example with a website that contains simple and nested HTML-tables
# Address of the web page to process
wiki_url <- "https://en.wikipedia.org/wiki/R_(programming_language)"
# Read the page source line by line (incomplete-final-line warnings silenced)
x <- readLines(wiki_url, warn = FALSE)
# Apply the function to the character vector holding the page source
table2matrix(x, rm.html = TRUE, unifyMatrix = TRUE)
# Run the code above in your browser using DataLab