Learn R Programming

qmrparser (version 0.1.6)

pcAxisParser: Parser for PC-AXIS format files

Description

Reads a PC-AXIS format file or text and builds its syntax tree.

Usage

pcAxisParser(streamParser)

Arguments

streamParser

Stream parser associated with the file/text to be recognised.

Value

Returns a list with the elements "status", "node" and "stream":

status

"ok" or "fail"

stream

Stream state after recognition

node

List with one node element for each "keyword" in the PC-AXIS file. Each node element is a list with the elements "keyword", "language", "parameters" and "ruleRight":

  • keyword PC-AXIS keyword

  • language language code or ""

  • parameters NULL or a list of strings with the parenthesised values associated with the keyword

  • ruleRight is a list of two elements, "type" and "value":

    If type = "symbol", value = symbol

    If type = "liststring", value = string vector, originally delimited by ","

    If type = "stringstring", value = string vector, originally delimited by blanks, new line, ...

    If type = "list" , value = numerical vector, originally delimited by ","

    If type = "tlist" , value = (frequency, "limit" keyword , lower-limit , upper-limit) or (frequency, "list" keyword , periods list )

Details

Grammar definition, wider than the strict PC-AXIS definition

pcaxis = { rule } , eof ;

rule = keyword , [ '[' , language , ']' ] , [ '(' , parameterList , ')' ] , '=' , ruleRight ;

parameterList = parameter , { ',' , parameterList } ;

ruleRight = string , string , { string } , ';' | string , { ',' , string } , ';' | number , separator , { , number } , ( ';' | eof ) | symbolic | 'TLIST' , '(' , symbolic , ( ( ')' , { ',' , string }) | ( ',' , string , '-' , string , ')' ) ) , ';' ;

keyword = symbolic ;

language = symbolic ;

parameter = string ;

separator = ' ' | ',' | ';' ;

eof = ? eof ? ;

string = ? string ? ;

symbolic = ? symbolic ? ;

number = ? number ? ;

Normally, this function is called as a preliminary step before calling pcAxisCubeMake:

cstream <- pcAxisParser(stream)
if ( cstream$status == 'ok' ) cube <- pcAxisCubeMake(cstream)

References

PC-Axis file format.

https://www.scb.se/en/services/statistical-programs-for-px-files/px-file-format/

PC-Axis file format manual. Statistics Finland.

https://tilastokeskus.fi/tup/pcaxis/tiedostomuoto2006_laaja_en.pdf

Examples

Run this code
# NOT RUN {
  
# }
# NOT RUN {
    ## significant time reductions may be achieved by doing:
    library("compiler")
    enableJIT(level=3)
  
# }
# NOT RUN {
  name     <- system.file("extdata","datInSFexample6_1.px", package = "qmrparser")
  stream   <- streamParserFromFileName(name,encoding="UTF-8")
  cstream  <-  pcAxisParser(stream)
  if ( cstream$status == 'ok' ) {
    
    ## HEADING 
    print(Filter(function(e) e$keyword=="HEADING",cstream$node)[[1]] $ruleRight$value)  
  
    ## STUB
    print(Filter(function(e) e$keyword=="STUB",cstream$node)[[1]] $ruleRight$value)  
  
    ## DATA
    print(Filter(function(e) e$keyword=="DATA",cstream$node)[[1]] $ruleRight$value)
    
  }

  
# }
# NOT RUN {
      #
      # Error messages like
      #                " ... invalid multibyte string ... "
      # or warnings
      #                " input string ...  is invalid in this locale"
      #
      # For example, in Linux the error generated by this code:
       name     <-     "https://www.ine.es/pcaxisdl//t20/e245/p04/a2009/l0/00000008.px" 
      stream   <- streamParserFromString( readLines( name ) )    
      cstream  <- pcAxisParser(stream)
      if ( cstream$status == 'ok' )  cube <- pcAxisCubeMake(cstream)
      #
      # is caused by files with a non-readable 'encoding'.
      # In the case where it could be read, there may also be problems 
      # with string-handling functions, due to multibyte characters. 
      # In Windows, according to Sys.getlocale(),
      # the file may be read, but accents, ñ, ... may not be correctly recognised.
      #
      #
      # There are, at least, the following options:
      #  - File conversion to utf-8, from the OS, with
      # "iconv - Convert encoding of given files from one encoding to another"
      #
      #  - File conversion in R:
      name    <- "https://www.ine.es/pcaxisdl//t20/e245/p04/a2009/l0/00000008.px" 
      stream   <- streamParserFromString( iconv( readLines( name ), "IBM850", "UTF-8") )
      cstream  <- pcAxisParser(stream)
      if ( cstream$status == 'ok' )  cube <- pcAxisCubeMake(cstream)
      #
      # In the latter case, latin1 would also work, but accents, ñ, ... would not be
      # correctly read.
      #
      #  - Making the assumption that the file does not contain multibyte characters:
      #
      localeOld <- Sys.getlocale("LC_CTYPE")
      Sys.setlocale(category = "LC_CTYPE", locale = "C")
      #
      name     <-
        "https://www.ine.es/pcaxisdl//t20/e245/p04/a2009/l0/00000008.px" 
      stream   <- streamParserFromString( readLines( name ) )
      cstream  <- pcAxisParser(stream)
      if ( cstream$status == 'ok' )  cube <- pcAxisCubeMake(cstream)
      #
      Sys.setlocale(category = "LC_CTYPE", locale = localeOld)
      #
      # However, some characters will not be correctly read (accents, ñ, ...)

    
# }
# NOT RUN {
# }

Run the code above in your browser using DataLab