# Sample input string for the example.
x <- " ki a. jal2 (e2{kur}) ra. gaba jal2. an ki a"

# Convert the string with as.cuneiform(), split it with split_sumerian(),
# and keep the `signs` component of the result.
token <- split_sumerian(as.cuneiform(x))$signs
N <- length(token)

# Build the substring info from the tokens using the package-internal helper
# (reached via `:::`), then print it.
df <- sumer:::init_substr_info(token)
df

# Sanity check: feeding each row's `start` and `n_tokens` (plus the total
# token count) to substr_position() should give back that row's index.
all(sumer:::substr_position(df$start, df$n_tokens, N) == seq_len(nrow(df)))
# Run the code above in your browser using DataLab