# NOT RUN {
# Extracting bg-en.tgz from the Europarl corpus is time consuming, so the
# unzipped files have been temporarily made available at
# http://www.um.ac.ir/~sarmad/... .
# }
# NOT RUN {
# First run: align the remote euro.bg / euro.en files with nrec = 30 and the
# source-side encoding given as UTF-8. All other arguments use the function's
# defaults.
w1 <- word_alignIBM1(
  'http://www.um.ac.ir/~sarmad/word.a/euro.bg',
  'http://www.um.ac.ir/~sarmad/word.a/euro.en',
  nrec = 30,
  encode.sorc = 'UTF-8'
)

# Second run: same inputs and settings, but with removePt = FALSE.
# NOTE(review): presumably this keeps punctuation in the text instead of
# stripping it -- confirm against the word_alignIBM1 help page.
w2 <- word_alignIBM1(
  'http://www.um.ac.ir/~sarmad/word.a/euro.bg',
  'http://www.um.ac.ir/~sarmad/word.a/euro.en',
  nrec = 30,
  encode.sorc = 'UTF-8',
  removePt = FALSE
)
# }
# Run the code above in your browser using DataLab