# NOT RUN {
# Three sample documents: the first two both describe R, the third is about
# an unrelated subject (astronomy).
text1 <- "R is a free software environment for statistical computing and graphics.
It compiles and runs on a wide variety of UNIX platforms, Windows and MacOS"
text2 <- "R is a language and environment for statistical computing and graphics.
It is a GNU project which is similar to the S language and at Bell Laboratories"
text3 <- " Astronomy is the scientific study of all objects beyond our world,
and a way to understand the physical laws and origins of the universe."

# Compare the two R descriptions.
# Expected result: dist1 is 7, so the two strings count as near-duplicates.
dist1 <- getDistance(
  getsimHash(text1, 64),
  getsimHash(text2, 64)
)

# Compare the first R description with the astronomy text.
# Expected result: dist2 is 21, so the two strings are not similar.
dist2 <- getDistance(
  getsimHash(text1, 64),
  getsimHash(text3, 64)
)
# }
# Run the code above in your browser using DataLab