if (interactive()) {
  # Example: building term-document matrices with computeTf() from the
  # Dallas police crime reports table. Guarded by interactive() because it
  # needs a live ODBC connection; replace the <...> placeholders first.

  # initialize connection to Dallas database in Aster
  # (the connection string is assembled with paste0() so that it contains
  # no embedded line break between the key=value pairs)
  conn <- odbcDriverConnect(
    connection = paste0(
      "driver={Aster ODBC Driver};",
      "server=<dbhost>;port=2406;database=<dbname>;uid=<user>;pwd=<pw>"
    )
  )

  # compute term-document-matrix of all 2-word Ngrams of Dallas police
  # open crime reports (one document per offense status)
  tdm1 <- computeTf(
    channel = conn,
    tableName = "public.dallaspoliceall",
    docId = "offensestatus",
    textColumns = c("offensedescription", "offensenarrative"),
    parser = nGram(2),
    where = "offensestatus NOT IN ('System.Xml.XmlElement', 'C')"
  )

  # compute term-document-matrix of all 2-word combinations of Dallas police
  # crime reports by time of day (4 documents corresponding to 4 parts of day);
  # docId is a SQL expression over the hour of offensestarttime that is
  # evaluated by the database, not by R
  tdm2 <- computeTf(
    channel = conn,
    tableName = "public.dallaspoliceall",
    docId = "(extract('hour' from offensestarttime)/6)::int%4",
    textColumns = c("offensedescription", "offensenarrative"),
    parser = token(2, punctuation = "[-.,?\\!:;~()]+", stopWords = TRUE),
    where = "offensenarrative IS NOT NULL"
  )

  # include only top 100 ranked 2-word ngrams for each offense status
  # into resulting term-document-matrix using dense rank function
  # (uses the same open connection as the calls above)
  tdm3 <- computeTf(
    channel = conn,
    tableName = "public.dallaspoliceall",
    docId = "offensestatus",
    textColumns = c("offensedescription", "offensenarrative"),
    parser = nGram(2),
    top = 100,
    rankFunction = "denserank",
    where = "offensestatus NOT IN ('System.Xml.XmlElement', 'C')"
  )

  # release the ODBC handle once all queries have run
  odbcClose(conn)
}
# Run the code above in your browser using DataLab