languageR (version 0.2)

regularity: Regular and irregular Dutch verbs

Description

Regular and irregular Dutch verbs and selected lexical and distributional properties.

Usage

data(regularity)

Arguments

References

Baayen, R. H. and Moscoso del Prado Martin, F. (2005) Semantic density and past-tense formation in three Germanic languages, Language, 81, 666-698.

Tabak, W., Schreuder, R. and Baayen, R. H. (2005) Lexical statistics and lexical processing: semantic density, information complexity, sex, and irregularity in Dutch, in Kepser, S. and Reis, M. (eds.), Linguistic Evidence - Empirical, Theoretical, and Computational Perspectives, Berlin: Mouton de Gruyter, pp. 529-555.

Examples

Run this code
# Make the regularity data frame available in the workspace.
data(regularity)

# ---- predicting regularity with a logistic regression model

# lrm(), rcs() and datadist() are taken from the Design package.
# NOTE(review): Design appears to have been superseded by the rms package
# in later releases -- confirm which of the two is installed.
library(Design)

# Summarise the predictor distributions and register that summary by name,
# so the modelling functions can look it up through the 'datadist' option.
regularity.dd <- datadist(regularity)
options(datadist = "regularity.dd")

# Logistic regression of Regularity on the lexical predictors; FamilySize
# enters through a restricted cubic spline with 3 knots.  x = TRUE and
# y = TRUE keep the design matrix and the response inside the fit.
regularity.lrm <- lrm(
  Regularity ~ WrittenFrequency +
    rcs(FamilySize, 3) + NcountStem + InflectionalEntropy +
    Auxiliary + Valency + NVratio + WrittenSpokenRatio,
  data = regularity,
  x = TRUE,
  y = TRUE
)

# Test table for the terms of the fitted model.
anova(regularity.lrm)

# ---- model validation

# Resampling validation of the fit with B = 200 repetitions; bw = TRUE
# asks for backward elimination of predictors during validation.
validate(regularity.lrm, bw = TRUE, B = 200)

# Evaluate the model over a grid of penalties: 0, 0.05, ..., 0.8.
pentrace(regularity.lrm, penalty = seq(from = 0, to = 0.8, by = 0.05))

# Refit the model with a penalty of 0.6 and print the penalised fit.
regularity.lrm.pen <- update(regularity.lrm, penalty = 0.6)
regularity.lrm.pen

# ---- a plot of the partial effects

# Draw the panels on a 3 x 3 grid.  par() returns the settings it replaces,
# so keep them and restore exactly those afterwards instead of forcing a
# 1 x 1 layout (which would discard whatever layout was active before).
old.par = par(mfrow = c(3, 3))

# fun = plogis maps the fitted values from the logit scale to
# probabilities, hence the y axis label and the fixed 0-1 range.
# NOTE(review): this relies on the plot method that Design supplies for
# fitted models; under rms the call is presumably plot(Predict(...)) -- confirm.
plot(regularity.lrm.pen, fun = plogis, ylab = "Pr(regular)", 
adj.subtitle = FALSE, ylim = c(0, 1))

# Put the previous graphics layout back.
par(old.par)


# ---- predicting regularity with a support vector machine

# svm() is provided by the e1071 package.
library(e1071)

# Add a numeric version of the Auxiliary column to the data frame.
regularity[["AuxNum"]] <- as.numeric(regularity[["Auxiliary"]])

# Fit the classifier on every column except those at positions 1, 8 and 10,
# with Regularity as the response; cross = 10 requests 10-fold
# cross-validation.
# NOTE(review): the excluded columns are identified by position only --
# confirm against the data set's column order that these are the intended ones.
regularity.svm <- svm(
  regularity[, -c(1, 8, 10)],
  regularity$Regularity,
  cross = 10
)

# Report the fitted model together with its cross-validation results.
summary(regularity.svm)

Run the code above in your browser using DataLab