# NOT RUN {
### simples examples:
f=factor(c("A","A","B","B","C","D","E"))
print(table(f))
# replace "A" with "a":
f1=delevels(f,"A","a")
print(table(f1))
# merge c("C","D","E") into "CDE":
f2=delevels(f,c("C","D","E"),"CDE")
print(table(f2))
# merge c("B","C","D","E") into _OTHER:
f3=delevels(f,c("B","C","D","E"))
print(table(f3))
# }
# NOT RUN {
# larger factor:
x=factor(c(1,rep(2,2),rep(3,3),rep(4,4),rep(5,5),rep(10,10),rep(100,100)))
print(table(x))
# IDF: frequent values are close to zero and
# infrequent ones are more close to each other:
x1=delevels(x,"idf")
print(table(x1))
# PCP: infrequent values are merged
x2=delevels(x,c("pcp",0.1)) # around 10<!-- % -->
print(table(x2))
# example with a data.frame:
y=factor(c(rep("a",100),rep("b",20),rep("c",5)))
z=1:125 # numeric
d=data.frame(x=x,y=y,z=z,x2=x)
print(summary(d))
# IDF:
d1=delevels(d,"idf")
print(summary(d1))
# PCP:
d2=delevels(d,"pcp")
print(summary(d2))
# delevels:
L=vector("list",ncol(d)) # one per attribute
L[[1]]=c("1","2","3","4","5")
L[[2]]=c("b","c")
L[[4]]=c("1","2","3") # different on purpose
d3=delevels(d,levels=L,label="other")
print(summary(d3))
# }
# NOT RUN {
# end dontrun
# }
Run the code above in your browser using DataLab