gap finds the optimal number of clusters based on the "gap statistic" (Tibshirani et al., 2001).
gap(X, Kmax = 10, B = 100, ref.gen = "PC", cl.lab = NULL, ...)
Returns a list of the gaps found.
# Example: build the parameter list, derive the stopping threshold, compute a
# partition, and pass everything to gap().

# Load the example data and take its counts as a matrix.
example_dta <- create_example_data_for_R()
X <- as.matrix(example_dta$counts)

# Defaults for the parameters supplied through `...`.
default.par <- list(
  q = 0.25,
  Kmax.rec = 5,
  B = 100,
  ref.gen = "PC",
  dist.method = "euclidean",
  cl.method = "hclust",
  linkage = "average",
  cor.method = "pearson",
  nstart = 10
)

# Overlay the user-supplied settings on the defaults and prepend minSize.
# minDist is deliberately not set here (a NULL entry would be dropped when
# combining anyway), so fixed.par$minDist is NULL until it is filled in below.
fixed.par <- c(
  list(minSize = 2),
  modifyList(
    default.par,
    list(cor.method = "pearson", linkage = "average")
  )
)

# minDist is still unset, so obtain the stopping threshold from the data and
# store it in the parameter list.
minDist <- get.threshold(X, q = fixed.par[["q"]], fixed.par)
fixed.par[["minDist"]] <- minDist

# Compute the cluster labels, then evaluate the gap statistic for them.
cl.lab <- findPartition(X = X, Kmax = 10, dX = NULL, fixed.par)
gap.res <- gap(
  X = X,
  Kmax = 10,
  cl.lab = cl.lab,
  B = fixed.par[["B"]],
  ref.gen = fixed.par[["ref.gen"]],
  fixed.par = fixed.par
)