
Commit e4c946d

5-10 more algorithms
1 parent 866f9b5 commit e4c946d

File tree

7 files changed: +297 additions, −20 deletions


DESCRIPTION

Lines changed: 3 additions & 3 deletions
@@ -1,13 +1,13 @@
 Package: FCPS
 Type: Package
 Title: Fundamental Clustering Problems Suite
-Version: 1.3.2
-Date: 2023-03-18
+Version: 1.3.3
+Date: 2023-05-28
 Authors@R: c(person("Michael", "Thrun", email= "[email protected]",role=c("aut","cre","cph"), comment = c(ORCID = "0000-0001-9542-5543")),person("Peter", "Nahrgang",role=c("ctr","ctb")),person("Felix", "Pape",role=c("ctr","ctb")),person("Vasyl","Pihur", role=c("ctb")),person("Guy","Brock", role=c("ctb")),person("Susmita","Datta", role=c("ctb")),person("Somnath","Datta", role=c("ctb")),person("Luis","Winckelmann", role=c("com")),person("Alfred", "Ultsch",role=c("dtc","ctb")),person("Quirin", "Stier",role=c("ctb","rev")))
 Maintainer: Michael Thrun <[email protected]>
 Description: Over sixty clustering algorithms are provided in this package with consistent input and output, which enables the user to try out algorithms swiftly. Additionally, 26 statistical approaches for the estimation of the number of clusters as well as the mirrored density plot (MD-plot) of clusterability are implemented. The package is published in Thrun, M.C., Stier Q.: "Fundamental Clustering Algorithms Suite" (2021), SoftwareX, <DOI:10.1016/j.softx.2020.100642>. Moreover, the fundamental clustering problems suite (FCPS) offers a variety of clustering challenges any algorithm should handle when facing real world data, see Thrun, M.C., Ultsch A.: "Clustering Benchmark Datasets Exploiting the Fundamental Clustering Problems" (2020), Data in Brief, <DOI:10.1016/j.dib.2020.105501>.
 Imports: mclust, ggplot2, DataVisualizations
-Suggests: kernlab, cclust, dbscan, kohonen, MCL, ADPclust, cluster, DatabionicSwarm, orclus, subspace, flexclust, ABCanalysis, apcluster, pracma, EMCluster, pdfCluster, parallelDist, plotly, ProjectionBasedClustering, GeneralizedUmatrix, mstknnclust, densityClust, parallel, energy, R.utils, tclust, Spectrum, genie, protoclust, fastcluster, clusterability, signal, reshape2, PPCI, clustrd, smacof, rgl, prclust, CEC, dendextend, moments, prabclus, VarSelLCM, sparcl, mixtools, HDclassif, clustvarsel, yardstick, knitr, rmarkdown, igraph, leiden, clusterSim, NetworkToolbox, randomForest, ConsensusClusterPlus, RWeka
+Suggests: mlpack, kernlab, cclust, dbscan, kohonen, MCL, ADPclust, cluster, DatabionicSwarm, orclus, subspace, flexclust, ABCanalysis, apcluster, pracma, EMCluster, pdfCluster, parallelDist, plotly, ProjectionBasedClustering, GeneralizedUmatrix, mstknnclust, densityClust, parallel, energy, R.utils, tclust, Spectrum, genie, protoclust, fastcluster, clusterability, signal, reshape2, PPCI, clustrd, smacof, rgl, prclust, CEC, dendextend, moments, prabclus, VarSelLCM, sparcl, mixtools, HDclassif, clustvarsel, yardstick, knitr, rmarkdown, igraph, leiden, clustMixType, clusterSim, NetworkToolbox, randomForest, ConsensusClusterPlus, RWeka
 Depends: R (>= 3.5.0)
 License: GPL-3
 LazyData: TRUE
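The "consistent input and output" the Description advertises is the convention every wrapper touched by this commit also follows: the data matrix comes first, tuning parameters are named arguments, and the result is a list whose Cls component holds one cluster label per row. A minimal sketch of that convention, assuming FCPS and its suggested backends are installed and using the bundled Hepta benchmark data:

library(FCPS)
data("Hepta")
# Every wrapper returns a list with at least Cls (one label per row) and Object (the backend result)
out = kmeansClustering(Hepta$Data, ClusterNo = 7)
names(out)        # e.g. "Cls", "Object", "Centroids"
table(out$Cls)    # cluster sizes; label values themselves are arbitrary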

R/DBscan.R

Lines changed: 27 additions & 10 deletions
@@ -1,4 +1,4 @@
-DBSCAN = DBscan=function(Data,Radius,minPts,PlotIt=FALSE,UpperLimitRadius,...){
+DBSCAN = DBscan=function(Data,Radius,minPts,Rcpp=TRUE,PlotIt=FALSE,UpperLimitRadius,...){
 # Cls=DBSCAN(FCPS$Hepta$Data,sqrt(min(res$withinss)))
 # DBSCAN based on [Ester et al., 1996]
 #
@@ -10,6 +10,7 @@ DBSCAN = DBscan=function(Data,Radius,minPts,PlotIt=FALSE,UpperLimitRadius,...){
 # minPts            In principle minimum number of points in the unit disk, if the unit disk is within the cluster (core) [Ester et al., 1996, p. 228].
 #                   number of minimum points in the eps region (for core points).
 #                   Default is 5 points.
+# Rcpp              TRUE: uses rcpp fast version
 # PlotIt            Boolean. Decision to plot or not
 # UpperLimitRadius  Limit for radius search, experimental
 #
@@ -21,6 +22,23 @@ DBSCAN = DBscan=function(Data,Radius,minPts,PlotIt=FALSE,UpperLimitRadius,...){
 #
 # [Ester et al., 1996] Ester, M., Kriegel, H.-P., Sander, J., & Xu, X.: A density-based algorithm for discovering clusters in large spatial databases with noise, Proc. Kdd, Vol. 96, pp. 226-231, 1996.
 
+  if(isTRUE(Rcpp)){
+    if (!requireNamespace('mlpack',quietly = TRUE)) {
+      message(
+        'Subordinate clustering package (mlpack) is missing. No computations are performed.
+        Please install the package which is defined in "Suggests".'
+      )
+      return(
+        list(
+          Cls = rep(1, nrow(Data)),
+          Object = "Subordinate clustering package (mlpack) is missing.
+            Please install the package which is defined in 'Suggests'."
+        )
+      )
+    }
+  }else{
+
+
 if (!requireNamespace('dbscan',quietly = TRUE)) {
   message(
     'Subordinate clustering package (dbscan) is missing. No computations are performed.
@@ -34,7 +52,7 @@ DBSCAN = DBscan=function(Data,Radius,minPts,PlotIt=FALSE,UpperLimitRadius,...){
       )
     )
   }
-
+  }
 if(is.null(nrow(Data))){# Then we get a vector
   return(cls <- rep(1,length(Data)))
 }
@@ -55,14 +73,14 @@ DBSCAN = DBscan=function(Data,Radius,minPts,PlotIt=FALSE,UpperLimitRadius,...){
 if(missing(UpperLimitRadius))
   UpperLimitRadius=1.1*Radius
 
-  liste=dbscan::dbscan(x = Data,eps = Radius,minPts = minPts,...)
-  Cls=liste$cluster
+  if(isTRUE(Rcpp)){
+    liste=mlpack::dbscan(input = Data,epsilon = Radius,min_size = minPts,...)
+    Cls=as.vector(liste$assignments)
+  }else{
+    liste=dbscan::dbscan(x = Data,eps = Radius,minPts = minPts,...)
+    Cls=liste$cluster
+  }
 ind=which(Cls==0)
-# if(length(ind)>0)
-#   Cls[ind]=999
-#Cls=NormalizeCls(Cls)$normalizedCls
-#if(length(ind)>0)
-#  Cls[ind]=NaN
 Cls[!is.finite(Cls)]=0
 # Per Definition are not clustered objects in searching for
 # distance and density based structures not allowed.
@@ -74,7 +92,6 @@ DBSCAN = DBscan=function(Data,Radius,minPts,PlotIt=FALSE,UpperLimitRadius,...){
   liste=out$DBscanObject
 }
 if(isTRUE(PlotIt)){
-
   Cls2=Cls
   Cls2[Cls2==0]=999
   p=ClusterPlotMDS(Data,Cls2)
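In practice the new Rcpp switch only changes the backend, not the interface: Rcpp=TRUE (the new default) routes the call to mlpack::dbscan, Rcpp=FALSE keeps the previous dbscan::dbscan path, and either way the wrapper returns list(Cls, Object). A minimal sketch on the Hepta data, assuming both backend packages are installed; the radius below is hand-picked for illustration:

data("Hepta")
# New default: mlpack's C++ implementation
fast = DBSCAN(Hepta$Data, Radius = 1, minPts = 5, Rcpp = TRUE)
# Previous behaviour: the dbscan package
ref  = DBSCAN(Hepta$Data, Radius = 1, minPts = 5, Rcpp = FALSE)
# Both return one label per row; compare the two partitions
table(fast$Cls, ref$Cls)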

R/MeanShiftClustering.R

Lines changed: 39 additions & 0 deletions
@@ -0,0 +1,39 @@
+MeanShiftClustering=function(Data,PlotIt=FALSE,...){
+  # Cls=MeanShiftClustering(Data)$Cls
+  # Clustering by mean shift
+  #
+  # INPUT
+  # Data[1:n,1:d]   Data set with n observations and d features
+  #
+  # OPTIONAL
+  # PlotIt          Boolean. Decision to plot or not
+  #
+  # OUTPUT
+  # Cls[1:n]        Clustering of data
+  # Object          Object of mlpack::mean_shift algorithm
+  #
+  # Author: MT 05/2023
+  # Cheng, Yizong (1995). "Mean Shift, Mode Seeking, and Clustering". IEEE Transactions on Pattern Analysis and Machine Intelligence. 17 (8): 790-799. CiteSeerX 10.1.1.510.1222. doi:10.1109/34.400568.
+  if (!requireNamespace('mlpack',quietly = TRUE)) {
+    message(
+      'Subordinate clustering package (mlpack) is missing. No computations are performed.
+      Please install the package which is defined in "Suggests".'
+    )
+    return(
+      list(
+        Cls = rep(1, nrow(Data)),
+        Object = "Subordinate clustering package (mlpack) is missing.
+          Please install the package which is defined in 'Suggests'."
+      )
+    )
+  }
+  res = mlpack::mean_shift(input = Data,labels_only = T, ...)
+  Cls = as.vector(res$output)+1
+
+  if (PlotIt) {
+    ClusterPlotMDS(Data , Cls)
+  }
+  Cls = ClusterRename(Cls, Data)
+
+  return(list(Cls=Cls,Object=res))
+}
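The wrapper itself is thin: mlpack::mean_shift does the work, the zero-based labels in res$output are shifted to start at 1, and ClusterRename tidies them before list(Cls, Object) is returned. A minimal usage sketch, assuming mlpack is installed and using the Hepta data as in the help page added below:

data("Hepta")
out = MeanShiftClustering(Hepta$Data, PlotIt = FALSE)
length(unique(out$Cls))        # number of modes/clusters found by mean shift
str(out$Object, max.level = 1) # raw mlpack result kept alongside the labels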

R/kmeansClustering.R

Lines changed: 166 additions & 2 deletions
@@ -1,4 +1,4 @@
-kmeansClustering <-function(DataOrDistances,ClusterNo=2,Type='LBG',RandomNo=5000,PlotIt=FALSE,Verbose=FALSE,...){
+kmeansClustering <-function(DataOrDistances,ClusterNo=2,Type='LBG',RandomNo=5000,CategoricalData,PlotIt=FALSE,Verbose=FALSE,...){
 # Cls <- kmeansClustering(DataOrDistances,ClusterNo,Verbose);
 # calls one of two common approaches for kmeans
 #
@@ -9,6 +9,7 @@ kmeansClustering <-function(DataOrDistances,ClusterNo=2,Type='LBG',RandomNo=5000
 # Type      Kind of kmeans algorithm. Choose one of the two following strings:
 #           "Hartigan": Hartigan, J. A. and Wong, M. A. A K-means clustering algorithm. Applied Statistics 28, 100-108, 1979.
 #           "LBG": Linde,Y.,Buzo,A.,Gray,R.M., An algorithm for vector quantizer design. IEEE Transactions on Communications, COM-28, 84-95, 1980
+#           'pelleg-moore', 'elkan', 'hamerly', 'dualtree', or 'dualtree-covertree'
 # RandomNo  Only for Steinley method or in case of distance matrix, number of random initializations with
 #           searching for minimal SSE, see [Steinley/Brusco, 2007]
 # PlotIt    Boolean. Decision to plot or not
@@ -22,7 +23,10 @@ kmeansClustering <-function(DataOrDistances,ClusterNo=2,Type='LBG',RandomNo=5000
 # Adaption to Mdbt and documentation standards
 if (!isSymmetric(unname(DataOrDistances))) {
   #Data = DataOrDistances
-
+  if(missing(CategoricalData)&Type=="kprototypes"){
+    warning("kmeansClustering: CategoricalData cannot be missing if Type is 'kprototypes'. Setting type to default")
+    Type="Hartigan"
+  }
   if (ClusterNo < 2) {
     warning("ClusterNo should be an integer > 2. Now, all of your data is in one cluster.")
     if (is.null(nrow(DataOrDistances))) {
@@ -83,6 +87,41 @@ kmeansClustering <-function(DataOrDistances,ClusterNo=2,Type='LBG',RandomNo=5000
       Object = res,
       Centroids = Centroids
     ))
+  },'kprototypes' = {
+    if (!requireNamespace('clustMixType',quietly = TRUE)) {
+      message(
+        'Subordinate clustering (clustMixType) package is missing. No computations are performed.
+        Please install the package which is defined in "Suggests".'
+      )
+      return(
+        list(
+          Cls = rep(1, nrow(DataOrDistances)),
+          Object = "Subordinate clustering (clustMixType) package is missing.
+            Please install the package which is defined in 'Suggests'."
+        )
+      )
+    }
+    DataOrDistancesWithFactors=as.data.frame(DataOrDistances)
+    CategoricalData=as.data.frame(CategoricalData)
+    for(i in 1:ncol(CategoricalData)){
+      CategoricalData[,i]=as.factor(CategoricalData[,i])
+    }
+    DataOrDistancesWithFactors=cbind(DataOrDistancesWithFactors,CategoricalData)
+
+    res = clustMixType::kproto(x = DataOrDistancesWithFactors, k = ClusterNo, ...)#verbose=FALSE,
+
+    Cls = as.numeric((res$cluster))
+
+    Centroids=res$centers
+    if (PlotIt) {
+      ClusterPlotMDS(DataOrDistances, Cls)
+    }
+    Cls = ClusterRename(Cls, DataOrDistances)
+    return(list(
+      Cls = Cls,
+      Object = res,
+      Centroids = Centroids
+    ))
 },
 'LBG' = {
   if (!requireNamespace('cclust',quietly = TRUE)) {
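The 'kprototypes' branch added just above is the one Type that needs two inputs: the numeric matrix in DataOrDistances plus a separate CategoricalData block, which is coerced column-wise to factors and column-bound to the numeric part before clustMixType::kproto is called. A minimal sketch with made-up mixed data (the toy variables below are illustrative only, not from the package):

set.seed(42)
num    = matrix(rnorm(200), ncol = 2)   # 100 cases, 2 numeric features
cat_df = data.frame(colour = sample(c("red", "blue"), 100, replace = TRUE),
                    shape  = sample(c("round", "square"), 100, replace = TRUE))
out = kmeansClustering(num, ClusterNo = 2, Type = "kprototypes", CategoricalData = cat_df)
table(out$Cls)
out$Centroids   # kproto prototypes: numeric means combined with categorical modes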
@@ -149,6 +188,131 @@ kmeansClustering <-function(DataOrDistances,ClusterNo=2,Type='LBG',RandomNo=5000
     ),
     Centroids = res$centers
   ))
+  },"Pelleg-moore"={
+    if (!requireNamespace('mlpack',quietly = TRUE)) {
+      message(
+        'Subordinate clustering package (mlpack) is missing. No computations are performed.
+        Please install the package which is defined in "Suggests".'
+      )
+      return(
+        list(
+          Cls = rep(1, nrow(DataOrDistances)),
+          Object = "Subordinate clustering package (mlpack) is missing.
+            Please install the package which is defined in 'Suggests'."
+        )
+      )
+    }
+    res = mlpack::kmeans(input = DataOrDistances, clusters = ClusterNo, algorithm = tolower(Type),labels_only = T, ...)
+    Cls = as.vector(res$output)+1
+    if (PlotIt) {
+      ClusterPlotMDS(DataOrDistances, Cls)
+    }
+    Cls = ClusterRename(Cls, DataOrDistances)
+    return(list(
+      Cls = Cls,
+      Object = res,
+      Centroids = res$centroid
+    ))
+  },"Elkan"={
+    if (!requireNamespace('mlpack',quietly = TRUE)) {
+      message(
+        'Subordinate clustering package (mlpack) is missing. No computations are performed.
+        Please install the package which is defined in "Suggests".'
+      )
+      return(
+        list(
+          Cls = rep(1, nrow(DataOrDistances)),
+          Object = "Subordinate clustering package (mlpack) is missing.
+            Please install the package which is defined in 'Suggests'."
+        )
+      )
+    }
+    res = mlpack::kmeans(input = DataOrDistances, clusters = ClusterNo, algorithm = tolower(Type),labels_only = T, ...)
+    Cls = as.vector(res$output)+1
+    if (PlotIt) {
+      ClusterPlotMDS(DataOrDistances, Cls)
+    }
+    Cls = ClusterRename(Cls, DataOrDistances)
+    return(list(
+      Cls = Cls,
+      Object = res,
+      Centroids = res$centroid
+    ))
+  },"Hamerly"={
+    if (!requireNamespace('mlpack',quietly = TRUE)) {
+      message(
+        'Subordinate clustering package (mlpack) is missing. No computations are performed.
+        Please install the package which is defined in "Suggests".'
+      )
+      return(
+        list(
+          Cls = rep(1, nrow(DataOrDistances)),
+          Object = "Subordinate clustering package (mlpack) is missing.
+            Please install the package which is defined in 'Suggests'."
+        )
+      )
+    }
+    res = mlpack::kmeans(input = DataOrDistances, clusters = ClusterNo, algorithm = tolower(Type),labels_only = T, ...)
+    Cls = as.vector(res$output)+1
+    if (PlotIt) {
+      ClusterPlotMDS(DataOrDistances, Cls)
+    }
+    Cls = ClusterRename(Cls, DataOrDistances)
+    return(list(
+      Cls = Cls,
+      Object = res,
+      Centroids = res$centroid
+    ))
+  },"Dualtree"={
+    if (!requireNamespace('mlpack',quietly = TRUE)) {
+      message(
+        'Subordinate clustering package (mlpack) is missing. No computations are performed.
+        Please install the package which is defined in "Suggests".'
+      )
+      return(
+        list(
+          Cls = rep(1, nrow(DataOrDistances)),
+          Object = "Subordinate clustering package (mlpack) is missing.
+            Please install the package which is defined in 'Suggests'."
+        )
+      )
+    }
+    res = mlpack::kmeans(input = DataOrDistances, clusters = ClusterNo, algorithm = tolower(Type),labels_only = T, ...)
+    Cls = as.vector(res$output)+1
+    if (PlotIt) {
+      ClusterPlotMDS(DataOrDistances, Cls)
+    }
+    Cls = ClusterRename(Cls, DataOrDistances)
+    return(list(
+      Cls = Cls,
+      Object = res,
+      Centroids = res$centroid
+    ))
+  },"Dualtree-covertree"={
+    if (!requireNamespace('mlpack',quietly = TRUE)) {
+      message(
+        'Subordinate clustering package (mlpack) is missing. No computations are performed.
+        Please install the package which is defined in "Suggests".'
+      )
+      return(
+        list(
+          Cls = rep(1, nrow(DataOrDistances)),
+          Object = "Subordinate clustering package (mlpack) is missing.
+            Please install the package which is defined in 'Suggests'."
+        )
+      )
+    }
+    res = mlpack::kmeans(input = DataOrDistances, clusters = ClusterNo, algorithm = tolower(Type),labels_only = T, ...)
+    Cls = as.vector(res$output)+1
+    if (PlotIt) {
+      ClusterPlotMDS(DataOrDistances, Cls)
+    }
+    Cls = ClusterRename(Cls, DataOrDistances)
+    return(list(
+      Cls = Cls,
+      Object = res,
+      Centroids = res$centroid
+    ))
 },
 {#lloyd, forgy, mac queen
   res = kmeans(DataOrDistances, centers = ClusterNo, algorithm = Type, ...)
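All five new branches share one body: the Type string is lower-cased and passed to mlpack::kmeans as its algorithm argument, the zero-based labels are shifted by one, and the result comes back as list(Cls, Object, Centroids). They therefore differ only in which k-means acceleration mlpack uses internally. A minimal sketch comparing two of them on Hepta, assuming mlpack is installed; since labels are arbitrary, the partitions rather than the label values are compared:

data("Hepta")
elkan   = kmeansClustering(Hepta$Data, ClusterNo = 7, Type = "Elkan")
hamerly = kmeansClustering(Hepta$Data, ClusterNo = 7, Type = "Hamerly")
# For the well-separated Hepta clusters this should be close to a permutation matrix
table(elkan$Cls, hamerly$Cls)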

man/DBscan.Rd

Lines changed: 0 additions & 1 deletion
@@ -63,7 +63,6 @@ DBSGrid <- expand.grid(
 )
 BestAcc = c()
 for (i in seq_len(nrow(DBSGrid))) {
-  print(i)
   parameters <- DBSGrid[i,]
   Cls9 = DBSCAN(
     Data,

man/MeanShiftClustering.Rd

Lines changed: 45 additions & 0 deletions
@@ -0,0 +1,45 @@
+\name{MeanShiftClustering}
+\alias{MeanShiftClustering}
+\title{Mean Shift Clustering}
+\description{
+Mean Shift Clustering of [Cheng, 1995]
+}
+\usage{
+MeanShiftClustering(Data,
+
+PlotIt=FALSE,...)
+}
+
+\arguments{
+\item{Data}{[1:n,1:d] matrix of dataset to be clustered. It consists of n cases of d-dimensional data points. Every case has d attributes, variables or features.}
+
+\item{PlotIt}{Default: FALSE. If TRUE, plots the first three dimensions of the dataset with colored three-dimensional data points defined by the clustering stored in \code{Cls}.}
+
+\item{\dots}{Further arguments to be set for the clustering algorithm; if not set, default arguments are used.}
+}
+
+\details{
+The radius used for the search can be specified with the "\code{radius}" parameter. The maximum number of iterations before algorithm termination is controlled with the "\code{max_iterations}" parameter.
+
+If the distance between two centroids is less than the given radius, one will be removed. A radius of 0 or less means an estimate will be calculated and used for the radius. Default value "0" (numeric).
+}
+\value{
+List of
+\item{Cls}{[1:n] numerical vector with n numbers defining the classification as the main output of the clustering algorithm. It has k unique numbers representing the arbitrary labels of the clustering.}
+\item{Object}{Object defined by clustering algorithm as the other output of this algorithm}
+}
+
+\examples{
+data('Hepta')
+out=MeanShiftClustering(Hepta$Data,PlotIt=FALSE)
+}
+\author{Michael Thrun}
+
+\references{
+[Cheng, 1995] Cheng, Yizong: Mean Shift, Mode Seeking, and Clustering, IEEE Transactions on Pattern Analysis and Machine Intelligence, Vol. 17 (8), pp. 790-799, doi:10.1109/34.400568, 1995.
+}
+\keyword{MeanShiftClustering}
+\keyword{Clustering}
+\concept{Large Application Clustering}
+\keyword{clara}
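As the Details section above states, the bandwidth is controlled through the backend's radius parameter (forwarded via \dots), with a radius of 0 or less meaning mlpack estimates one itself, while max_iterations bounds the mode-seeking loop. A minimal sketch, assuming mlpack is installed; the explicit radius value is hypothetical and for illustration only:

data("Hepta")
auto  = MeanShiftClustering(Hepta$Data)               # radius estimated internally (default 0)
fixed = MeanShiftClustering(Hepta$Data, radius = 2)   # hand-picked radius passed through to mlpack
c(auto = length(unique(auto$Cls)), fixed = length(unique(fixed$Cls)))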
