-
Notifications
You must be signed in to change notification settings - Fork 15
Categorical naive bayes #103
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
base: main
Are you sure you want to change the base?
Changes from 16 commits
f954413
030c1df
bc9b3f6
0162b34
9ed78a7
69ba1a3
c8c8da3
e5ac202
a02a433
67aec95
39f867c
53bfca4
4e8fa93
32e096a
4efac62
fcc3623
9f30155
462a229
8ac9880
5822850
f78c85f
d5c782a
1587a44
6f54c87
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
| Original file line number | Diff line number | Diff line change | ||||||
|---|---|---|---|---|---|---|---|---|
| @@ -0,0 +1,44 @@ | ||||||||
| # Categorical Naive Bayes | ||||||||
|
|
||||||||
| Categorical Naive Bayes model. | ||||||||
|
|
||||||||
| Categorical Naive Bayes computes the likelihood as: the number of occurrences of a feature value within the current label divided by the total number of training examples of that label. | ||||||||
|
|
||||||||
| It computes the prior probability as: the number of occurrences of the current class divided by the size of the training array. | ||||||||
|
|
||||||||
| Finally, prior * likelihood gives a score for each class, and the class with the highest score is predicted. | ||||||||
|
|
||||||||
|
|
||||||||
| ## Parameters | ||||||||
|
|
||||||||
| | Name | Definition | Type | | ||||||||
| |--------|----------------------------------------|-------------------| | ||||||||
| | `xTrain` |The training set containing the features|`vector<vector<T>>`| | ||||||||
| | `yTrain` |The set containing the class corresponding to the respective xTrain instance|`vector<string>`| | ||||||||
| | `xTest` |The sample set , whose class will be predicted|`vector<T>`| | ||||||||
|
|
||||||||
| ## Methods | ||||||||
|
|
||||||||
| | Name | Definition | Return value | | ||||||||
| |--------------------------------------------------|---------------------------------------------|-----------| | ||||||||
| | `fit(vector<vector<T>> xTrain, vector<string> yTrain)` |fit the class instance with the training data|`void`| | ||||||||
| | `predict(vector<T> xTest)` |predict the label for the xTest vector of features|`string`| | ||||||||
|
|
||||||||
| ## Example | ||||||||
|
|
||||||||
| ```cpp | ||||||||
| ``` | ||||||||
| ```cpp |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
changed sir
| Original file line number | Diff line number | Diff line change |
|---|---|---|
| @@ -0,0 +1,20 @@ | ||
| //#include "../../src/slowmokit/methods/neighbors/categorical_nb/categorical_nb.hpp" | ||
| //#include "../../src/slowmokit/core.hpp" | ||
| // | ||
| //signed main() { | ||
| // | ||
| // std::vector <std::vector<std::string>> xTrain = {{"fifa", "yes", "no", "no"}, | ||
| // {"fifa", "no", "yes", "no"}, | ||
| // {"fifa", "no", "no", "yes"}, | ||
| // {"cc", "no", "no", "yes"}, | ||
| // {"fifa", "yes", "yes", "yes"}, | ||
| // {"cc", "yes", "yes", "yes"}, | ||
| // {"cc", "no", "no", "yes"}, | ||
| // {"cc", "yes", "no", "no"}}; | ||
| // std::vector <std::string> yTrain = {"m", "m", "m", "m", "f", "f", "f", "f"}; | ||
| // std::vector <std::string> xTest = {"fifa", "no", "yes", "yes"}; | ||
| // categoricalNB<std::string> classifier; | ||
| // classifier.fit(xTrain,yTrain) | ||
| // std::cout<<classifier.predict(xTest); | ||
| // | ||
| //} |
| Original file line number | Diff line number | Diff line change |
|---|---|---|
| @@ -0,0 +1,79 @@ | ||
| /** | ||
| * @file methods/neighbors/gaussian_nb/categorical_nb.cpp | ||
| * | ||
| * Implementation of the Categorical Naive Bayes main program | ||
| */ | ||
|
|
||
| #include "categorical_nb.hpp" | ||
| std::map<std::string, double> priors; | ||
| std::map<std::string, std::map<std::string, double>> | ||
| likelihoods; // for each label we will store a map , containing n features | ||
| // and their corresponding probability | ||
| template<class T> | ||
| void fit(std::vector<std::vector<T>> xTrain, std::vector<std::string> yTrain) | ||
| { | ||
| // posterior = (prior * likelihood)/evidence | ||
| // since, evidence is same among all instances -> we can ignore it | ||
|
|
||
| if (xTrain.size() == 0 || yTrain.size() == 0) | ||
| throw "Make sure that you have atleast one train example"; | ||
| if (xTrain.size() != yTrain.size()) | ||
| throw "Number of features and target must be equal"; | ||
|
|
||
| std::map<std::string, int> occurences; | ||
|
|
||
| for (auto category : yTrain) | ||
| { | ||
| occurences[category]++; | ||
| } | ||
| for (auto current : occurences) | ||
| { | ||
| priors[current.first] = double(current.second) / yTrain.size(); | ||
| } | ||
|
|
||
| std::map<std::string, std::map<T, int>> counts; | ||
|
|
||
| for (int i = 0; i < xTrain.size(); i++) | ||
| { | ||
| std::vector<T> current = xTrain[i]; // current row | ||
| for (auto curr : current) | ||
| { | ||
| counts[yTrain[i]][curr]++; | ||
| // inc count of curr instance corresponding to ith label | ||
| } | ||
| } | ||
| for (auto current : counts) | ||
| { | ||
| for (auto e : current.second) | ||
| { | ||
| likelihoods[current.first][e.first] = | ||
| ((double(e.second)) / (occurences[current.first])); | ||
| // likelihood[label][current feature]=occ in current/total no of occ | ||
| } | ||
| } | ||
| } | ||
|
|
||
| template<class T> std::string predict(std::vector<T> xTest) | ||
| { | ||
| std::map<std::string, double> probs; | ||
| for (auto curr : priors) | ||
| { | ||
| probs[curr.first] = curr.second; | ||
| for (auto feature : xTest) | ||
| { | ||
| probs[curr.first] *= likelihoods[curr.first][feature]; | ||
| } | ||
| } | ||
| double maxProb = 0; | ||
| std::string out; | ||
| for (auto prob : probs) | ||
| { | ||
| if (prob.second > maxProb) | ||
| { | ||
| maxProb = prob.second; | ||
| out = prob.first; | ||
| } | ||
| } | ||
|
|
||
| return out; | ||
| } |
| Original file line number | Diff line number | Diff line change |
|---|---|---|
| @@ -0,0 +1,37 @@ | ||
| /** | ||
| * @file methods/neighbors/categorical_nb/categorical_naive_bayes.hpp | ||
| * | ||
| * The header file including the Categorical Naive Bayes algorithm | ||
| */ | ||
|
|
||
| #ifndef SLOWMOKIT_CATEGORICAL_NB_HPP | ||
| #define SLOWMOKIT_CATEGORICAL_NB_HPP | ||
|
|
||
| #include "../../../core.hpp" | ||
|
|
||
| template<class T> class categoricalNB | ||
|
||
| { | ||
| categoricalNB() | ||
| { | ||
| std::map<std::string, double> priors; | ||
| std::map<std::string, std::map<T, double>> likelihoods; | ||
| } | ||
|
|
||
| public: | ||
| /** | ||
| * @brief Fitting the training set into instance of class | ||
| * @param xTrain all training 2-d feature x values | ||
| * @param yTrain all training 1-d string y values | ||
| * @return NULL | ||
| */ | ||
| void fit(std::vector<std::vector<T>> xTrain, std::vector<std::string> yTrain); | ||
|
|
||
| /** | ||
| * @brief Predicting the class for xTest on the basis of training set | ||
| * @param xTest all testing feature x values | ||
| * @return string denoting the class label of xTest | ||
| */ | ||
| std::string predict(std::vector<T> xTest); | ||
| }; | ||
Ishwarendra marked this conversation as resolved.
Show resolved
Hide resolved
|
||
|
|
||
| #endif // SLOWMOKIT_CATEGORICAL_NB_HPP | ||
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.