-
Notifications
You must be signed in to change notification settings - Fork 15
Categorical naive bayes #103
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
base: main
Are you sure you want to change the base?
Changes from 16 commits
f954413
030c1df
bc9b3f6
0162b34
9ed78a7
69ba1a3
c8c8da3
e5ac202
a02a433
67aec95
39f867c
53bfca4
4e8fa93
32e096a
4efac62
fcc3623
9f30155
462a229
8ac9880
5822850
f78c85f
d5c782a
1587a44
6f54c87
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
| Original file line number | Diff line number | Diff line change | ||||||
|---|---|---|---|---|---|---|---|---|
| @@ -0,0 +1,44 @@ | ||||||||
| # Categorical Naive Bayes | ||||||||
|
|
||||||||
| Categorical Naive Bayes model. | ||||||||
|
|
||||||||
| Categorical Naive Bayes computes the likelihood as: the number of occurrences of a feature value within the current label divided by the total number of training examples of that label. | ||||||||
|
|
||||||||
| It computes the prior probability as: the number of occurrences of the current class divided by the size of the training array. | ||||||||
|
|
||||||||
| Finally, prior * likelihood gives a score for each class, and the class with the highest score is predicted. | ||||||||
|
|
||||||||
|
|
||||||||
| ## Parameters | ||||||||
|
|
||||||||
| | Name | Definition | Type | | ||||||||
| |--------|----------------------------------------|-------------------| | ||||||||
| | `xTrain` |The training set containing the features|`vector<vector<T>>`| | ||||||||
| | `yTrain` |The set containing the class corresponding to the respective xTrain instance|`vector<string>`| | ||||||||
| | `xTest` |The sample set , whose class will be predicted|`vector<T>`| | ||||||||
|
|
||||||||
| ## Methods | ||||||||
|
|
||||||||
| | Name | Definition | Return value | | ||||||||
| |--------------------------------------------------|---------------------------------------------|-----------| | ||||||||
| | `fit(vector<vector<T>> xTrain, vector<string> yTrain)` |fit the class instance with the training data|`void`| | ||||||||
| | `predict(vector<T> xTest)` |predict the label for the xTest vector of features|`string`| | ||||||||
|
|
||||||||
| ## Example | ||||||||
|
|
||||||||
| ```cpp | ||||||||
| ``` | ||||||||
| ```cpp |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
changed sir
| Original file line number | Diff line number | Diff line change |
|---|---|---|
| @@ -0,0 +1,20 @@ | ||
| //#include "../../src/slowmokit/methods/neighbors/categorical_nb/categorical_nb.hpp" | ||
| //#include "../../src/slowmokit/core.hpp" | ||
| // | ||
| //signed main() { | ||
| // | ||
| // std::vector <std::vector<std::string>> xTrain = {{"fifa", "yes", "no", "no"}, | ||
| // {"fifa", "no", "yes", "no"}, | ||
| // {"fifa", "no", "no", "yes"}, | ||
| // {"cc", "no", "no", "yes"}, | ||
| // {"fifa", "yes", "yes", "yes"}, | ||
| // {"cc", "yes", "yes", "yes"}, | ||
| // {"cc", "no", "no", "yes"}, | ||
| // {"cc", "yes", "no", "no"}}; | ||
| // std::vector <std::string> yTrain = {"m", "m", "m", "m", "f", "f", "f", "f"}; | ||
| // std::vector <std::string> xTest = {"fifa", "no", "yes", "yes"}; | ||
| // categoricalNB<std::string> classifier; | ||
| // classifier.fit(xTrain,yTrain) | ||
| // std::cout<<classifier.predict(xTest); | ||
| // | ||
| //} |
| Original file line number | Diff line number | Diff line change |
|---|---|---|
| @@ -0,0 +1,79 @@ | ||
| /** | ||
| * @file methods/neighbors/gaussian_nb/categorical_nb.cpp | ||
| * | ||
| * Implementation of the Categorical Naive Bayes main program | ||
| */ | ||
|
|
||
| #include "categorical_nb.hpp" | ||
| std::map<std::string, double> priors; | ||
| std::map<std::string, std::map<std::string, double>> | ||
| likelihoods; // for each label we will store a map , containing n features | ||
| // and their corresponding probability | ||
| template<class T> | ||
| void fit(std::vector<std::vector<T>> xTrain, std::vector<std::string> yTrain) | ||
| { | ||
| // posterior = (prior * likelihood)/evidence | ||
| // since, evidence is same among all instances -> we can ignore it | ||
|
|
||
| if (xTrain.size() == 0 || yTrain.size() == 0) | ||
| throw "Make sure that you have atleast one train example"; | ||
| if (xTrain.size() != yTrain.size()) | ||
| throw "Number of features and target must be equal"; | ||
|
|
||
| std::map<std::string, int> occurences; | ||
|
|
||
| for (auto category : yTrain) | ||
| { | ||
| occurences[category]++; | ||
| } | ||
| for (auto current : occurences) | ||
| { | ||
| priors[current.first] = double(current.second) / yTrain.size(); | ||
| } | ||
|
|
||
| std::map<std::string, std::map<T, int>> counts; | ||
|
|
||
| for (int i = 0; i < xTrain.size(); i++) | ||
| { | ||
| std::vector<T> current = xTrain[i]; // current row | ||
| for (auto curr : current) | ||
| { | ||
| counts[yTrain[i]][curr]++; | ||
| // inc count of curr instance corresponding to ith label | ||
| } | ||
| } | ||
| for (auto current : counts) | ||
| { | ||
| for (auto e : current.second) | ||
| { | ||
| likelihoods[current.first][e.first] = | ||
| ((double(e.second)) / (occurences[current.first])); | ||
| // likelihood[label][current feature]=occ in current/total no of occ | ||
| } | ||
| } | ||
| } | ||
|
|
||
| template<class T> std::string predict(std::vector<T> xTest) | ||
| { | ||
| std::map<std::string, double> probs; | ||
| for (auto curr : priors) | ||
| { | ||
| probs[curr.first] = curr.second; | ||
| for (auto feature : xTest) | ||
| { | ||
| probs[curr.first] *= likelihoods[curr.first][feature]; | ||
| } | ||
| } | ||
| double maxProb = 0; | ||
| std::string out; | ||
| for (auto prob : probs) | ||
| { | ||
| if (prob.second > maxProb) | ||
| { | ||
| maxProb = prob.second; | ||
| out = prob.first; | ||
| } | ||
| } | ||
|
|
||
| return out; | ||
| } |
| Original file line number | Diff line number | Diff line change |
|---|---|---|
| @@ -0,0 +1,37 @@ | ||
| /** | ||
| * @file methods/neighbors/categorical_nb/categorical_naive_bayes.hpp | ||
| * | ||
| * The header file including the Categorical Naive Bayes algorithm | ||
| */ | ||
|
|
||
| #ifndef SLOWMOKIT_CATEGORICAL_NB_HPP | ||
| #define SLOWMOKIT_CATEGORICAL_NB_HPP | ||
|
|
||
| #include "../../../core.hpp" | ||
|
|
||
| template<class T> class categoricalNB | ||
|
||
| { | ||
| categoricalNB() | ||
| { | ||
| std::map<std::string, double> priors; | ||
| std::map<std::string, std::map<T, double>> likelihoods; | ||
| } | ||
|
|
||
| public: | ||
| /** | ||
| * @brief Fitting the training set into instance of class | ||
| * @param xTrain all training 2-d feature x values | ||
| * @param yTrain all training 1-d string y values | ||
| * @return NULL | ||
| */ | ||
| void fit(std::vector<std::vector<T>> xTrain, std::vector<std::string> yTrain); | ||
|
|
||
| /** | ||
| * @brief Predicting the class for xTest on the basis of training set | ||
| * @param xTest all testing feature x values | ||
| * @return string denoting the class label of xTest | ||
| */ | ||
| std::string predict(std::vector<T> xTest); | ||
| }; | ||
Ishwarendra marked this conversation as resolved.
Show resolved
Hide resolved
|
||
|
|
||
| #endif // SLOWMOKIT_CATEGORICAL_NB_HPP | ||
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.