using MyML.Abstracts;
- using MyML.Interfaces;

namespace MyML
{
    public class MultinomialNaiveBayesClassifier : NaiveBayesClassifier
    {
-         public override string Predict(string text)
+         private int _vocabularySize;
+
+         public MultinomialNaiveBayesClassifier()
        {
-             double maxProbability = double.MinValue;
+             CalculateVocabularySize();
+         }
+
+         /// <summary>
+         /// Predicts class probabilities for a given text input using Multinomial Naive Bayes classification.
+         /// Returns normalized probabilities (as percentages) for each class label.
+         /// </summary>
+         /// <remarks>
+         /// The prediction process consists of four main steps:
+         ///
+         /// 1. Calculate posterior probabilities in log space:
+         ///    - Combines the class prior probability with the word likelihoods
+         ///    - P(class|text) ∝ log(P(class)) + Σ log(P(word|class))
+         ///
+         /// 2. Find the maximum log probability:
+         ///    - Needed for the log-sum-exp trick in step 3
+         ///
+         /// 3. Convert log probabilities back to normal space:
+         ///    - Uses the log-sum-exp trick to prevent numerical underflow
+         ///    - Shifts all log probabilities by subtracting the maximum value
+         ///    - exp(log(p) - maxLogP) / Σ exp(log(p) - maxLogP)
+         ///
+         /// 4. Normalize the probabilities to percentages:
+         ///    - Ensures all probabilities sum to 100%
+         /// </remarks>
+         /// <param name="text">Input text to classify</param>
+         /// <returns>Dictionary mapping class labels to their predicted probabilities (as percentages)</returns>
+         public override Dictionary<string, double> Predict(string text)
+         {
+             double maxLogProbability = double.MinValue;
-             string? predictedLabel = null;
            int totalWordCount = totalWordsPerLabel.Values.Sum();
+             IEnumerable<string> words = Tokenize(text);
+             Dictionary<string, double> logProbabilities = new Dictionary<string, double>();

-             var words = Tokenize(text);
            foreach (var label in wordCountsPerLabel.Keys)
            {
                var labelWordCounts = wordCountsPerLabel[label];
                var totalClassCount = totalWordsPerLabel[label];

-                 var probability = CalculateProbability(words, labelWordCounts, totalClassCount, totalWordCount);
-                 var evidence = CalculateEvidence(words, wordCountsPerLabel, totalWordCount);
-                 var labelProbability = probability / evidence;
+                 double logLikelihood = CalculateProbability(
+                     words,
+                     labelWordCounts,
+                     totalClassCount,
+                     _vocabularySize
+                 );

-                 if (labelProbability > maxProbability)
+                 // Prior estimated from this class's share of all training words
+                 double logPrior = Math.Log((double)totalClassCount / totalWordCount);
+                 double logPosterior = logLikelihood + logPrior;
+                 logProbabilities.Add(label, logPosterior);
+                 if (logPosterior > maxLogProbability)
                {
-                     maxProbability = labelProbability;
+                     maxLogProbability = logPosterior;
-                     predictedLabel = label;
                }
            }

-             return predictedLabel!;
-         }
-
+             var result = new Dictionary<string, double>();
+             double sumExp = 0.0;

-         private double CalculateProbability(IEnumerable<string> words, Dictionary<string, int> wordCounts, int totalClassCount, int totalWordCount)
-         {
-             double probability = 1;
+             // Log-sum-exp: shift by the maximum log probability before
+             // exponentiating, then accumulate the normalizer.
+             foreach (var kvp in logProbabilities)
+             {
+                 double shiftedLogProb = kvp.Value - maxLogProbability;
+                 sumExp += Math.Exp(shiftedLogProb);
+             }

-             foreach (var word in words)
+             foreach (var kvp in logProbabilities)
            {
-                 // Laplace smoothing to add one just to ensure that every word contributes a small, non-zero probability
-                 if (wordCounts.TryGetValue(word, out var count))
-                 {
-                     probability *= (double)(count + 1) / (totalClassCount + totalWordCount);
-                 }
-                 else
-                 {
-                     probability *= 1.0 / (totalClassCount + totalWordCount);
-                 }
+                 double shiftedLogProb = kvp.Value - maxLogProbability;
+                 double normalizedProb = (Math.Exp(shiftedLogProb) / sumExp) * 100;
+                 result.Add(kvp.Key, normalizedProb);
            }
-             return probability;
+
+             return result;
        }
+
        /// <summary>
-         /// Computes the evidence term P(B) in Bayes' theorem: the probability of the observed features (here, words) across all classes.
+         /// Calculates the log probability of a document belonging to a specific class using
+         /// the Multinomial Naive Bayes algorithm with Laplace (add-one) smoothing.
        /// </summary>
-         /// <param name="words"></param>
-         /// <param name="wordCountsPerLabel"></param>
-         /// <param name="totalWordCount"></param>
-         /// <returns></returns>
-         private double CalculateEvidence(IEnumerable<string> words, Dictionary<string, Dictionary<string, int>> wordCountsPerLabel, int totalWordCount)
+         /// <remarks>
+         /// The calculation:
+         /// 1. Uses log probabilities to prevent numerical underflow
+         /// 2. Applies Laplace smoothing to handle unseen words
+         /// 3. Assumes word independence (the naive assumption)
+         ///
+         /// The probability is calculated as:
+         /// P(class|document) ∝ log(P(class)) + Σ log(P(word|class))
+         ///
+         /// where P(word|class) is smoothed with Laplace (add-one) smoothing:
+         /// P(word|class) = (count(word, class) + 1) / (totalWordsInClass + vocabularySize)
+         /// </remarks>
+         /// <param name="words">Collection of words from the document to classify</param>
+         /// <param name="wordCountsForClass">Dictionary containing word counts for the current class</param>
+         /// <param name="totalWordsInClass">Total number of words in the training data for this class</param>
+         /// <param name="vocabularySize">Size of the entire vocabulary across all classes</param>
+         /// <returns>
+         /// Log probability of the document belonging to the class. Higher values indicate
+         /// stronger association with the class.
+         /// </returns>
+         private double CalculateProbability(
+             IEnumerable<string> words,
+             Dictionary<string, int> wordCountsForClass,
+             int totalWordsInClass,
+             int vocabularySize)
        {
-             double evidence = 1;
-
+             double logProbability = 0.0;
            foreach (var word in words)
            {
-                 double wordProbability = 0;
-                 foreach (var label in wordCountsPerLabel.Keys)
-                 {
-                     if (wordCountsPerLabel[label].TryGetValue(word, out var count))
-                     {
-                         wordProbability += (double)count / totalWordCount;
-                     }
-                 }
-                 double dealUnseenWord = wordProbability > 0 ? wordProbability : 1.0;
-                 evidence *= dealUnseenWord / totalWordCount;
+                 // TryGetValue leaves count at 0 when the word is unseen in this class
+                 wordCountsForClass.TryGetValue(word, out int count);
+
+                 // Laplace (add-one) smoothing, applied in log space
+                 double smoothedLogProb = Math.Log((count + 1.0) / (totalWordsInClass + vocabularySize));
+                 logProbability += smoothedLogProb;
+             }
+
+             return logProbability;
+         }
+
+         private void CalculateVocabularySize()
+         {
+             HashSet<string> uniqueWords = new();
+             foreach (var labelDict in wordCountsPerLabel.Values)
+             {
+                 uniqueWords.UnionWith(labelDict.Keys);
            }
-             return evidence;
+             _vocabularySize = uniqueWords.Count;
        }
    }
}
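
A standalone way to sanity-check the log-sum-exp normalization introduced in Predict is to run it on hard-coded log posteriors. The sketch below is independent of the MyML API and uses made-up numbers of the magnitude a long document produces; it shows why exponentiating the raw log posteriors would underflow to zero, while shifting by the maximum first keeps the normalization well-defined.

using System;
using System.Collections.Generic;
using System.Linq;

class LogSumExpDemo
{
    static void Main()
    {
        // Made-up log posteriors; Math.Exp(-1050.2) underflows to 0.0
        // in double precision, so the raw values cannot be normalized.
        var logPosteriors = new Dictionary<string, double>
        {
            ["spam"] = -1050.2,
            ["ham"]  = -1052.9,
        };

        // Shift by the maximum so the most probable class maps to exp(0) = 1.
        double maxLogProb = logPosteriors.Values.Max();
        double sumExp = logPosteriors.Values.Sum(lp => Math.Exp(lp - maxLogProb));

        foreach (var (label, logProb) in logPosteriors)
        {
            double percent = Math.Exp(logProb - maxLogProb) / sumExp * 100;
            Console.WriteLine($"{label}: {percent:F2}%");
        }
        // Prints: spam: 93.70%  ham: 6.30%
    }
}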
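
The Laplace smoothing in CalculateProbability can likewise be checked by hand; the counts below are hypothetical, not taken from the repository. A word seen 3 times in a class trained on 100 words with a 50-word vocabulary gets (3 + 1) / (100 + 50) ≈ 0.0267, and a word never seen in that class still gets (0 + 1) / 150 ≈ 0.0067 rather than zero, so its logarithm stays finite:

using System;

double seen = (3 + 1.0) / (100 + 50);    // ≈ 0.0267
double unseen = (0 + 1.0) / (100 + 50);  // ≈ 0.0067, never exactly zero

// Both logs are finite, so the per-word sum in log space stays well-defined.
Console.WriteLine($"{Math.Log(seen):F3} {Math.Log(unseen):F3}");  // -3.624 -5.011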