-
Notifications
You must be signed in to change notification settings - Fork 0
/
tweetFilterNew.cpp
300 lines (247 loc) · 10.4 KB
/
tweetFilterNew.cpp
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
#include <cctype>
#include <cstddef>
#include <fstream>
#include <iostream>
#include <string>
#include <vector>
using namespace std; // Using standard namespace
// Declare class called tweetFilter
class tweetFilter
{
private:
// Vector to store frequency of banned words
vector<int> bannedWordFrequency;
vector<string> bannedWords, /* a vector that stores the number of times each banned word appears in the file being filtered.*/
positiveWords, /* a vector that stores positive words. */
negativeWords, /* a vector that stores negative words. */
allWords, /* a vector that stores all words in the file being filtered. */
mostFrequentWords; /* a vector to store the most frequent words in the file being filtered. */
// Private function to replace the middle of a banned word with asterisks
string getAstriskReplacable(string bannedWord)
{
return bannedWord.replace( int( bannedWord.length()/2) , 1, "*");
}
public:
// Constructor to initialize banned, positive, and negative words
tweetFilter(string bannedFileName, string positiveFileName, string negativeFileName)
{
// Opening banned words file
ifstream bannedFile(bannedFileName);
// Opening positive words file
ifstream positiveFile(positiveFileName);
// Opening negative words file
ifstream negativeFile(negativeFileName);
// Declare an empty string to read words from file
string word = "";
// Read each word from banned file
while(bannedFile >> word)
{
// Add word to banned words vector
bannedWords.push_back(word);
// Add word frequency to banned words frequency vector
bannedWordFrequency.push_back(0);
}
// Read each word from positive file
while(positiveFile >> word)
{
// Add word to positive words vector
positiveWords.push_back(word);
}
// Read each word from negative file
while(negativeFile >> word)
{
// Add word to negative words vector
negativeWords.push_back(word);
}
// Closing banned words file
bannedFile.close();
// Closing positive words file
positiveFile.close();
// Closing negative words file
negativeFile.close();
}
// a method that sets all elements of the bannedWordFrequency vector to zero.
void resetbannedWordFrequency()
{
int index=0;
// Looping through each banned word
for(auto bannedword : bannedWords)
{
// Reset banned word frequency to 0 and increment index for next iteration
bannedWordFrequency.at(index++) = 0;
}
}
// method to convert a string to lowercase (here a word to lowercase).
string wordToLower(string word)
{
// Looping through each character in the word
for (char& c : word)
{
// Convert character to lowercase
c = std::tolower(c);
}
// Return lowercase word
return word;
}
// method to extract individual words from a tweet and stores them in a vector called allWords.
void saveWords(string tweet)
{
// Declare an empty string to store a word
string word="";
// Looping through each character in the tweet
for(int i=0; i<tweet.length(); i++)
{
// check if the current character is a delimiter
if(tweet[i]==' '||tweet[i]=='.'||tweet[i]==','||tweet[i]=='!'||tweet[i]=='?'||tweet[i]==':'||tweet[i]==';')
{
// if word is not empty, then...
if(word!="")
{
// push it into the allWords vector
allWords.push_back(word);
}
// reset the word
word="";
}
else
{
// append the current character to the word
word += tweet[i];
}
}
}
/* a method that replaces banned words in a tweet with asterisks
and updates the bannedWordFrequency vector with the
number of times each banned word appears in the tweet.*/
string filterTweet(string tweet)
{
int wordindex = 0;
// create a copy of the original tweet for processing
string filteredTweet = tweet;
// looping through the banned words
for (auto bannedWord : bannedWords)
{
// search for the banned word in the filtered tweet converted to lower case (case-insensitive)
int index = wordToLower(filteredTweet).find(bannedWord);
// while the banned word is found in the filtered tweet
while (index != string::npos)
{
// replace the banned word with asterisks
filteredTweet.replace(index, bannedWord.length(), getAstriskReplacable(bannedWord));
// increment the frequency count for the banned word
bannedWordFrequency.at(wordindex)++ ;
// search for the banned word again from the next position
index = filteredTweet.find(bannedWord, index+1);
}
// increment the word index
wordindex++ ;
}
// return the filtered tweet
return filteredTweet;
}
/* a method that filters a file by reading it line by line,
filtering each line using the filterTweet method,
writing the filtered lines to a new file,
and displaying the sentiment analysis of each filtered line.
It also calls the saveWords method to store all words in the file in the allWords vector,
and the displayResults method to display the banned words in the file and their frequencies. */
void filterFile(string inputFileName, string outputFileName)
{
// reset the frequency count for banned words
resetbannedWordFrequency();
// open the input file
ifstream inputFile(inputFileName);
// open the output file
ofstream outputFile(outputFileName);
string line="";
// check if the input file is opened successfully
if(inputFile)
{
// print the heading for the sentiment analysis report
cout << "\n\n>> The sentiment analysis of the file \'" << inputFileName << "\' is :-\n" << endl;
// loop through each line in the input file
while(getline(inputFile, line))
{
// filter the line using the filterTweet function
string filteredLine = filterTweet(line);
// write the filtered line to the output file
outputFile << filteredLine << endl;
// save the words from the line to the allWords vector
saveWords(line);
// print the filtered line and its sentiment analysis
cout << filteredLine << " : " << sentimentAnalysis(filteredLine) << endl;
}
// display the final results of the sentiment analysis
displayResults();
}
else
{
// print an error message if the input file cannot be opened
cout << "The file named \'" << inputFileName << "\' doesn't exist or is missing from this folder!" << endl;
}
// close the input file
inputFile.close();
// close the output file
outputFile.close();
}
// Define a function to display the usage frequency of banned words in filtered tweets
void displayResults()
{
int i=0;
cout << "\n>> And the usage of banned words in it are :-\n" << endl;
// Looping through each banned word
for(auto bannedWord : bannedWords)
{
// Display the banned word and its frequency
cout << "\'" << bannedWord << "\' found " << bannedWordFrequency.at(i++) << " times" << endl;
}
}
// Defining a function to perform sentiment analysis on a tweet and return the result
string sentimentAnalysis(string tweet)
{
// Initialize a counter variable to analyse the check through +1 and -1 pointing on sentiments
int sentimentCount = 0;
// Looping through each positive word
for (auto word : positiveWords)
{
// Find the index of the positive word in the tweet
int index = wordToLower(tweet).find(word);
// If the positive word is found in the tweet
while (index != string::npos)
{
// Increase the sentiment count
sentimentCount++ ;
// Search for the next occurrence of the positive word
index = wordToLower(tweet).find(word, index+1);
}
}
// Looping through each negative word
for (auto word : negativeWords)
{
// Find the index of the negative word in the tweet
int index = wordToLower(tweet).find(word);
// If the negative word is found in the tweet
while (index != string::npos)
{
// Decrease the sentiment count
sentimentCount-- ;
// Search for the next occurrence of the negative word
index = wordToLower(tweet).find(word, index+1);
}
}
// Determining the sentiment of the tweet based on the sentiment count and returning it
return (sentimentCount>=0)?(sentimentCount>0)?"Positive":"Neutral":"Negative";
}
};
int main(){
// Create a tweetFilter object with specified files for banned, positive and negative words
tweetFilter tf("banned.txt", "positive.txt", "negative.txt");
// Filter the first input tweet file and save the result to the first output tweet file
tf.filterFile("tweets1.txt","tweets1Filtered.txt");
// Filter the second input tweet file and save the result to the second output tweet file
tf.filterFile("tweets2.txt","tweets2Filtered.txt");
// Filter the third input tweet file and save the result to the third output tweet file
tf.filterFile("tweets3.txt","tweets3Filtered.txt");
// Filter the fourth input tweet file and save the result to the fourth output tweet file
tf.filterFile("tweets4.txt","tweets4Filtered.txt");
// End of the program
return 0;
}