-
Notifications
You must be signed in to change notification settings - Fork 0
Expand file tree
/
Copy pathsearchPagerank.c
More file actions
147 lines (116 loc) · 3.94 KB
/
searchPagerank.c
File metadata and controls
147 lines (116 loc) · 3.94 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
/*
* COMP2521 - Data Structures and Algorithms
* Assignment 2 - Simple Search Engines
*
* Group: TwoBrothers
*
* Partners: Kurt Banwell-Pachernegg (Z5022859)
* Sam Eager (Z3414861)
*
*
* Part 1. C - Search Engine
*/
#include <stdlib.h>
#include <stdio.h>
#include <string.h>
#include <ctype.h>
#include "collection.h"
#include "BST.h"
#include "HashMap.h"
#include "PriorityQueue.h"
#define MAX_WORD_SIZE 100
#define MAX_RESULTS 30
struct Match {
char * url;
int count;
double pagerank;
};
typedef struct Match * Match;
static Match newMatch(char * url);
static int matchCompare(void * a, void * b);
static int stringCompare(void * a, void * b);
int main (int argc, char * argv[])
{
/* A Priority Queue is used to load the search terms into so that the
terms can be itterated over in alphabetical order. This means we only
need to read through invertedIndex.txt once to get all the urls and
we can also escape early if all the words have been found. */
PriorityQueue words = newPriorityQueue(stringCompare);
int i, n;
// Convert search terms to lowercase and load into words.
for(i = 1; i < argc; i++) {
n = 0;
while(argv[i][n] != '\0') {
argv[i][n] = tolower(argv[i][n]);
n++;
}
addPriorityQueue(words, argv[i]);
}
/* Gets a HashMap with the url name as the key and the
corresponding number of matching terms from the given Priority Queue
or words.
i.e. If the queue has the words 'mars' and 'planet' and url23
contains both those words then 'url23' will map to 2. If url34 contains
'mars' but does not contain 'planet' then 'url34' will map to 1. */
HashMap urls = getSearchUrls(words);
// This will be all the urls that contain one or more of the search terms.
char ** keys = (char **) getKeySetHashMap(urls);
/* This priority queue compares matches (each match is a url that
matched at least one of the search terms and also contains the number
of matches as well as the url's pagerank) using the comparison specs
given in Part 1. C of the assignment.*/
PriorityQueue matches = newPriorityQueue(matchCompare);
Match match;
/* Fills the priority queue with the matching set of unique urls, their
number of matches for this search and their corresponding pagerank. */
for(i = 0; i < sizeHashMap(urls); i++) {
match = newMatch(keys[i]);
match->count = getInteger(getHashMap(urls, keys[i]));
addPriorityQueue(matches, match);
}
int count = 0; // Counter for MAX_RESULTS
// Prints out the urls in the correct order.
while(!emptyPriorityQueue(matches)) {
match = nextPriorityQueue(matches);
printf("%s\n", match->url);
if(++count == MAX_RESULTS) break;
}
return 1;
}
static Match newMatch(char * url)
{
Match match = malloc(sizeof(struct Match));
match->url = malloc(sizeof(char) * strlen(url));
strcpy(match->url, url);
match->count = 1;
match->pagerank = 0;
FILE * file = fopen("pagerankList.txt", "r");
char * urlName = malloc(sizeof(char) * MAX_WORD_SIZE);
int outgoing;
float pagerank;
while (fscanf(file, "%s %d, %f", urlName, &outgoing, &pagerank) != EOF) {
urlName[strlen(urlName)-1] = 0; // Remove comma from url name
if(!strcmp(url, urlName)) {
match->pagerank = pagerank;
break;
}
}
fclose(file);
return match;
}
static int matchCompare(void * a, void * b)
{
Match A = (Match) a;
Match B = (Match) b;
int result = A->count - B->count;
if(!result) {
if(A->pagerank - B->pagerank > 0) result = 1;
else if(A->pagerank - B->pagerank < 0) result = -1;
else result = strcmp(A->url, B->url);
}
return result;
}
static int stringCompare(void * a, void * b)
{
return strcmp((char *) b, (char *) a);
}