diff --git a/src/hash_functions.c b/src/hash_functions.c
new file mode 100644
index 0000000..cdee613
--- /dev/null
+++ b/src/hash_functions.c
@@ -0,0 +1,82 @@
+#include <stddef.h>
+#include <stdint.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+
+#include "hash_functions.h"
+#include "memory.h"
+
+size_t hash_map_default_hash_func(const void *key, size_t capacity, int len) {
+    size_t value = 0;
+    size_t usable = len > 0 ? (size_t) len : 0;
+
+    // only the leading sizeof(size_t) bytes take part in the hash
+    if (usable > sizeof(value)) {
+        usable = sizeof(value);
+    }
+
+    // copy at most len bytes: no out-of-bounds, misaligned or aliasing-unsafe load for short keys
+    memcpy(&value, key, usable);
+
+    return value % capacity;
+}
+
+size_t additive(const void *key, size_t capacity, int len) {
+    // unsigned bytes keep the sum independent of the platform's char signedness
+    const unsigned char *bytes = key;
+    size_t hash = len > 0 ? (size_t) len : 0;
+
+    for (int i = 0; i < len; ++i) {
+        hash += bytes[i];
+    }
+
+    return hash % capacity;
+}
+
+size_t pearson_hash(const void *key, size_t capacity, int len) {
+    // 256 values 0-255 in any (random) order suffices
+    static const unsigned char T[256] = {
+         98,   6,  85, 150,  36,  23, 112, 164, 135, 207, 169,   5,  26,  64, 165, 219, // 1
+         61,  20,  68,  89, 130,  63,  52, 102,  24, 229, 132, 245,  80, 216, 195, 115, // 2
+         90, 168, 156, 203, 177, 120,   2, 190, 188,   7, 100, 185, 174, 243, 162,  10, // 3
+        237,  18, 253, 225,   8, 208, 172, 244, 255, 126, 101,  79, 145, 235, 228, 121, // 4
+        123, 251,  67, 250, 161,   0, 107,  97, 241, 111, 181,  82, 249,  33,  69,  55, // 5
+         59, 153,  29,   9, 213, 167,  84,  93,  30,  46,  94,  75, 151, 114,  73, 222, // 6
+        197,  96, 210,  45,  16, 227, 248, 202,  51, 152, 252, 125,  81, 206, 215, 186, // 7
+         39, 158, 178, 187, 131, 136,   1,  49,  50,  17, 141,  91,  47, 129,  60,  99, // 8
+        154,  35,  86, 171, 105,  34,  38, 200, 147,  58,  77, 118, 173, 246,  76, 254, // 9
+        133, 232, 196, 144, 198, 124,  53,   4, 108,  74, 223, 234, 134, 230, 157, 139, // 10
+        189, 205, 199, 128, 176,  19, 211, 236, 127, 192, 231,  70, 233,  88, 146,  44, // 11
+        183, 201,  22,  83,  13, 214, 116, 109, 159,  32,  95, 226, 140, 220,  57,  12, // 12
+        221,  31, 209, 182, 143,  92, 149, 184, 148,  62, 113,  65,  37,  27, 106, 166, // 13
+          3,  14, 204,  72,  21,  41,  56,  66,  28, 193,  40, 217,  25,  54, 179, 117, // 14
+        238,  87, 240, 155, 180, 170, 242, 212, 191, 163,  78, 218, 137, 194, 175, 110, // 15
+         43, 119, 224,  71, 122, 142,  42, 160, 104,  48, 247, 103,  15,  11, 138, 239  // 16
+    };
+
+    const unsigned char *bytes = key;
+    uint64_t hash = 0;
+
+    // 8 rounds of the standard 8-bit Pearson hash; round j sees the first key byte incremented by j
+    for (int j = 0; j < 8; j++) {
+        unsigned char h = 0;
+
+        for (int i = 0; i < len; i++) {
+            unsigned char byte = bytes[i];
+
+            if (i == 0) {
+                byte = (unsigned char) (byte + j);
+            }
+
+            // h and byte are both 0-255, so the index always stays inside T
+            h = T[h ^ byte];
+        }
+
+        // concatenate the 8 round results into one 64-bit value
+        hash = (hash << 8) | h;
+    }
+
+    // reduce to a valid table offset, as the hash function contract requires
+    return (size_t) (hash % capacity);
+}
diff --git a/src/hash_functions.h b/src/hash_functions.h
new file mode 100644
index 0000000..58ef0c9
--- /dev/null
+++ b/src/hash_functions.h
@@ -0,0 +1,38 @@
+/**
+ * @file
+ * Declarations of the hash functions that can be plugged into a hash map.
+ */
+
+#ifndef HASH_FUNCTIONS_H
+#define HASH_FUNCTIONS_H
+
+#include <stddef.h>
+
+/**
+ * The default hash function. This is the function to be used if the user does not explicitly provide any hash function. It interprets the first `min(len, sizeof(size_t))` bytes of the key as an integer and applies a basic `mod` operator.
+ * @param key key to hash
+ * @param capacity maximum size of the map, must not be 0
+ * @param len the size of the key passed as the first argument
+ * @return an offset within the range `[0, capacity)`
+ */
+size_t hash_map_default_hash_func(const void *key, size_t capacity, int len);
+
+/**
+ * The additive hash function. Both the explanation and code are based on [Chasan Chouse](http://www.chasanc.com/old/hashing_func.htm)
+ * @param key key to hash
+ * @param capacity maximum size of the map, must not be 0
+ * @param len the size of the key passed as the first argument
+ * @return an offset within the range `[0, capacity)`
+ */
+size_t additive(const void *key, size_t capacity, int len);
+
+/**
+ * The Pearson hash function. Both the explanation and code are based on [Wikipedia](http://en.wikipedia.org/wiki/Pearson_hashing)
+ * @param key key to hash
+ * @param capacity maximum size of the map, must not be 0
+ * @param len the size of the key passed as the first argument
+ * @return an offset within the range `[0, capacity)`
+ */
+size_t pearson_hash(const void *key, size_t capacity, int len);
+
+#endif
diff --git a/src/hash_map.c b/src/hash_map.c
index 4eca419..3ecd32f 100644
--- a/src/hash_map.c
+++ b/src/hash_map.c
@@ -2,11 +2,13 @@
 #include
 #include
 #include
+#include
 
 #include "memory.h"
 #include "linked_list.h"
 
 #include "hash_map.h"
+#include "hash_functions.h"
 
 // when "-g" is used redefine inline to static or else the linker will try to link the function and fail (since it's inline)
 #ifdef TEST
@@ -17,11 +19,7 @@ inline int hash_map_default_comparator(const void *l, const void *r) {
     return *((unsigned long *) l) - *((unsigned long *) r);
 }
 
-inline size_t hash_map_default_hash_func(const void *key, size_t capacity) {
-    return *((size_t *) key) % capacity;
-}
-
-void hash_map_init(hash_map *map, size_t capacity, hash_map_comparator comparator, hash_map_hash_func hash_func) {
+void hash_map_init(hash_map *map, size_t capacity, hash_map_comparator comparator, hash_map_hash_func hash_func, hash_map_key_size key_size) {
     map->capacity = capacity;
     map->size = 0;
 
@@ -40,6 +38,8 @@ void hash_map_init(hash_map *map, size_t capacity, hash_map_comparator comparato
         map->hash_func = hash_map_default_hash_func;
     }
 
+    map->key_size = key_size;
+
     map->keys = (linked_list *) safe_malloc(sizeof(linked_list));
     // No free_data func here because keys will be free'd by linked_list_free for **table
     linked_list_init(map->keys, NULL);
@@ -60,7 +60,7 @@ void hash_map_free(hash_map *map) {
 }
 
 void *hash_map_get(hash_map *map, void *key) {
-    linked_list *list = map->table[map->hash_func(key, map->capacity)];
+    linked_list *list = map->table[map->hash_func(key, map->capacity, map->key_size(key))];
 
     if (!list) {
         return NULL;
@@ -82,12 +82,13 @@ void *hash_map_get(hash_map *map, void *key) {
 }
 
 void hash_map_put(hash_map *map, void *key, void *value) {
-    linked_list *list = map->table[map->hash_func(key, map->capacity)];
+    linked_list *list = map->table[map->hash_func(key, map->capacity, map->key_size(key))];
 
     if (!list) {
         list = (linked_list *) safe_malloc(sizeof(linked_list));
+
         linked_list_init(list, (linked_list_destructor) safe_free);
-        map->table[map->hash_func(key, map->capacity)] = list;
+        map->table[map->hash_func(key, map->capacity, map->key_size(key))] = list;
     }
 
     linked_list_node *head = linked_list_head(list);
@@ -118,7 +119,7 @@ void hash_map_put(hash_map *map, void *key, void *value) {
 }
 
 void hash_map_remove(hash_map *map, void *key) {
-    size_t offset = map->hash_func(key, map->capacity);
+    size_t offset = map->hash_func(key, map->capacity, map->key_size(key));
     linked_list *list = map->table[offset];
 
     if (!list) {
@@ -175,7 +176,7 @@ void hash_map_clear(hash_map *map) {
 }
 
 bool hash_map_contains_key(hash_map *map, void *key) {
-    linked_list *list = map->table[map->hash_func(key, map->capacity)];
+    linked_list *list = map->table[map->hash_func(key, map->capacity, map->key_size(key))];
 
     if (!list) {
         return false;
diff --git a/src/hash_map.h b/src/hash_map.h
index e697487..4b59d87 100644
--- a/src/hash_map.h
+++ b/src/hash_map.h
@@ -22,9 +22,17 @@ typedef int (*hash_map_comparator)(const void *l, const void *r);
  * Hash function
  * @param key key to hash
  * @param capacity maximum size of the map
+ * @param len the size of the key passed as the first argument
  * @return an offset within the range `[0, capacity)`
  */
-typedef size_t (*hash_map_hash_func)(const void *key, size_t capacity);
+typedef size_t (*hash_map_hash_func)(const void *key, size_t capacity, int len);
+
+/**
+ * Size function to determine the size of each key passed to the hash function
+ * @param key key whose size is requested
+ * @return the size of the key
+ */
+typedef size_t (*hash_map_key_size)(const void *key);
 
 /**
  * Hash map object
@@ -40,6 +48,8 @@ typedef struct {
     hash_map_comparator comparator;
     /** Key hash function */
     hash_map_hash_func hash_func;
+    /** Key size function */
+    hash_map_key_size key_size;
     /** Keys */
     linked_list *keys;
 } hash_map;
@@ -61,7 +71,8 @@ typedef struct {
  * @param comparator key comparator function
  * @param hash_func key hash function
+ * @param key_size key size function, called on every key before it is hashed; must not be NULL
  */
-void hash_map_init(hash_map *map, size_t capacity, hash_map_comparator comparator, hash_map_hash_func hash_func);
+void hash_map_init(hash_map *map, size_t capacity, hash_map_comparator comparator, hash_map_hash_func hash_func, hash_map_key_size key_size);
 
 /**
  * Free the hash map.
diff --git a/src/memory.c b/src/memory.c
index 257ae4d..b6c5d05 100644
--- a/src/memory.c
+++ b/src/memory.c
@@ -1,5 +1,4 @@
 #include "memory.h"
-
 #include
 #include
 
diff --git a/test/test_hash_map.c b/test/test_hash_map.c
index 8740c7f..36a81ca 100644
--- a/test/test_hash_map.c
+++ b/test/test_hash_map.c
@@ -2,157 +2,260 @@
 #include
 
 #include "hash_map.h"
+#include "hash_functions.h"
 #include "linked_list.h"
 #include "memory.h"
 #include
 
-static hash_map *map;
+static hash_map **hash_map_array;
+const int hash_map_array_size = 2;
 
 extern int __malloc_counter;
 
 void setUp() {
-    map = safe_malloc(sizeof(hash_map));
-    hash_map_init(map, 1000, (hash_map_comparator) strcmp, NULL);
+    hash_map_array = (hash_map **) safe_malloc(sizeof(hash_map *) * hash_map_array_size);
+    // hash_map_array[0] : using default hash function
+    // hash_map_array[1] : using Pearson hash function
+
+    for (int i = 0; i < hash_map_array_size; i++) {
+        hash_map_array[i] = safe_malloc(sizeof(hash_map));
+    }
+
+    hash_map_init(hash_map_array[0], 1000, (hash_map_comparator) strcmp, NULL, (hash_map_key_size) strlen);
+    hash_map_init(hash_map_array[1], 1000, (hash_map_comparator) strcmp, pearson_hash, (hash_map_key_size) strlen);
 }
 
 void test_size() {
-    TEST_ASSERT_EQUAL_UINT(0, hash_map_size(map));
-
-    hash_map_put(map, "key", "value");
-    TEST_ASSERT_EQUAL_UINT(1, hash_map_size(map));
-
-    hash_map_put(map, "key2", "value");
-    TEST_ASSERT_EQUAL_UINT(2, hash_map_size(map));
-
-    // if the same key was updated, size should not change
-    hash_map_put(map, "key", "value2");
-    TEST_ASSERT_EQUAL_UINT(2, hash_map_size(map));
-
-    // if hashs collide, size should still work
-    hash_map_put(map, "1234567890", "9090");
-    hash_map_put(map, "1234567809", "0909");
-    TEST_ASSERT_EQUAL_UINT(4, hash_map_size(map));
-
-    hash_map_remove(map, "key");
-    hash_map_remove(map, "key2");
-    hash_map_remove(map, "1234567890");
-    hash_map_remove(map, "1234567809");
-    TEST_ASSERT_EQUAL_UINT(0, hash_map_size(map));
+    hash_map *this_hash_map;
+
+    for (int i = 0; i < hash_map_array_size; i++) {
+        this_hash_map = hash_map_array[i];
+
+        TEST_ASSERT_EQUAL_UINT(0, hash_map_size(this_hash_map));
+
+        hash_map_put(this_hash_map, "key", "value");
+        TEST_ASSERT_EQUAL_UINT(1, hash_map_size(this_hash_map));
+
+        hash_map_put(this_hash_map, "key2", "value");
+        TEST_ASSERT_EQUAL_UINT(2, hash_map_size(this_hash_map));
+
+        // if the same key was updated, size should not change
+        hash_map_put(this_hash_map, "key", "value2");
+        TEST_ASSERT_EQUAL_UINT(2, hash_map_size(this_hash_map));
+
+        // if hashs collide, size should still work
+        hash_map_put(this_hash_map, "1234567890", "9090");
+        hash_map_put(this_hash_map, "1234567809", "0909");
+        TEST_ASSERT_EQUAL_UINT(4, hash_map_size(this_hash_map));
+
+        hash_map_remove(this_hash_map, "key");
+        hash_map_remove(this_hash_map, "key2");
+        hash_map_remove(this_hash_map, "1234567890");
+        hash_map_remove(this_hash_map, "1234567809");
+        TEST_ASSERT_EQUAL_UINT(0, hash_map_size(this_hash_map));
+    }
 }
 
 void test_put_get() {
-    hash_map_put(map, "key", "value");
-    TEST_ASSERT_EQUAL_STRING("value", (char *) hash_map_get(map, "key"));
-
-    hash_map_put(map, "key", "value2");
-    TEST_ASSERT_EQUAL_STRING("value2", (char *) hash_map_get(map, "key"));
-
-    hash_map_put(map, "key2", "value3");
-    TEST_ASSERT_EQUAL_STRING("value3", (char *) hash_map_get(map, "key2"));
+    hash_map *this_hash_map;
+
+    for (int i = 0; i < hash_map_array_size; i++) {
+        this_hash_map = hash_map_array[i];
+
+        hash_map_put(this_hash_map, "key", "value");
+        TEST_ASSERT_EQUAL_STRING("value", (char *) hash_map_get(this_hash_map, "key"));
+
+        hash_map_put(this_hash_map, "key", "value2");
+        TEST_ASSERT_EQUAL_STRING("value2", (char *) hash_map_get(this_hash_map, "key"));
+
+        hash_map_put(this_hash_map, "key2", "value3");
+        TEST_ASSERT_EQUAL_STRING("value3", (char *) hash_map_get(this_hash_map, "key2"));
+    }
 }
 
 void test_get_invalid_key() {
-    TEST_ASSERT_NULL(hash_map_get(map, "invalid_key"));
+    hash_map *this_hash_map;
+
+    for (int i = 0; i < hash_map_array_size; i++) {
+        this_hash_map = hash_map_array[i];
+
+        TEST_ASSERT_NULL(hash_map_get(this_hash_map, "invalid_key"));
+    }
 }
 
+int int_size() {
+    return sizeof(uint64_t);
+}
+
 void test_default_hash_func() {
     uint64_t i1 = 1, i2 = 1001;
 
-    // these two should collide because of the %
-    TEST_ASSERT_EQUAL_UINT(map->hash_func(&i1, map->capacity),
-                           map->hash_func(&i2, map->capacity));
-
-    // these two hash obtained by default hash_func should be equal (since the first 8-byte is the same)
-    TEST_ASSERT_EQUAL_UINT(map->hash_func("1234567890", map->capacity),
-                           map->hash_func("1234567809", map->capacity));
+    hash_map *default_map = hash_map_array[0];
+    hash_map *this_hash_map;
+
+    // these two should collide because of the % (a property of the default hash function only)
+    TEST_ASSERT_EQUAL_UINT(default_map->hash_func(&i1, default_map->capacity, int_size()),
+                           default_map->hash_func(&i2, default_map->capacity, int_size()));
+
+    for (int i = 0; i < hash_map_array_size; i++) {
+        this_hash_map = hash_map_array[i];
+
+        // keys sharing their first 8 bytes must hash alike when only 8 bytes are hashed
+        TEST_ASSERT_EQUAL_UINT(this_hash_map->hash_func("1234567890", this_hash_map->capacity, int_size()),
+                               this_hash_map->hash_func("1234567809", this_hash_map->capacity, int_size()));
+
+        // every hash function must return an offset inside the table
+        TEST_ASSERT_TRUE(this_hash_map->hash_func("1234567890", this_hash_map->capacity, int_size()) < this_hash_map->capacity);
+    }
 }
 
 void test_collision() {
-    // these two would collide and chaining should come into play
-    hash_map_put(map, "1234567890", "9090");
-    hash_map_put(map, "1234567809", "0909");
-
-    TEST_ASSERT_EQUAL_STRING("9090", hash_map_get(map, "1234567890"));
-    TEST_ASSERT_EQUAL_STRING("0909", hash_map_get(map, "1234567809"));
+    hash_map *this_hash_map;
+
+    for (int i = 0; i < hash_map_array_size; i++) {
+        this_hash_map = hash_map_array[i];
+
+        // these two collide under the default hash function, so chaining should come into play
+        hash_map_put(this_hash_map, "1234567890", "9090");
+        hash_map_put(this_hash_map, "1234567809", "0909");
+
+        TEST_ASSERT_EQUAL_STRING("9090", hash_map_get(this_hash_map, "1234567890"));
+        TEST_ASSERT_EQUAL_STRING("0909", hash_map_get(this_hash_map, "1234567809"));
+    }
 }
 
 void test_put_remove() {
-    hash_map_put(map, "abcd", "the alphabet");
-    hash_map_put(map, "1234", "some numbers");
-
-    hash_map_remove(map, "abcd");
-    hash_map_remove(map, "1234");
-
-    TEST_ASSERT_NULL(hash_map_get(map, "abcd"));
-    TEST_ASSERT_NULL(hash_map_get(map, "1234"));
-
-    hash_map_put(map, "abcd", "try it again");
-    TEST_ASSERT_EQUAL_STRING("try it again", hash_map_get(map, "abcd"));
+    hash_map *this_hash_map;
+
+    for (int i = 0; i < hash_map_array_size; i++) {
+        this_hash_map = hash_map_array[i];
+
+        hash_map_put(this_hash_map, "abcd", "the alphabet");
+        hash_map_put(this_hash_map, "1234", "some numbers");
+
+        hash_map_remove(this_hash_map, "abcd");
+        hash_map_remove(this_hash_map, "1234");
+
+        TEST_ASSERT_NULL(hash_map_get(this_hash_map, "abcd"));
+        TEST_ASSERT_NULL(hash_map_get(this_hash_map, "1234"));
+
+        hash_map_put(this_hash_map, "abcd", "try it again");
+        TEST_ASSERT_EQUAL_STRING("try it again", hash_map_get(this_hash_map, "abcd"));
+    }
 }
 
 void test_remove_non_existent() {
-    hash_map_remove(map, "not here");
-    TEST_ASSERT_EQUAL_UINT(0, hash_map_size(map));
+    hash_map *this_hash_map;
+
+    for (int i = 0; i < hash_map_array_size; i++) {
+        this_hash_map = hash_map_array[i];
+
+        hash_map_remove(this_hash_map, "not here");
+        TEST_ASSERT_EQUAL_UINT(0, hash_map_size(this_hash_map));
+    }
 }
 
 void test_clear() {
-    hash_map_put(map, "key", "value");
-    hash_map_put(map, "key2", "value2");
-    hash_map_put(map, "key3", "value3");
-
-    hash_map_clear(map);
-
-    TEST_ASSERT_EQUAL_UINT(0, hash_map_size(map));
-
-    TEST_ASSERT_NULL(hash_map_get(map, "key"));
-    TEST_ASSERT_NULL(hash_map_get(map, "key2"));
-    TEST_ASSERT_NULL(hash_map_get(map, "key3"));
+    hash_map *this_hash_map;
+
+    for (int i = 0; i < hash_map_array_size; i++) {
+        this_hash_map = hash_map_array[i];
+
+        hash_map_put(this_hash_map, "key", "value");
+        hash_map_put(this_hash_map, "key2", "value2");
+        hash_map_put(this_hash_map, "key3", "value3");
+
+        hash_map_clear(this_hash_map);
+
+        TEST_ASSERT_EQUAL_UINT(0, hash_map_size(this_hash_map));
+
+        TEST_ASSERT_NULL(hash_map_get(this_hash_map, "key"));
+        TEST_ASSERT_NULL(hash_map_get(this_hash_map, "key2"));
+        TEST_ASSERT_NULL(hash_map_get(this_hash_map, "key3"));
+    }
 }
 
 void test_keys() {
     char *keys[] = { "key", "keys2", "1234567890", "1234567809" };
     char *values[] = { "value", "value2", "9090", "0909" };
 
-    linked_list *keys_list = hash_map_keys(map);
-    TEST_ASSERT_EQUAL_UINT(0, linked_list_size(keys_list));
-
-    for (int i = 0; i < sizeof(keys) / sizeof(*keys); i++) {
-        hash_map_put(map, keys[i], values[i]);
-    }
-
-    keys_list = hash_map_keys(map);
-    TEST_ASSERT_EQUAL_UINT(4, linked_list_size(keys_list));
-
-    linked_list_node *node = linked_list_head(keys_list);
-    for (int i = 0; i < sizeof(keys) / sizeof(*keys); i++) {
-        TEST_ASSERT_EQUAL_STRING(keys[i], node->data);
-        node = node->next;
-    }
+    hash_map *this_hash_map;
+
+    for (int i = 0; i < hash_map_array_size; i++) {
+        this_hash_map = hash_map_array[i];
+
+        linked_list *keys_list = hash_map_keys(this_hash_map);
+        TEST_ASSERT_EQUAL_UINT(0, linked_list_size(keys_list));
+
+        for (size_t k = 0; k < sizeof(keys) / sizeof(*keys); k++) {
+            hash_map_put(this_hash_map, keys[k], values[k]);
+        }
+
+        keys_list = hash_map_keys(this_hash_map);
+        TEST_ASSERT_EQUAL_UINT(4, linked_list_size(keys_list));
+
+        linked_list_node *node = linked_list_head(keys_list);
+        for (size_t k = 0; k < sizeof(keys) / sizeof(*keys); k++) {
+            TEST_ASSERT_EQUAL_STRING(keys[k], node->data);
+            node = node->next;
+        }
+    }
 }
 
 void test_contains_key_empty_map() {
-    TEST_ASSERT_FALSE(hash_map_contains_key(map, "no keys in map"));
+    hash_map *this_hash_map;
+
+    for (int i = 0; i < hash_map_array_size; i++) {
+        this_hash_map = hash_map_array[i];
+
+        TEST_ASSERT_FALSE(hash_map_contains_key(this_hash_map, "no keys in map"));
+    }
 }
 
 void test_contains_key_null_key() {
-    hash_map_put(map, "null key", NULL);
-    TEST_ASSERT_TRUE(hash_map_contains_key(map, "null key"));
+    hash_map *this_hash_map;
+
+    for (int i = 0; i < hash_map_array_size; i++) {
+        this_hash_map = hash_map_array[i];
+
+        hash_map_put(this_hash_map, "null key", NULL);
+        TEST_ASSERT_TRUE(hash_map_contains_key(this_hash_map, "null key"));
+    }
 }
 
 void test_contains_key_nonexistent_key() {
-    TEST_ASSERT_FALSE(hash_map_contains_key(map, "not here"));
+    hash_map *this_hash_map;
+
+    for (int i = 0; i < hash_map_array_size; i++) {
+        this_hash_map = hash_map_array[i];
+
+        TEST_ASSERT_FALSE(hash_map_contains_key(this_hash_map, "not here"));
+    }
 }
 
 void test_contains_key_multiple() {
-    hash_map_put(map, "key", "value");
-    hash_map_put(map, "key2", "value2");
-
-    TEST_ASSERT_TRUE(hash_map_contains_key(map, "key"));
-    TEST_ASSERT_TRUE(hash_map_contains_key(map, "key2"));
+    hash_map *this_hash_map;
+
+    for (int i = 0; i < hash_map_array_size; i++) {
+        this_hash_map = hash_map_array[i];
+
+        hash_map_put(this_hash_map, "key", "value");
+        hash_map_put(this_hash_map, "key2", "value2");
+
+        TEST_ASSERT_TRUE(hash_map_contains_key(this_hash_map, "key"));
+        TEST_ASSERT_TRUE(hash_map_contains_key(this_hash_map, "key2"));
+    }
 }
 
 void tearDown() {
-    hash_map_free(map);
+    hash_map *this_hash_map;
+
+    for (int i = 0; i < hash_map_array_size; i++) {
+        this_hash_map = hash_map_array[i];
+        hash_map_free(this_hash_map);
+    }
+
+    safe_free(hash_map_array);
 
     TEST_ASSERT_EQUAL_INT(__malloc_counter, 0);
 }