diff --git a/.gitignore b/.gitignore index ed8ebf583..4341d1cd0 100644 --- a/.gitignore +++ b/.gitignore @@ -1 +1,3 @@ -__pycache__ \ No newline at end of file +__pycache__ +/Pipfile +/Pipfile.lock diff --git a/.idea/.gitignore b/.idea/.gitignore new file mode 100644 index 000000000..e7e9d11d4 --- /dev/null +++ b/.idea/.gitignore @@ -0,0 +1,2 @@ +# Default ignored files +/workspace.xml diff --git a/.idea/Hash-Tables.iml b/.idea/Hash-Tables.iml new file mode 100644 index 000000000..01c5eb368 --- /dev/null +++ b/.idea/Hash-Tables.iml @@ -0,0 +1,8 @@ +<?xml version="1.0" encoding="UTF-8"?> +<module type="PYTHON_MODULE" version="4"> + <component name="NewModuleRootManager"> + <content url="file://$MODULE_DIR$" /> + <orderEntry type="jdk" jdkName="Pipenv (Hash-Tables)" jdkType="Python SDK" /> + <orderEntry type="sourceFolder" forTests="false" /> + </component> +</module> \ No newline at end of file diff --git a/.idea/inspectionProfiles/profiles_settings.xml b/.idea/inspectionProfiles/profiles_settings.xml new file mode 100644 index 000000000..105ce2da2 --- /dev/null +++ b/.idea/inspectionProfiles/profiles_settings.xml @@ -0,0 +1,6 @@ +<component name="InspectionProjectProfileManager"> + <settings> + <option name="USE_PROJECT_PROFILE" value="false" /> + <version value="1.0" /> + </settings> +</component> \ No newline at end of file diff --git a/.idea/misc.xml b/.idea/misc.xml new file mode 100644 index 000000000..dc841e568 --- /dev/null +++ b/.idea/misc.xml @@ -0,0 +1,4 @@ +<?xml version="1.0" encoding="UTF-8"?> +<project version="4"> + <component name="ProjectRootManager" version="2" project-jdk-name="Pipenv (Hash-Tables)" project-jdk-type="Python SDK" /> +</project> \ No newline at end of file diff --git a/.idea/modules.xml b/.idea/modules.xml new file mode 100644 index 000000000..f61a7ebb3 --- /dev/null +++ b/.idea/modules.xml @@ -0,0 +1,8 @@ +<?xml version="1.0" encoding="UTF-8"?> +<project version="4"> + <component name="ProjectModuleManager"> + <modules> + <module fileurl="file://$PROJECT_DIR$/.idea/Hash-Tables.iml" filepath="$PROJECT_DIR$/.idea/Hash-Tables.iml" /> + </modules> + </component> +</project> \ No newline at end of file diff --git a/.idea/vcs.xml b/.idea/vcs.xml new file mode 100644 index 000000000..94a25f7f4 --- /dev/null +++ b/.idea/vcs.xml @@ -0,0 +1,6 @@ +<?xml version="1.0" encoding="UTF-8"?> +<project version="4"> + <component name="VcsDirectoryMappings"> + <mapping directory="$PROJECT_DIR$" vcs="Git" /> + </component> +</project> \ No newline at end of file diff --git a/dynamic_array/dynamic_array.py b/dynamic_array/dynamic_array.py new file mode 100644 index 000000000..af3f2f835 --- /dev/null +++ b/dynamic_array/dynamic_array.py @@ -0,0 +1,46 @@ +class DynamicArray: + def __init__(self, capacity=8): + self.count = 0 + self.capacity = capacity + self.storage = [None] * self.capacity + + def insert(self, index, value): + if self.count == self.capacity: + # increase size + print("ERROR:, Array is full") + return + + if index >= self.count: + print('Error: Index out of bounds') + return + + for i in range(self.count, index -1): + self.storage[i] = self.storage[i-1] + + self.storage[index] = value + self.count += 1 + + def append(self, value): + if self.count == self.capacity: + # increase size + print("ERROR:, Array is full") + return + + # self.count += 1 + # # account for zero index... - 1 + # self.storage[self.count - 1] = value + # + # same as above + self.storage[self.count] = value + self.count += 1 + + # Double the size + def double_size(self): + self.capacity *= 2 + new_storage = [None] * self.capacity + + for i in range(self.count): + new_storage[i] = self.storage[i] + + # change pointer + self.storage = new_storage diff --git a/hashes/hashes.py b/hashes/hashes.py new file mode 100644 index 000000000..9cce23eb9 --- /dev/null +++ b/hashes/hashes.py @@ -0,0 +1,14 @@ +import hashlib + +# b string - bytes string - strips away -- .encode() works too +key = b"str" +my_string = "normal string" + +for i in range (10): + hashed = hashlib.sha256(key).hexdigest() + print(hashed) + + +# for i in range(10): +# hashed = hash(key) +# print(hashed % 8)?????? diff --git a/hashes1/hashesClass.py b/hashes1/hashesClass.py new file mode 100644 index 000000000..a7c427c18 --- /dev/null +++ b/hashes1/hashesClass.py @@ -0,0 +1,14 @@ +import hashlib + +# b string - bytes string - strips away -- .encode() works too +key = b"str" +my_string = "normal string".encode() + +# for i in range (10): +# hashed = hashlib.sha256(key).hexdigest() +# print(hashed) + + +for i in range(10): + hashed = hash(key) + print(hashed % 8) diff --git a/src/collisions.py b/src/collisions.py new file mode 100644 index 000000000..3ca459fd5 --- /dev/null +++ b/src/collisions.py @@ -0,0 +1,42 @@ + + +import random + + +def how_many_before_collision(buckets, loops=1): + for i in range(loops): + tries = 0 + tried = set() + while True: + random_key = str(random.random()) + hash_index = hash(random_key) % buckets + if hash_index not in tried: + tried.add(hash_index) + tries += 1 + + else: + break + + print(f"{buckets} buckets, {tries} hashes before collision. ({tries / buckets * 100:.1f})") + +how_many_before_collision(32, 10) + + +def longest_linked_list_chain(keys, buckets, loops=10): + for i in range(loops): + key_counts = {} + + for i in range(buckets): + key_counts[i] = 0 + + for i in range(keys): + random_key = str(random.random()) + hash_index = hash(random_key) % buckets + key_counts[hash_index] += 1 + + largest_number = 0 + for key in key_counts: + if key_counts[key] > largest_number: + largest_number = key + + print(f"Longest linked list chain for {keys} keys in {buckets}(Load Factor: {keys/ buckets:2f}: {largest_number}") \ No newline at end of file diff --git a/src/hashtable.py b/src/hashtable.py index a68c6ea1c..ebb6773cb 100644 --- a/src/hashtable.py +++ b/src/hashtable.py @@ -1,6 +1,9 @@ # ''' # Linked List hash table key/value pair # ''' + +from dynamic_array import dynamic_array + class LinkedPair: def __init__(self, key, value): self.key = key @@ -16,15 +19,18 @@ def __init__(self, capacity): self.capacity = capacity # Number of buckets in the hash table self.storage = [None] * capacity - + # _ means private function, should not be used outside of class, but you can def _hash(self, key): ''' Hash an arbitrary key and return an integer. You may replace the Python hash with DJB2 as a stretch goal. ''' - return hash(key) + x = hash(key) + print('x', x) + return x + # _hash(3,'8') def _hash_djb2(self, key): ''' @@ -43,6 +49,18 @@ def _hash_mod(self, key): return self._hash(key) % self.capacity + # if self.capacity is not FULL: + # if self.capacity: + # current_next = self.next + # self.next = LinkedPair(value, self, current_next) + # else: + # self.key = key + # print('k',self.key) + # self.value = value + # print('v',self.value) + # + # print(insert(3, 3, 5)) + def insert(self, key, value): ''' Store the value with the given key. @@ -51,9 +69,16 @@ def insert(self, key, value): Fill this in. ''' - pass + index = self._hash_mod(key) + + if self.storage[index] is not None: + print(f"WARNING: Collusion has occured at {index}") + + else: + self.storage[index] = (key, value) + return def remove(self, key): ''' @@ -63,8 +88,18 @@ def remove(self, key): Fill this in. ''' - pass + index = self._hash_mod(key) + + if self.storage[index] is not None: + if self.storage[index][0] == key: + self.storage[index] = None + else: + print(f"WARNING: Collusion has occured at {index}") + else: + print(f"Warning key ({key}) not found") + + return def retrieve(self, key): ''' @@ -74,8 +109,18 @@ def retrieve(self, key): Fill this in. ''' - pass + index = self._hash_mod(key) + if self.storage[index] is not None: + if self.storage[index][0] == key: + return self.storage[index][1] + else: + print(f"WARNING: Collusion has occured at {index}") + + else: + return None + + return def resize(self): ''' @@ -84,8 +129,12 @@ def resize(self): Fill this in. ''' - pass + old_storage = self.storage + self.capacity *= 2 + self.storage = [None] * self.capacity + for item in old_storage: + self.insert(item[0], item[1]) if __name__ == "__main__": diff --git a/src/test_hashtable.py b/src/test_hashtable.py index b5a4d8194..d089d8295 100644 --- a/src/test_hashtable.py +++ b/src/test_hashtable.py @@ -87,110 +87,116 @@ def test_hash_table_insertion_overwrites_correctly(self): self.assertTrue(return_value == "new-val-9") def test_hash_table_removes_correctly(self): - ht = HashTable(8) - - ht.insert("key-0", "val-0") - ht.insert("key-1", "val-1") - ht.insert("key-2", "val-2") - ht.insert("key-3", "val-3") - ht.insert("key-4", "val-4") - ht.insert("key-5", "val-5") - ht.insert("key-6", "val-6") - ht.insert("key-7", "val-7") - ht.insert("key-8", "val-8") - ht.insert("key-9", "val-9") - - return_value = ht.retrieve("key-0") - self.assertTrue(return_value == "val-0") - return_value = ht.retrieve("key-1") - self.assertTrue(return_value == "val-1") - return_value = ht.retrieve("key-2") - self.assertTrue(return_value == "val-2") - return_value = ht.retrieve("key-3") - self.assertTrue(return_value == "val-3") - return_value = ht.retrieve("key-4") - self.assertTrue(return_value == "val-4") - return_value = ht.retrieve("key-5") - self.assertTrue(return_value == "val-5") - return_value = ht.retrieve("key-6") - self.assertTrue(return_value == "val-6") - return_value = ht.retrieve("key-7") - self.assertTrue(return_value == "val-7") - return_value = ht.retrieve("key-8") - self.assertTrue(return_value == "val-8") - return_value = ht.retrieve("key-9") - self.assertTrue(return_value == "val-9") - - ht.remove("key-9") - ht.remove("key-8") - ht.remove("key-7") - ht.remove("key-6") - ht.remove("key-5") - ht.remove("key-4") - ht.remove("key-3") - ht.remove("key-2") - ht.remove("key-1") - ht.remove("key-0") - - return_value = ht.retrieve("key-0") - self.assertTrue(return_value is None) - return_value = ht.retrieve("key-1") - self.assertTrue(return_value is None) - return_value = ht.retrieve("key-2") - self.assertTrue(return_value is None) - return_value = ht.retrieve("key-3") - self.assertTrue(return_value is None) - return_value = ht.retrieve("key-4") - self.assertTrue(return_value is None) - return_value = ht.retrieve("key-5") - self.assertTrue(return_value is None) - return_value = ht.retrieve("key-6") - self.assertTrue(return_value is None) - return_value = ht.retrieve("key-7") - self.assertTrue(return_value is None) - return_value = ht.retrieve("key-8") - self.assertTrue(return_value is None) - return_value = ht.retrieve("key-9") - self.assertTrue(return_value is None) - - def test_hash_table_resize(self): - ht = HashTable(8) - - ht.insert("key-0", "val-0") - ht.insert("key-1", "val-1") - ht.insert("key-2", "val-2") - ht.insert("key-3", "val-3") - ht.insert("key-4", "val-4") - ht.insert("key-5", "val-5") - ht.insert("key-6", "val-6") - ht.insert("key-7", "val-7") - ht.insert("key-8", "val-8") - ht.insert("key-9", "val-9") - - ht.resize() - - self.assertTrue(len(ht.storage) == 16) - - return_value = ht.retrieve("key-0") - self.assertTrue(return_value == "val-0") - return_value = ht.retrieve("key-1") - self.assertTrue(return_value == "val-1") - return_value = ht.retrieve("key-2") - self.assertTrue(return_value == "val-2") - return_value = ht.retrieve("key-3") - self.assertTrue(return_value == "val-3") - return_value = ht.retrieve("key-4") - self.assertTrue(return_value == "val-4") - return_value = ht.retrieve("key-5") - self.assertTrue(return_value == "val-5") - return_value = ht.retrieve("key-6") - self.assertTrue(return_value == "val-6") - return_value = ht.retrieve("key-7") - self.assertTrue(return_value == "val-7") - return_value = ht.retrieve("key-8") - self.assertTrue(return_value == "val-8") - return_value = ht.retrieve("key-9") - self.assertTrue(return_value == "val-9") + ht1 = HashTable(8) + + ht1.insert("key1","Hello") + ht1.insert("key2", "goodbybe") + ht1.remove("key1") + + print(ht1.storage) + # + # ht.insert("key-0", "val-0") + # ht.insert("key-1", "val-1") + # ht.insert("key-2", "val-2") + # ht.insert("key-3", "val-3") + # ht.insert("key-4", "val-4") + # ht.insert("key-5", "val-5") + # ht.insert("key-6", "val-6") + # ht.insert("key-7", "val-7") + # ht.insert("key-8", "val-8") + # ht.insert("key-9", "val-9") + # + # return_value = ht.retrieve("key-0") + # self.assertTrue(return_value == "val-0") + # return_value = ht.retrieve("key-1") + # self.assertTrue(return_value == "val-1") + # return_value = ht.retrieve("key-2") + # self.assertTrue(return_value == "val-2") + # return_value = ht.retrieve("key-3") + # self.assertTrue(return_value == "val-3") + # return_value = ht.retrieve("key-4") + # self.assertTrue(return_value == "val-4") + # return_value = ht.retrieve("key-5") + # self.assertTrue(return_value == "val-5") + # return_value = ht.retrieve("key-6") + # self.assertTrue(return_value == "val-6") + # return_value = ht.retrieve("key-7") + # self.assertTrue(return_value == "val-7") + # return_value = ht.retrieve("key-8") + # self.assertTrue(return_value == "val-8") + # return_value = ht.retrieve("key-9") + # self.assertTrue(return_value == "val-9") + # + # ht.remove("key-9") + # ht.remove("key-8") + # ht.remove("key-7") + # ht.remove("key-6") + # ht.remove("key-5") + # ht.remove("key-4") + # ht.remove("key-3") + # ht.remove("key-2") + # ht.remove("key-1") + # ht.remove("key-0") + # + # return_value = ht.retrieve("key-0") + # self.assertTrue(return_value is None) + # return_value = ht.retrieve("key-1") + # self.assertTrue(return_value is None) + # return_value = ht.retrieve("key-2") + # self.assertTrue(return_value is None) + # return_value = ht.retrieve("key-3") + # self.assertTrue(return_value is None) + # return_value = ht.retrieve("key-4") + # self.assertTrue(return_value is None) + # return_value = ht.retrieve("key-5") + # self.assertTrue(return_value is None) + # return_value = ht.retrieve("key-6") + # self.assertTrue(return_value is None) + # return_value = ht.retrieve("key-7") + # self.assertTrue(return_value is None) + # return_value = ht.retrieve("key-8") + # self.assertTrue(return_value is None) + # return_value = ht.retrieve("key-9") + # self.assertTrue(return_value is None) + # + # def test_hash_table_resize(self): + # ht = HashTable(8) + # + # ht.insert("key-0", "val-0") + # ht.insert("key-1", "val-1") + # ht.insert("key-2", "val-2") + # ht.insert("key-3", "val-3") + # ht.insert("key-4", "val-4") + # ht.insert("key-5", "val-5") + # ht.insert("key-6", "val-6") + # ht.insert("key-7", "val-7") + # ht.insert("key-8", "val-8") + # ht.insert("key-9", "val-9") + # + # ht.resize() + # + # self.assertTrue(len(ht.storage) == 16) + # + # return_value = ht.retrieve("key-0") + # self.assertTrue(return_value == "val-0") + # return_value = ht.retrieve("key-1") + # self.assertTrue(return_value == "val-1") + # return_value = ht.retrieve("key-2") + # self.assertTrue(return_value == "val-2") + # return_value = ht.retrieve("key-3") + # self.assertTrue(return_value == "val-3") + # return_value = ht.retrieve("key-4") + # self.assertTrue(return_value == "val-4") + # return_value = ht.retrieve("key-5") + # self.assertTrue(return_value == "val-5") + # return_value = ht.retrieve("key-6") + # self.assertTrue(return_value == "val-6") + # return_value = ht.retrieve("key-7") + # self.assertTrue(return_value == "val-7") + # return_value = ht.retrieve("key-8") + # self.assertTrue(return_value == "val-8") + # return_value = ht.retrieve("key-9") + # self.assertTrue(return_value == "val-9") if __name__ == '__main__':