bloominstituteoftechnology · shyabiswas · Oct 29, 2020 · Oct 29, 2020 · Nov 6, 2020
diff --git a/hashtable/class_example.py b/hashtable/class_example.py
@@ -0,0 +1,166 @@
+# lorem ipsum
+my_arr = "Lorem ipsum dolor sit amet consectetur adipiscing elit".split()  # the eight words, one per element
+
+# search for an element, for example consectetur
+
+# O(n)
+# for word in my_arr:
+#     if word == 'consectetur':
+#         return True
+
+# if 'elit' in my_arr:
+#     return True 
+
+# O(log n)
+# sort array, then run binary search on it
+
+# what if we could find the index of the element in O(1) time?
+# then we could take 1 more step to access the element: my_arr[5]
+
+# we would have O(1) search
+
+# we would like a function that returns the index
+
+# Hash function or hashing function
+
+## do you have to track where you've put things in the underlying array?
+
+
+# Hash functions
+# Write a function that takes a string and turns it into a number
+
+# hash the string with a hashing function....and you get back a hash
+
+my_arr = [None for _ in range(8)]  # eight empty slots to hash words into
+
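+# Properties we want from a hash function:
+# - it's fast to compute
+# - it's deterministic (the same input always produces the same hash)
+# - ideally you can't get the input back from the output (a requirement for cryptographic hashes)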
+
+def len_hash(s):
+    """Toy hash function: the hash of ``s`` is simply its length.
+
+    For this example the returned length is used directly as an array index.
+    """
+    length = len(s)
+    return length
+
+# Use the hashing function to put the word 'hello' into the array
+hello_number = len_hash('hello') # use hashing function to get an index (5, the word's length)
+my_arr[hello_number] = 'hello'
+
+
+## some time passes...
+hello_number = len_hash('hello') # use hashing function to find the index
+my_arr[hello_number]  # pull out the word we want
+
+# what about words of the same length?
+# 'world' also has length 5, so it hashes to the same index as 'hello'
+world_number = len_hash('world')
+my_arr[world_number] = 'world' # collision: this overwrites the 'hello' stored above
+
+
+world_number = len_hash('world')
+my_arr[world_number] # this slot now holds 'world'; 'hello' is gone
+
+# what about long words?
+# this word is 34 letters long, so its hash is past the end of our 8-slot array
+long_word = 'supercalifragilisticexpialidocious'
+long_word_hash = len_hash(long_word)
+
+# modulo wraps the hash back into the valid index range: 34 % 8 == 2
+long_word_idx = long_word_hash % len(my_arr)
+
+my_arr[long_word_idx] = long_word
+
+## how to fix this?
+### dynamic array?
+
+### use modulo, aka 'mod the number'
+
+
+# the problem with arrays: search is slow
+# How to get faster?
+# To reach O(1), make a magic function to return the index of the target word in O(1) time
+# made simple hash function
+# make the hash function and array play nice together
+
+# Let's improve our hash function, by making it more unique
+
+## add up the letters
+### assign a number to every letter
+### ASCII has already done this
+
+def add_hash(s):
+    """Hash ``s`` by adding up the code points (``ord``) of its characters."""
+    return sum(ord(letter) for letter in s)
+
+### doesn't distinguish anagrams: they contain the same letters, so they add up to the same total
+#### e.g. add_hash('dad') == add_hash('add')
+
+# UTF-8, ASCII on steroids
+# encode
+def utf8_hash(s):
+    """Hash ``s`` by adding up the byte values of its encoded form.
+
+    ``str.encode()`` with no arguments encodes as UTF-8, and iterating the
+    resulting bytes object yields one integer per byte.
+    """
+    return sum(s.encode())
+
+# we can do math on the bytes of the string!
+
+my_arr = [None for _ in range(10000)]  # a much bigger table: 10000 empty slots
+
+def put(key, value):
+    """Store ``value`` in ``my_arr`` at the slot that ``key`` hashes to.
+
+    Whatever was previously in that slot is overwritten.
+    """
+    # hash the key, then wrap the hash into the array's index range
+    slot = utf8_hash(key) % len(my_arr)
+    my_arr[slot] = value
+
+put('hello', 'hello world')  # store the value 'hello world' under the key 'hello'
+
+# what is the time complexity here?
+## measured against the length of the key, it's O(n): the hash function visits every byte of the key
+## measured against the number of slots / length of the array, it's O(1)
+
+def get(s):
+    """Return whatever ``my_arr`` holds at the slot that the key ``s`` hashes to."""
+    # same hash-then-modulo steps as put(), so the same key lands on the same slot
+    slot = utf8_hash(s) % len(my_arr)
+    return my_arr[slot]
+
+get('hello') ## look up the value stored under the key 'hello' (the result is not used here)
+
+
+# Delete: find the value, then set to None
+
+# Put
+## 1. Hash our string/key, get out a number
+## 2. Take this number and modulo it by the length of the array
+## 3. This new number can be used as an index, so put the value at that index in our array
+
+# Get
+## 1. Hash our string/key, string --> number
+## 2. Mod this number by length of array
+## 3. Use this modded number / index to get the value there
+
+
+
+
+## Common use-cases?
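+### hashing functions: cryptography (e.g. storing passwords, verifying data integrity) -- note that hashing is one-way, so strictly speaking it is not encryption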
+### Fast O(1) lookup of values using a key to find it
+
+## Easy to think about time complexity for arrays vs objects/dictionaries
+
+# if x in my_data_structure:  ## O(n) for an array; a hash table runs get() instead --> O(1)
+
+# look up user profile from username, 1billion users
+
+
+
+
+ # Couldn't we end up with the wrong modulo if we've increased the size of the array between put and get?
+ # Increasing the size of the array which we're using with our hash table?
+ # Solving collisions??
+ ### TO BE CONTINUED....
diff --git a/hashtable/hashtable.py b/hashtable/hashtable.py
@@ -22,6 +22,10 @@ class HashTable:
 
     def __init__(self, capacity):
         # Your code here
+        # Number of slots in the table.
+        self.capacity = capacity
+        # The slot list itself; every slot starts out empty (None) and put()
+        # later hangs a chain of entries off it.
+        self.data = [None for _ in range(capacity)]
+        # How many key/value pairs are currently stored.
+        self.item_count = 0
 
 
     def get_num_slots(self):
@@ -35,7 +39,7 @@ def get_num_slots(self):
         Implement this.
         """
         # Your code here
-
+        # self.capacity is a number (it sizes the list in __init__), so calling
+        # len() on it raises TypeError. The slot count is the length of the
+        # list that backs the table.
+        return len(self.data)
 
     def get_load_factor(self):
         """
@@ -44,7 +48,7 @@ def get_load_factor(self):
         Implement this.
         """
         # Your code here
-
+        # Load factor = stored items per slot. Returning the bare capacity here
+        # would make put()'s "> 0.7" check true for any table with at least one
+        # slot (resizing on every insert) and delete()'s "< 0.2" check never true.
+        return self.item_count / self.capacity
 
     def fnv1(self, key):
         """
@@ -63,6 +67,10 @@ def djb2(self, key):
         Implement this, and/or FNV-1.
         """
         # Your code here
+        # DJB2: start from 5381 and fold each character in as h * 33 + ord(c).
+        # The accumulator is deliberately not called `hash`, which would shadow
+        # the builtin hash() inside this method.
+        # No bit-width truncation is applied, so the result grows with the key.
+        digest = 5381
+        for char in key:
+            digest = digest * 33 + ord(char)
+        return digest
 
 
     def hash_index(self, key):
@@ -82,7 +90,21 @@ def put(self, key, value):
         Implement this.
         """
         # Your code here
-
+        slot = self.hash_index(key)
+
+        # Walk the chain in this slot, stopping on the entry that already
+        # holds `key` (if there is one).
+        node = self.data[slot]
+        while node is not None:
+            if node.key == key:
+                break
+            node = node.next
+
+        if node is not None:
+            # Existing key: overwrite the value in place; the count is unchanged.
+            node.value = value
+            return
+
+        # New key: count it, then link a fresh entry in at the head of the chain.
+        self.item_count += 1
+        entry = HashTableEntry(key, value)
+        entry.next = self.data[slot]
+        self.data[slot] = entry
+
+        # Double the capacity when the load factor exceeds 0.7.
+        if self.get_load_factor() > 0.7:
+            self.resize(self.capacity * 2)
 
     def delete(self, key):
         """
@@ -93,6 +115,29 @@ def delete(self, key):
         Implement this.
         """
         # Your code here
+        slot = self.hash_index(key)
+
+        # Search the chain for `key`, remembering the entry just before it so
+        # the match can be unlinked afterwards.
+        previous = None
+        node = self.data[slot]
+        while node is not None:
+            if node.key == key:
+                break
+            previous, node = node, node.next
+
+        if node is None:
+            # Empty slot, or the chain ended without a match.
+            print('No entry with the provided key.')
+            return
+
+        self.item_count -= 1
+        if previous is None:
+            # The match is the head of the chain: the slot now points past it.
+            self.data[slot] = node.next
+        else:
+            # The match is further down: splice it out of the chain.
+            previous.next = node.next
+
+        # Halve the capacity when the load factor drops below 0.2.
+        if self.get_load_factor() < 0.2:
+            self.resize(self.capacity // 2)
+        return
 
 
     def get(self, key):
@@ -104,6 +149,15 @@ def get(self, key):
         Implement this.
         """
         # Your code here
+        node = self.data[self.hash_index(key)]
+
+        # Advance until we either run off the end of the chain or land on the
+        # entry whose key matches.
+        while node is not None:
+            if node.key == key:
+                break
+            node = node.next
+
+        # No match (or an empty slot) yields None.
+        return None if node is None else node.value
 
 
     def resize(self, new_capacity):
@@ -114,6 +168,16 @@ def resize(self, new_capacity):
         Implement this.
         """
         # Your code here
+        # Build a scratch table of the requested size and re-insert every
+        # entry through put(), so each key is placed by the new table's own
+        # hash_index().
+        rebuilt = HashTable(new_capacity)
+        for head in self.data:
+            node = head
+            while node is not None:
+                rebuilt.put(node.key, node.value)
+                node = node.next
+
+        # Adopt the scratch table's state as our own.
+        self.capacity, self.data, self.item_count = (
+            rebuilt.capacity,
+            rebuilt.data,
+            rebuilt.item_count,
+        )