Skip to content
Open

MVP #177

Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
166 changes: 166 additions & 0 deletions hashtable/class_example.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,166 @@
# lorem ipsum
my_arr = ["Lorem", "ipsum", "dolor", "sit", "amet", "consectetur", "adipiscing", "elit" ]

# search for an element, for example consectetur

# O(n)
# for word in my_arr:
# if word == 'consectetur':
# return True

# if 'elit' in my_arr:
# return True

# O(log n)
# sort array, then run binary search on it

# what if we could find the index of the element in O(1) time?
# then we could take 1 more step to access the element: my_arr[5]

# we would have O(1) search

# we would like a function that returns the index

# Hash function or hashing function

## do you have to track where you've put things in the underlying array?


# Hash functions
# Write a function that takes a string and turns it into a number

# hash the string with a hashing function....and you get back a hash

my_arr = [None] * 8

# it's fast
# deterministic
# can't get the input from the output

def len_hash(s):
    """Toy hash function: use the length of the word as its index.

    Deterministic and fast, but any two words with the same number of
    characters produce the same result.
    """
    length = len(s)
    return length

# Use the hashing function to put the word 'hello' into the array
hello_number = len_hash('hello') # 'hello' has 5 letters, so its index is 5
my_arr[hello_number] = 'hello'


## some time passes...
hello_number = len_hash('hello') # hashing is deterministic: the same word always gives the same index
my_arr[hello_number] # pull out the word we want (bare expression: the result is discarded when run as a script)

# what about words of the same length?
# 'world' also has 5 letters, so it hashes to the same index as 'hello'
# and overwrites it -- this is a collision
world_number = len_hash('world')
my_arr[world_number] = 'world'


world_number = len_hash('world')
my_arr[world_number] # the slot now holds 'world'; 'hello' has been overwritten

# what about long words?
long_word = 'supercalifragilisticexpialidocious'
long_word_hash = len_hash(long_word)

# the hash (the word's length) can be larger than the last valid index,
# so wrap it into range with modulo before using it as an index
long_word_idx = long_word_hash % len(my_arr)

my_arr[long_word_idx] = long_word

## how to fix this?
### dynamic array?

### use modulo, aka 'mod the number'


# the problem with arrays: search is slow
# How to get faster?
# To reach O(1), make a magic function to return the index of the target word in O(1) time
# made simple hash function
# make the hash function and array play nice together

# Let's improve our hash function by making collisions less likely (fewer different words mapping to the same number)

## add up the letters
### assign a number to every letter
### ASCII has already done this

def add_hash(s):
    """Hash a string by adding up the code points of its characters.

    Args:
        s: The string to hash.

    Returns:
        The sum of ``ord(letter)`` over every letter in ``s``; 0 for an
        empty string.

    Note:
        Addition ignores order, so anagrams such as 'dad' and 'add'
        produce the same hash.
    """
    return sum(ord(letter) for letter in s)

### won't work for anagrams!
#### dad vs add

# UTF-8, ASCII on steroids
# encode
def utf8_hash(s):
    """Hash a string by adding up the bytes of its UTF-8 encoding.

    Args:
        s: The string to hash.

    Returns:
        The sum of the byte values of ``s`` encoded as UTF-8; 0 for an
        empty string.
    """
    # Iterating over a bytes object yields ints, so sum() can consume it
    # directly. The encoding is spelled out rather than left to the default.
    return sum(s.encode("utf-8"))

# we can do math on the bytes of the string!

my_arr = [None] * 10000

def put(key, value):
    """Store ``value`` in the module-level ``my_arr`` at the slot chosen by ``key``.

    The key is hashed with ``utf8_hash`` and reduced modulo the array
    length to get a valid index. Whatever was in that slot before is
    overwritten.
    """
    slot = utf8_hash(key) % len(my_arr)
    my_arr[slot] = value

put('hello', 'hello world')

# what is the time complexity here?
## if you measure by the length of the key, O(n)
## if you measure by the number of slots / length of array, then it's O(1)

def get(s):
    """Return whatever is stored in ``my_arr`` at the slot chosen by key ``s``.

    Uses the same steps as ``put``: hash the string with ``utf8_hash``,
    reduce it modulo the array length, then read that index.
    """
    slot = utf8_hash(s) % len(my_arr)
    return my_arr[slot]

get('hello') ## look up the value stored under the key 'hello' (the result is discarded here)


# Delete: find the value, then set to None

# Put
## 1. Hash our string/key, get out a number
## 2. Take this number and modulo it by the length of the array
## 3. This new number can be used as an index, so put the value at that index in our array

# Get
## 1. Hash our string/key, string --> number
## 2. Mod this number by length of array
## 3. Use this modded number / index to get the value there




## Common use-cases?
### hashing functions: cryptography (password storage, checksums) -- note that hashing is one-way, unlike encryption
### Fast O(1) lookup of values using a key to find it

## Easy to think about time complexity for arrays vs objects/dictionaries

# if x in my_data_structure: ## O(n) for an array, runs get() --> O(1) for a hash table

# look up a user profile from a username, with 1 billion users




# Couldn't we end up with the wrong modulo if we've increased the size of the array between put and get?
# Increasing the size of the array which we're using with our hash table?
# Solving collisions??
### TO BE CONTINUED....
70 changes: 67 additions & 3 deletions hashtable/hashtable.py
Original file line number Diff line number Diff line change
Expand Up @@ -22,6 +22,10 @@ class HashTable:

def __init__(self, capacity):
# Your code here
self.capacity = capacity

self.data = [None]* capacity
self.item_count= 0


def get_num_slots(self):
    """
    Return the number of slots in the table's backing list.

    This is the length of ``self.data``, not the number of entries
    currently stored.
    """
    # ``self.capacity`` is an int, so calling len() on it raises TypeError;
    # measure the backing list instead.
    return len(self.data)

def get_load_factor(self):
    """
    Return the load factor of the table.

    The load factor is the number of stored entries divided by the
    number of slots. ``put`` and ``delete`` compare it against 0.7 and
    0.2 to decide when to resize, so it must be a ratio, not the raw
    capacity.
    """
    return self.item_count / self.capacity

def fnv1(self, key):
"""
Expand All @@ -63,6 +67,10 @@ def djb2(self, key):
Implement this, and/or FNV-1.
"""
# Your code here
hash =5381
for c in key:
hash = (hash* 33)+ ord(c)
return hash


def hash_index(self, key):
Expand All @@ -82,7 +90,21 @@ def put(self, key, value):
Implement this.
"""
# Your code here

index = self.hash_index(key)
current = self.data[index]

while current is not None:
if current.key == key:
current.value = value
return
current = current.next
self.item_count += 1
new_node = HashTableEntry(key, value)
new_node.next = self.data[index]
self.data[index] = new_node

if self.get_load_factor() > 0.7:
self.resize(self.capacity * 2)

def delete(self, key):
    """
    Remove the entry stored under ``key`` from the table.

    The matching node is unlinked from the chain in its slot and
    ``item_count`` is decremented. If the load factor then falls below
    0.2 the table is resized to half its capacity, but never below one
    slot.

    Prints a warning and changes nothing when no entry has the key.
    """
    index = self.hash_index(key)

    # Walk the chain while remembering the node before the current one,
    # so head and non-head removals share a single code path.
    previous = None
    current = self.data[index]
    while current is not None and current.key != key:
        previous = current
        current = current.next

    if current is None:
        print('No entry with the provided key.')
        return

    if previous is None:
        # The match is the head of the chain.
        self.data[index] = current.next
    else:
        previous.next = current.next
    self.item_count -= 1

    # Halving a one-slot table would give capacity 0 and an empty backing
    # list, so only shrink while at least one slot remains.
    smaller = self.capacity // 2
    if smaller >= 1 and self.get_load_factor() < 0.2:
        self.resize(smaller)


def get(self, key):
    """
    Look up the value stored under ``key``.

    Follows the chain in the slot selected by ``hash_index(key)`` and
    returns the value of the first entry whose key matches, or None
    when no entry has that key.
    """
    node = self.data[self.hash_index(key)]
    # Advance until we reach the matching entry or run off the chain.
    while node is not None and node.key != key:
        node = node.next
    return None if node is None else node.value


def resize(self, new_capacity):
    """
    Change the table to ``new_capacity`` slots and rehash every entry.

    A fresh ``HashTable`` of the requested size is filled by re-inserting
    each stored key/value pair, then this table takes over its backing
    list, capacity and item count.
    """
    rehashed = HashTable(new_capacity)

    # Visit every slot and every node chained from it.
    for head in self.data:
        node = head
        while node is not None:
            rehashed.put(node.key, node.value)
            node = node.next

    # Adopt the rebuilt storage.
    self.data = rehashed.data
    self.item_count = rehashed.item_count
    self.capacity = rehashed.capacity



Expand Down