forked from xharaken/step2
-
Notifications
You must be signed in to change notification settings - Fork 0
Expand file tree
/
Copy pathhash_table.py
More file actions
233 lines (202 loc) · 8.26 KB
/
hash_table.py
File metadata and controls
233 lines (202 loc) · 8.26 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
import random, sys, time
###########################################################################
# #
# Implement a hash table from scratch! (⑅•ᴗ•⑅) #
# #
# Please do not use Python's dictionary or Python's collections library. #
# The goal is to implement the data structure yourself. #
# #
###########################################################################
# Hash function.
#
# |key|: string
# Return value: a hash value
def calculate_hash(key):
    """Return a non-negative integer hash of the string |key|.

    A polynomial rolling hash is used so that the position of every
    character contributes to the result. Unlike a plain sum of code points,
    anagrams such as "abc" and "cba" get different hash values.

    |key|: string
    Return value: an integer in the range [0, 2**61 - 1).
    Raises AssertionError if |key| is not a str.
    """
    assert type(key) == str
    # A prime multiplier larger than any ASCII code point, so short ASCII
    # strings map to distinct values before the modulus applies.
    base = 257
    # A large (Mersenne) prime modulus keeps the intermediate value bounded
    # no matter how long the key is.
    modulus = (1 << 61) - 1
    hash_value = 0
    for char in key:
        hash_value = (hash_value * base + ord(char)) % modulus
    return hash_value
# An item object that represents one key - value pair in the hash table.
class Item:
    """One key - value pair, stored as a node of a singly linked list."""

    def __init__(self, key, value, next):
        """Create a node.

        |key|: The key of the item. Must be a str (checked by an assert).
        |value|: The value of the item.
        |next|: The following node in the linked list, or None when this
                node is the last one.
        """
        assert type(key) is str
        self.key, self.value, self.next = key, value, next
# The main data structure of the hash table that stores key - value pairs.
# The key must be a string. The value can be any type.
#
# |self.bucket_size|: The bucket size.
# |self.buckets|: An array of the buckets. self.buckets[hash % self.bucket_size]
# stores a linked list of items whose hash value is |hash|.
# |self.item_count|: The total number of items in the hash table.
class HashTable:
    """Hash table with separate chaining that maps str keys to any value.

    The bucket array grows when it gets crowded and shrinks when it gets
    sparse, so the chains stay short as the number of items changes.
    """

    # Smallest bucket array the table ever uses; shrinking stops here.
    # A prime number is chosen to reduce hash conflicts.
    _MIN_BUCKET_SIZE = 97
    # Grow the bucket array once more than 70% of it is in use.
    _GROW_LOAD_FACTOR = 0.7
    # Shrink the bucket array once less than 30% of it is in use. This is
    # the same 0.3 that check_size() uses, so a table that has just been
    # resized always passes that check.
    _SHRINK_LOAD_FACTOR = 0.3

    # Initialize the hash table.
    def __init__(self):
        # |self.bucket_size|: The current number of buckets.
        self.bucket_size = self._MIN_BUCKET_SIZE
        # |self.buckets|: self.buckets[hash % self.bucket_size] is the head
        # of the linked list of items with that hash (None when empty).
        self.buckets = [None] * self.bucket_size
        # |self.item_count|: The total number of items in the hash table.
        self.item_count = 0

    # Put an item into the hash table. If the key already exists, the
    # corresponding value is updated to the new value.
    #
    # |key|: The key of the item.
    # |value|: The value of the item.
    # Return value: True if a new item is added. False if the key already
    #               exists and the value is updated.
    def put(self, key, value):
        assert type(key) == str
        check_size(self.size(), self.bucket_size)  # Don't remove this code.
        bucket_index = calculate_hash(key) % self.bucket_size
        item = self.buckets[bucket_index]
        while item is not None:
            if item.key == key:
                item.value = value
                return False
            item = item.next
        # The key is new: prepend it to the bucket's linked list.
        self.buckets[bucket_index] = Item(key, value,
                                          self.buckets[bucket_index])
        self.item_count += 1
        if self.item_count > self.bucket_size * self._GROW_LOAD_FACTOR:
            # Doubling (plus one, to keep the size odd) leaves the table
            # about 35% full, which is still above the 30% lower bound.
            self._rehash(self.bucket_size * 2 + 1)
        return True

    # Get an item from the hash table.
    #
    # |key|: The key.
    # Return value: If the item is found, (the value of the item, True) is
    #               returned. Otherwise, (None, False) is returned.
    def get(self, key):
        assert type(key) == str
        check_size(self.size(), self.bucket_size)  # Don't remove this code.
        bucket_index = calculate_hash(key) % self.bucket_size
        item = self.buckets[bucket_index]
        while item is not None:
            if item.key == key:
                return (item.value, True)
            item = item.next
        return (None, False)

    # Delete an item from the hash table.
    #
    # |key|: The key.
    # Return value: True if the item is found and deleted successfully.
    #               False otherwise.
    def delete(self, key):
        assert type(key) == str
        bucket_index = calculate_hash(key) % self.bucket_size
        previous = None
        item = self.buckets[bucket_index]
        while item is not None:
            if item.key == key:
                # Unlink the node, from the bucket head or mid-chain.
                if previous is None:
                    self.buckets[bucket_index] = item.next
                else:
                    previous.next = item.next
                self.item_count -= 1
                self._shrink_if_sparse()
                return True
            previous = item
            item = item.next
        return False

    # Return the total number of items in the hash table.
    def size(self):
        return self.item_count

    # Halve the bucket array when fewer than 30% of the buckets are in use,
    # but never go below the initial size.
    def _shrink_if_sparse(self):
        if self.bucket_size <= self._MIN_BUCKET_SIZE:
            return
        if self.item_count < self.bucket_size * self._SHRINK_LOAD_FACTOR:
            self._rehash(max(self._MIN_BUCKET_SIZE, self.bucket_size // 2))

    # Move every item into a fresh bucket array of |new_bucket_size| buckets.
    # The existing Item nodes are relinked rather than copied.
    def _rehash(self, new_bucket_size):
        new_buckets = [None] * new_bucket_size
        for head in self.buckets:
            item = head
            while item is not None:
                # Remember the rest of the old chain before relinking.
                following = item.next
                new_index = calculate_hash(item.key) % new_bucket_size
                item.next = new_buckets[new_index]
                new_buckets[new_index] = item
                item = following
        self.bucket_size = new_bucket_size
        self.buckets = new_buckets
# Check that the hash table has a "reasonable" bucket size.
# The bucket size is judged "reasonable" if it is smaller than 100 or
# the buckets are 30% or more used.
#
# Note: Don't change this function.
def check_size(item_count: int, bucket_size: int) -> None:
    """Assert that the bucket array is not oversized for the item count.

    |item_count|: The number of items currently stored.
    |bucket_size|: The current number of buckets.
    Raises AssertionError when |bucket_size| is 100 or more and fewer than
    30% of the buckets would be needed to hold |item_count| items.
    """
    assert (bucket_size < 100 or item_count >= bucket_size * 0.3)
# Test the functional behavior of the hash table.
def functional_test():
    """Exercise put / get / delete / size on one HashTable instance.

    Any mismatch raises AssertionError; on success a confirmation line is
    printed.
    """
    table = HashTable()

    # A single insertion followed by a lookup.
    assert table.put("aaa", 1) == True
    assert table.get("aaa") == (1, True)
    assert table.size() == 1

    # Several more keys, then look up present and absent keys.
    for key, value in (("bbb", 2), ("ccc", 3), ("ddd", 4)):
        assert table.put(key, value) == True
    for key, value in (("aaa", 1), ("bbb", 2), ("ccc", 3), ("ddd", 4)):
        assert table.get(key) == (value, True)
    for absent in ("a", "aa", "aaaa"):
        assert table.get(absent) == (None, False)
    assert table.size() == 4

    # Overwriting an existing key updates the value without growing.
    assert table.put("aaa", 11) == False
    assert table.get("aaa") == (11, True)
    assert table.size() == 4

    # Deleting a present key, then absent / already-deleted keys.
    assert table.delete("aaa") == True
    assert table.get("aaa") == (None, False)
    assert table.size() == 3
    for absent in ("a", "aa", "aaa", "aaaa"):
        assert table.delete(absent) == False
    for key in ("ddd", "ccc", "bbb"):
        assert table.delete(key) == True
    for key in ("aaa", "bbb", "ccc", "ddd"):
        assert table.get(key) == (None, False)
    assert table.size() == 0

    # Keys made of the same letters in a different order.
    permutations = ("abc", "acb", "bac", "bca", "cab", "cba")
    for value, key in enumerate(permutations, start=1):
        assert table.put(key, value) == True
    for value, key in enumerate(permutations, start=1):
        assert table.get(key) == (value, True)
    assert table.size() == 6
    for key in ("abc", "cba", "bac", "bca", "acb", "cab"):
        assert table.delete(key) == True
    assert table.size() == 0

    # Test the rehashing.
    numbers = [str(i) for i in range(100)]
    for text in numbers:
        table.put(text, text)
    for text in numbers:
        assert table.get(text) == (text, True)
    for text in numbers:
        assert table.delete(text) == True

    table.put("abc", 1)
    table.put("acb", 2)
    assert table.get("abc") == (1, True)
    assert table.get("acb") == (2, True)
    print("Functional tests passed!")
# Test the performance of the hash table.
#
# Your goal is to make the hash table's operations run in mostly O(1) time.
# If they do, the execution time of each iteration
# should not depend on the number of items in the hash table. To achieve the
# goal, you will need to 1) implement rehashing (Hint: expand / shrink the hash
# table when the number of items in the hash table hits some threshold) and
# 2) tweak the hash function (Hint: think about ways to reduce hash conflicts).
def performance_test():
    """Time repeated batches of put / get calls, then delete everything.

    Each of the 100 iterations inserts and looks up 10000 pseudo-random
    keys and prints "<iteration> <elapsed seconds>". Afterwards the same
    key sequences are regenerated and deleted, and the table is asserted
    to be empty.
    """
    hash_table = HashTable()
    for iteration in range(100):
        # perf_counter() is monotonic and high resolution, so the measured
        # interval is not disturbed by system clock adjustments.
        begin = time.perf_counter()
        random.seed(iteration)
        for _ in range(10000):
            rand = random.randint(0, 100000000)
            hash_table.put(str(rand), str(rand))
        # Re-seed so the lookups replay exactly the keys just inserted.
        random.seed(iteration)
        for _ in range(10000):
            rand = random.randint(0, 100000000)
            hash_table.get(str(rand))
        end = time.perf_counter()
        print("%d %.6f" % (iteration, end - begin))

    # Replay every iteration's key sequence once more to delete the items.
    for iteration in range(100):
        random.seed(iteration)
        for _ in range(10000):
            rand = random.randint(0, 100000000)
            hash_table.delete(str(rand))

    assert hash_table.size() == 0
    print("Performance tests passed!")
# Run the test suites, functional first, only when this file is executed
# as a script.
if __name__ == "__main__":
    for run_suite in (functional_test, performance_test):
        run_suite()