Skip to content

Commit 4d3545d

Browse files
author
icgmilk
committed
Switch hashmap implementation to open addressing
Replace separate chaining with open addressing using linear probing. Set the maximum load factor to 50% (down from 75%) to keep probe sequences short. This change trades slightly higher memory usage for significantly faster execution, thanks to better cache locality.
1 parent f265042 commit 4d3545d

File tree

2 files changed

+61
-75
lines changed

2 files changed

+61
-75
lines changed

src/defs.h

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -100,13 +100,13 @@ typedef struct {
100100
typedef struct hashmap_node {
101101
char *key;
102102
void *val;
103-
struct hashmap_node *next;
103+
int state; /* 0: empty, 1: occupied */
104104
} hashmap_node_t;
105105

106106
typedef struct {
107107
int size;
108108
int cap;
109-
hashmap_node_t **buckets;
109+
hashmap_node_t *table;
110110
} hashmap_t;
111111

112112
/* lexer tokens */

src/globals.c

Lines changed: 59 additions & 73 deletions
Original file line numberDiff line numberDiff line change
@@ -414,90 +414,64 @@ hashmap_t *hashmap_create(int cap)
414414

415415
map->size = 0;
416416
map->cap = round_up_pow2(cap);
417-
map->buckets = calloc(map->cap, sizeof(hashmap_node_t *));
417+
map->table = calloc(map->cap, sizeof(hashmap_node_t));
418418

419-
if (!map->buckets) {
420-
printf("Failed to allocate buckets in hashmap_t\n");
419+
if (!map->table) {
420+
printf("Failed to allocate table in hashmap_t\n");
421421
free(map);
422422
return NULL;
423423
}
424424

425425
return map;
426426
}
427427

428-
/* Create a hashmap node on heap.
429-
* @key: The key of node. Must not be NULL.
430-
* @val: The value of node. Could be NULL.
431-
*
432-
* Return: The pointer of created node.
433-
*/
434-
hashmap_node_t *hashmap_node_new(char *key, void *val)
435-
{
436-
if (!key)
437-
return NULL;
438-
439-
const int len = strlen(key);
440-
hashmap_node_t *node = arena_alloc(HASHMAP_ARENA, sizeof(hashmap_node_t));
441-
442-
443-
if (!node) {
444-
printf("Failed to allocate hashmap_node_t\n");
445-
return NULL;
446-
}
447-
448-
node->key = arena_alloc(HASHMAP_ARENA, len + 1);
449-
if (!node->key) {
450-
printf("Failed to allocate hashmap_node_t key with size %d\n", len + 1);
451-
return NULL;
452-
}
453-
454-
strcpy(node->key, key);
455-
node->val = val;
456-
node->next = NULL;
457-
return node;
458-
}
459428

460429
void hashmap_rehash(hashmap_t *map)
461430
{
462431
if (!map)
463432
return;
464433

465434
int old_cap = map->cap;
466-
hashmap_node_t **old_buckets = map->buckets;
435+
hashmap_node_t *old_table = map->table;
467436

468437
map->cap <<= 1;
469-
map->buckets = calloc(map->cap, sizeof(hashmap_node_t *));
438+
map->table = calloc(map->cap, sizeof(hashmap_node_t));
470439

471-
if (!map->buckets) {
472-
printf("Failed to allocate new buckets in hashmap_t\n");
473-
map->buckets = old_buckets;
440+
if (!map->table) {
441+
printf("Failed to allocate new table in hashmap_t\n");
442+
map->table = old_table;
474443
map->cap = old_cap;
475444
return;
476445
}
477446

447+
for (int i = 0; i < map->cap; i++)
448+
map->table[i].state = 0;
449+
450+
map->size = 0;
451+
478452
for (int i = 0; i < old_cap; i++) {
479-
hashmap_node_t *cur = old_buckets[i];
480-
hashmap_node_t *next;
481-
hashmap_node_t *target_cur;
482-
483-
while (cur) {
484-
next = cur->next;
485-
cur->next = NULL;
486-
int index = hashmap_hash_index(map->cap, cur->key);
487-
target_cur = map->buckets[index];
488-
489-
if (!target_cur) {
490-
map->buckets[index] = cur;
491-
} else {
492-
cur->next = target_cur;
493-
map->buckets[index] = cur;
453+
if (old_table[i].state == 1) {
454+
char *key = old_table[i].key;
455+
void *val = old_table[i].val;
456+
457+
int index = hashmap_hash_index(map->cap, key);
458+
int start = index;
459+
460+
while (map->table[index].state == 1) {
461+
index = (index + 1) & (map->cap - 1);
462+
if (index == start) {
463+
printf("Error: New table is full during rehash\n");
464+
abort();
465+
}
494466
}
495467

496-
cur = next;
468+
map->table[index].key = key;
469+
map->table[index].val = val;
470+
map->table[index].state = 1;
471+
map->size++;
497472
}
498473
}
499-
500-
free(old_buckets);
474+
free(old_table);
501475
}
502476

503477
/* Put a key-value pair into given hashmap.
@@ -513,22 +487,28 @@ void hashmap_put(hashmap_t *map, char *key, void *val)
513487
if (!map)
514488
return;
515489

490+
/* Check if size of map exceeds load factor 50% (or 1/2 of capacity) */
491+
if ((map->cap >> 1) <= map->size)
492+
hashmap_rehash(map);
493+
516494
int index = hashmap_hash_index(map->cap, key);
517-
hashmap_node_t *cur = map->buckets[index],
518-
*new_node = hashmap_node_new(key, val);
495+
int start = index;
519496

520-
if (!cur) {
521-
map->buckets[index] = new_node;
522-
} else {
523-
while (cur->next)
524-
cur = cur->next;
525-
cur->next = new_node;
497+
while (map->table[index].state == 1) {
498+
if (strcmp(map->table[index].key, key) == 0) {
499+
map->table[index].val = val;
500+
return;
501+
}
502+
503+
index = (index + 1) & (map->cap - 1);
504+
if (index == start)
505+
return;
526506
}
527507

508+
map->table[index].key = arena_strdup(HASHMAP_ARENA, key);
509+
map->table[index].val = val;
510+
map->table[index].state = 1;
528511
map->size++;
529-
/* Check if size of map exceeds load factor 75% (or 3/4 of capacity) */
530-
if ((map->cap >> 2) + (map->cap >> 1) <= map->size)
531-
hashmap_rehash(map);
532512
}
533513

534514
/* Get key-value pair node from hashmap from given key.
@@ -544,10 +524,16 @@ hashmap_node_t *hashmap_get_node(hashmap_t *map, char *key)
544524
return NULL;
545525

546526
int index = hashmap_hash_index(map->cap, key);
527+
int start = index;
547528

548-
for (hashmap_node_t *cur = map->buckets[index]; cur; cur = cur->next)
549-
if (!strcmp(cur->key, key))
550-
return cur;
529+
while (map->table[index].state == 1) {
530+
if (strcmp(map->table[index].key, key) == 0)
531+
return &map->table[index];
532+
533+
index = (index + 1) & (map->cap - 1);
534+
if (index == start)
535+
return NULL;
536+
}
551537

552538
return NULL;
553539
}
@@ -574,7 +560,7 @@ void *hashmap_get(hashmap_t *map, char *key)
574560
*/
575561
bool hashmap_contains(hashmap_t *map, char *key)
576562
{
577-
return hashmap_get_node(map, key);
563+
return hashmap_get_node(map, key) != NULL;
578564
}
579565

580566
/* Free the hashmap, this also frees key-value pair entry's value.
@@ -585,7 +571,7 @@ void hashmap_free(hashmap_t *map)
585571
if (!map)
586572
return;
587573

588-
free(map->buckets);
574+
free(map->table);
589575
free(map);
590576
}
591577

0 commit comments

Comments
 (0)