diff --git a/batch_only.prof b/batch_only.prof new file mode 100644 index 0000000..b4be275 Binary files /dev/null and b/batch_only.prof differ diff --git a/batchwrite.prof b/batchwrite.prof new file mode 100644 index 0000000..0ee635b Binary files /dev/null and b/batchwrite.prof differ diff --git a/batchwrite_deletedcache.prof b/batchwrite_deletedcache.prof new file mode 100644 index 0000000..317de4d Binary files /dev/null and b/batchwrite_deletedcache.prof differ diff --git a/benchmark_compare.py b/benchmark_compare.py new file mode 100644 index 0000000..0b5600a --- /dev/null +++ b/benchmark_compare.py @@ -0,0 +1,339 @@ +import re +import matplotlib.pyplot as plt +import numpy as np +from collections import defaultdict + +def parse_benchmarks_detailed(bench_str): + """ + Parses benchmark lines extracting: + - db_size: the number after "merkledb_" + - key_size: the number in the "__keys_" part (values and keys are assumed equal) + - op: the operation name (e.g., Delete, BatchWrite, ParallelGet, etc.) + - iterations, ns/op, B/op, allocs/op + Returns a dictionary keyed by (op, db_size, key_size). + """ + # Modified regex: ns/op group now allows decimals (digits, commas, and a decimal point) + pattern = re.compile( + r"^Benchmark_MerkleDB_DBInterface/merkledb_(\d+)_1024_pairs_(\d+)_keys_(\d+)_values_([A-Za-z]+)-\d+\s+([\d,]+)\s+([\d.,]+)\s+ns\/op\s+([\d,]+)\s+B\/op\s+([\d,]+)\s+allocs\/op" + ) + data = {} + for line in bench_str.strip().splitlines(): + line = line.strip() + m = pattern.match(line) + if m: + db_size = int(m.group(1)) + # pair_count = int(m.group(2)) # typically constant (1024_pairs) + key_size = int(m.group(3)) + op = m.group(4) + iterations = int(m.group(5).replace(',', '')) + ns_op = float(m.group(6).replace(',', '')) # Use float to accommodate decimals. + b_op = int(m.group(7).replace(',', '')) + allocs = int(m.group(8).replace(',', '')) + data[(op, db_size, key_size)] = { + 'iterations': iterations, + 'ns_op': ns_op, + 'b_op': b_op, + 'allocs': allocs + } + return data + +def compare_benchmarks_detailed(bench1, bench2): + """ + For keys from the union of both benchmark sets (keyed by (op, db_size, key_size)), + compute the percentage difference for each metric: + percentage change = ((value2 - value1) / value1) * 100. + If a key is missing in one of the sets, its value is assumed equal to the one present. + Returns a dictionary with the same keys. + """ + union_keys = set(bench1.keys()) | set(bench2.keys()) + diff = {} + for key in union_keys: + diff[key] = {} + for metric in ['ns_op', 'b_op', 'allocs']: + v1 = bench1[key][metric] if key in bench1 else None + v2 = bench2[key][metric] if key in bench2 else None + # If one value is missing, assume it to be equal to the other so that diff = 0. + if v1 is None: + v1 = v2 + if v2 is None: + v2 = v1 + diff[key][metric] = ((v2 - v1) / v1 * 100) if v1 != 0 else 0 + return diff + +def organize_diff_by_operation(diff): + """ + Reorganizes the diff dictionary into a nested dict: + diff_by_op[operation][key_size][db_size] = metrics diff. + """ + diff_by_op = defaultdict(lambda: defaultdict(dict)) + for (op, db_size, key_size), metrics in diff.items(): + diff_by_op[op][key_size][db_size] = metrics + return diff_by_op + +def plot_diff_by_operation_grouped(diff_by_op): + """ + For each operation, create a single figure with subplots for each key/value size. + Each subplot shows a grouped bar chart (ns/op, B/op, allocs/op) for the different DB sizes. + Up to three subplots are displayed per row. 
+ """ + # Fixed color mapping per metric: (color if negative, color if positive) + color_map = { + 'ns_op': ('red', 'green'), + 'b_op': ('blue', 'orange'), + 'allocs': ('purple', 'cyan') + } + + for op, keys_dict in diff_by_op.items(): + key_sizes = sorted(keys_dict.keys()) + n_keys = len(key_sizes) + # Show up to 3 subplots per row. + ncols = 3 + nrows = (n_keys + ncols - 1) // ncols # ceiling division + + fig, axs = plt.subplots(nrows, ncols, figsize=(5*ncols, 5*nrows), squeeze=False) + fig.suptitle(f"Operation: {op}", fontsize=16) + + for i, key_size in enumerate(key_sizes): + row = i // ncols + col = i % ncols + ax = axs[row][col] + db_dict = keys_dict[key_size] + # Ensure the DB sizes are in sorted order. + db_sizes = sorted(db_dict.keys()) + ns_values = [db_dict[db]['ns_op'] for db in db_sizes] + b_values = [db_dict[db]['b_op'] for db in db_sizes] + allocs_values = [db_dict[db]['allocs'] for db in db_sizes] + + x = np.arange(len(db_sizes)) + width = 0.25 + + # Create bars with fixed colors per metric. + bars_ns = ax.bar( + x - width, ns_values, width, label='ns/op', + color=[color_map['ns_op'][0] if v < 0 else color_map['ns_op'][1] for v in ns_values] + ) + bars_b = ax.bar( + x, b_values, width, label='B/op', + color=[color_map['b_op'][0] if v < 0 else color_map['b_op'][1] for v in b_values] + ) + bars_allocs = ax.bar( + x + width, allocs_values, width, label='allocs/op', + color=[color_map['allocs'][0] if v < 0 else color_map['allocs'][1] for v in allocs_values] + ) + + ax.set_xlabel('DB size (merkledb_X)') + ax.set_xticks(x) + ax.set_xticklabels([str(db) for db in db_sizes]) + ax.set_ylabel('Percentage change (%)') + ax.set_title(f"Key/Value size: {key_size}") + ax.axhline(0, color='black', linewidth=0.8) + ax.legend(fontsize=8) + + # Add text labels above each bar. + for j, v in enumerate(ns_values): + ax.text(x[j] - width, v, f"{v:.1f}%", ha='center', va='bottom', fontsize=8) + for j, v in enumerate(b_values): + ax.text(x[j], v, f"{v:.1f}%", ha='center', va='bottom', fontsize=8) + for j, v in enumerate(allocs_values): + ax.text(x[j] + width, v, f"{v:.1f}%", ha='center', va='bottom', fontsize=8) + + # Hide any unused subplots. + total_subplots = nrows * ncols + if total_subplots > n_keys: + for i in range(n_keys, total_subplots): + row = i // ncols + col = i % ncols + axs[row][col].axis('off') + + plt.tight_layout(rect=[0, 0, 1, 0.95]) + plt.show() + + +if __name__ == "__main__": + # --- Sample input strings --- + # Replace these strings with your actual benchmark outputs. 
+ bench_str1 = """ +Benchmark_MerkleDB_DBInterface/merkledb_2_1024_pairs_32_keys_32_values_Delete-12 8067 126044 ns/op 11522 B/op 156 allocs/op +Benchmark_MerkleDB_DBInterface/merkledb_2_1024_pairs_32_keys_32_values_BatchWrite-12 100 79435234 ns/op 13014340 B/op 190448 allocs/op +Benchmark_MerkleDB_DBInterface/merkledb_2_1024_pairs_32_keys_32_values_ParallelGet-12 1624815 720.7 ns/op 416 B/op 10 allocs/op +Benchmark_MerkleDB_DBInterface/merkledb_2_1024_pairs_32_keys_32_values_ParallelPut-12 2240 682686 ns/op 44412 B/op 586 allocs/op +Benchmark_MerkleDB_DBInterface/merkledb_2_1024_pairs_32_keys_32_values_ParallelDelete-12 37150 27209 ns/op 5902 B/op 70 allocs/op +Benchmark_MerkleDB_DBInterface/merkledb_2_1024_pairs_32_keys_32_values_Get-12 1533818 724.0 ns/op 415 B/op 10 allocs/op +Benchmark_MerkleDB_DBInterface/merkledb_2_1024_pairs_32_keys_32_values_Put-12 1723 825558 ns/op 72331 B/op 962 allocs/op +Benchmark_MerkleDB_DBInterface/merkledb_4_1024_pairs_32_keys_32_values_ParallelDelete-12 103514 10456 ns/op 4577 B/op 50 allocs/op +Benchmark_MerkleDB_DBInterface/merkledb_4_1024_pairs_32_keys_32_values_Get-12 1608061 739.6 ns/op 415 B/op 10 allocs/op +Benchmark_MerkleDB_DBInterface/merkledb_4_1024_pairs_32_keys_32_values_Put-12 2275 522840 ns/op 46992 B/op 538 allocs/op +Benchmark_MerkleDB_DBInterface/merkledb_4_1024_pairs_32_keys_32_values_Delete-12 135913 7768 ns/op 4448 B/op 48 allocs/op +Benchmark_MerkleDB_DBInterface/merkledb_4_1024_pairs_32_keys_32_values_BatchWrite-12 100 34994273 ns/op 8271106 B/op 113269 allocs/op +Benchmark_MerkleDB_DBInterface/merkledb_4_1024_pairs_32_keys_32_values_ParallelGet-12 2042518 585.5 ns/op 415 B/op 10 allocs/op +Benchmark_MerkleDB_DBInterface/merkledb_4_1024_pairs_32_keys_32_values_ParallelPut-12 2047 514857 ns/op 31802 B/op 343 allocs/op +Benchmark_MerkleDB_DBInterface/merkledb_16_1024_pairs_32_keys_32_values_Put-12 2476 462139 ns/op 53235 B/op 467 allocs/op +Benchmark_MerkleDB_DBInterface/merkledb_16_1024_pairs_32_keys_32_values_Delete-12 129934 9526 ns/op 4495 B/op 48 allocs/op +Benchmark_MerkleDB_DBInterface/merkledb_16_1024_pairs_32_keys_32_values_BatchWrite-12 100 20901970 ns/op 7743596 B/op 84909 allocs/op +Benchmark_MerkleDB_DBInterface/merkledb_16_1024_pairs_32_keys_32_values_ParallelGet-12 1663791 762.9 ns/op 415 B/op 10 allocs/op +Benchmark_MerkleDB_DBInterface/merkledb_16_1024_pairs_32_keys_32_values_ParallelPut-12 2132 548670 ns/op 32511 B/op 291 allocs/op +Benchmark_MerkleDB_DBInterface/merkledb_16_1024_pairs_32_keys_32_values_ParallelDelete-12 137877 7632 ns/op 4581 B/op 48 allocs/op +Benchmark_MerkleDB_DBInterface/merkledb_16_1024_pairs_32_keys_32_values_Get-12 1762561 732.3 ns/op 415 B/op 10 allocs/op +Benchmark_MerkleDB_DBInterface/merkledb_256_1024_pairs_32_keys_32_values_ParallelGet-12 2118614 566.1 ns/op 415 B/op 10 allocs/op +Benchmark_MerkleDB_DBInterface/merkledb_256_1024_pairs_32_keys_32_values_ParallelPut-12 2053 622703 ns/op 64073 B/op 532 allocs/op +Benchmark_MerkleDB_DBInterface/merkledb_256_1024_pairs_32_keys_32_values_ParallelDelete-12 7159 149580 ns/op 66728 B/op 619 allocs/op +Benchmark_MerkleDB_DBInterface/merkledb_256_1024_pairs_32_keys_32_values_Get-12 1665997 767.1 ns/op 415 B/op 10 allocs/op +Benchmark_MerkleDB_DBInterface/merkledb_256_1024_pairs_32_keys_32_values_Put-12 1489 727007 ns/op 203068 B/op 1599 allocs/op +Benchmark_MerkleDB_DBInterface/merkledb_256_1024_pairs_32_keys_32_values_Delete-12 116457 9066 ns/op 5611 B/op 56 allocs/op 
+Benchmark_MerkleDB_DBInterface/merkledb_256_1024_pairs_32_keys_32_values_BatchWrite-12 100 21853933 ns/op 22775362 B/op 178299 allocs/op +Benchmark_MerkleDB_DBInterface/merkledb_2_1024_pairs_256_keys_256_values_ParallelPut-12 2443 676658 ns/op 54396 B/op 607 allocs/op +Benchmark_MerkleDB_DBInterface/merkledb_2_1024_pairs_256_keys_256_values_ParallelDelete-12 22845 49186 ns/op 8209 B/op 87 allocs/op +Benchmark_MerkleDB_DBInterface/merkledb_2_1024_pairs_256_keys_256_values_Get-12 894346 1275 ns/op 1103 B/op 10 allocs/op +Benchmark_MerkleDB_DBInterface/merkledb_2_1024_pairs_256_keys_256_values_Put-12 1520 993861 ns/op 79724 B/op 931 allocs/op +Benchmark_MerkleDB_DBInterface/merkledb_2_1024_pairs_256_keys_256_values_Delete-12 1116 909631 ns/op 63776 B/op 857 allocs/op +Benchmark_MerkleDB_DBInterface/merkledb_2_1024_pairs_256_keys_256_values_BatchWrite-12 70 95051614 ns/op 18792271 B/op 189410 allocs/op +Benchmark_MerkleDB_DBInterface/merkledb_2_1024_pairs_256_keys_256_values_ParallelGet-12 1390196 847.0 ns/op 1103 B/op 10 allocs/op +Benchmark_MerkleDB_DBInterface/merkledb_4_1024_pairs_256_keys_256_values_Get-12 1115325 1122 ns/op 1103 B/op 10 allocs/op +Benchmark_MerkleDB_DBInterface/merkledb_4_1024_pairs_256_keys_256_values_Put-12 2325 554259 ns/op 57293 B/op 536 allocs/op +Benchmark_MerkleDB_DBInterface/merkledb_4_1024_pairs_256_keys_256_values_Delete-12 52034 19543 ns/op 5972 B/op 54 allocs/op +Benchmark_MerkleDB_DBInterface/merkledb_4_1024_pairs_256_keys_256_values_BatchWrite-12 98 46989593 ns/op 14433638 B/op 112904 allocs/op +Benchmark_MerkleDB_DBInterface/merkledb_4_1024_pairs_256_keys_256_values_ParallelGet-12 2159552 594.2 ns/op 1103 B/op 10 allocs/op +Benchmark_MerkleDB_DBInterface/merkledb_4_1024_pairs_256_keys_256_values_ParallelPut-12 2184 470487 ns/op 42474 B/op 354 allocs/op +Benchmark_MerkleDB_DBInterface/merkledb_4_1024_pairs_256_keys_256_values_ParallelDelete-12 131427 8285 ns/op 5516 B/op 49 allocs/op +Benchmark_MerkleDB_DBInterface/merkledb_16_1024_pairs_256_keys_256_values_ParallelDelete-12 116683 10067 ns/op 5769 B/op 49 allocs/op +Benchmark_MerkleDB_DBInterface/merkledb_16_1024_pairs_256_keys_256_values_Get-12 1276891 884.4 ns/op 1103 B/op 10 allocs/op +Benchmark_MerkleDB_DBInterface/merkledb_16_1024_pairs_256_keys_256_values_Put-12 2078 537454 ns/op 71707 B/op 460 allocs/op +Benchmark_MerkleDB_DBInterface/merkledb_16_1024_pairs_256_keys_256_values_Delete-12 181916 6553 ns/op 5461 B/op 47 allocs/op +Benchmark_MerkleDB_DBInterface/merkledb_16_1024_pairs_256_keys_256_values_BatchWrite-12 100 24401280 ns/op 14224390 B/op 85376 allocs/op +Benchmark_MerkleDB_DBInterface/merkledb_16_1024_pairs_256_keys_256_values_ParallelGet-12 2141138 655.6 ns/op 1103 B/op 10 allocs/op +Benchmark_MerkleDB_DBInterface/merkledb_16_1024_pairs_256_keys_256_values_ParallelPut-12 2415 461099 ns/op 51290 B/op 309 allocs/op +Benchmark_MerkleDB_DBInterface/merkledb_256_1024_pairs_256_keys_256_values_Get-12 1403350 827.5 ns/op 1103 B/op 10 allocs/op +Benchmark_MerkleDB_DBInterface/merkledb_256_1024_pairs_256_keys_256_values_Put-12 2300 645404 ns/op 291949 B/op 1658 allocs/op +Benchmark_MerkleDB_DBInterface/merkledb_256_1024_pairs_256_keys_256_values_Delete-12 103884 9755 ns/op 7822 B/op 58 allocs/op +Benchmark_MerkleDB_DBInterface/merkledb_256_1024_pairs_256_keys_256_values_BatchWrite-12 100 32477850 ns/op 34297666 B/op 185445 allocs/op +Benchmark_MerkleDB_DBInterface/merkledb_256_1024_pairs_256_keys_256_values_ParallelGet-12 1772977 586.1 ns/op 1103 B/op 10 allocs/op 
+Benchmark_MerkleDB_DBInterface/merkledb_256_1024_pairs_256_keys_256_values_ParallelPut-12 1909 649502 ns/op 138098 B/op 528 allocs/op +Benchmark_MerkleDB_DBInterface/merkledb_256_1024_pairs_256_keys_256_values_ParallelDelete-12 8389 137420 ns/op 75929 B/op 590 allocs/op +Benchmark_MerkleDB_DBInterface/merkledb_2_1024_pairs_2048_keys_2048_values_ParallelDelete-12 19838 51548 ns/op 18984 B/op 93 allocs/op +Benchmark_MerkleDB_DBInterface/merkledb_2_1024_pairs_2048_keys_2048_values_Get-12 633679 3288 ns/op 6703 B/op 10 allocs/op +Benchmark_MerkleDB_DBInterface/merkledb_2_1024_pairs_2048_keys_2048_values_Put-12 938 1083099 ns/op 140481 B/op 818 allocs/op +Benchmark_MerkleDB_DBInterface/merkledb_2_1024_pairs_2048_keys_2048_values_Delete-12 34564 29568 ns/op 16398 B/op 71 allocs/op +Benchmark_MerkleDB_DBInterface/merkledb_2_1024_pairs_2048_keys_2048_values_BatchWrite-12 37 168569032 ns/op 66318469 B/op 187871 allocs/op +Benchmark_MerkleDB_DBInterface/merkledb_2_1024_pairs_2048_keys_2048_values_ParallelGet-12 1344744 786.2 ns/op 6703 B/op 10 allocs/op +Benchmark_MerkleDB_DBInterface/merkledb_2_1024_pairs_2048_keys_2048_values_ParallelPut-12 1718 679750 ns/op 121669 B/op 535 allocs/op +Benchmark_MerkleDB_DBInterface/merkledb_4_1024_pairs_2048_keys_2048_values_Get-12 546355 1978 ns/op 6703 B/op 10 allocs/op +Benchmark_MerkleDB_DBInterface/merkledb_4_1024_pairs_2048_keys_2048_values_Put-12 1456 770109 ns/op 136494 B/op 501 allocs/op +Benchmark_MerkleDB_DBInterface/merkledb_4_1024_pairs_2048_keys_2048_values_Delete-12 100112 10556 ns/op 14217 B/op 49 allocs/op +Benchmark_MerkleDB_DBInterface/merkledb_4_1024_pairs_2048_keys_2048_values_BatchWrite-12 46 89121948 ns/op 62253886 B/op 111942 allocs/op +Benchmark_MerkleDB_DBInterface/merkledb_4_1024_pairs_2048_keys_2048_values_ParallelGet-12 1452489 762.4 ns/op 6703 B/op 10 allocs/op +Benchmark_MerkleDB_DBInterface/merkledb_4_1024_pairs_2048_keys_2048_values_ParallelPut-12 2368 524796 ns/op 123198 B/op 352 allocs/op +Benchmark_MerkleDB_DBInterface/merkledb_4_1024_pairs_2048_keys_2048_values_ParallelDelete-12 28863 37930 ns/op 16923 B/op 64 allocs/op +Benchmark_MerkleDB_DBInterface/merkledb_16_1024_pairs_2048_keys_2048_values_Put-12 2098 607418 ns/op 201174 B/op 457 allocs/op +Benchmark_MerkleDB_DBInterface/merkledb_16_1024_pairs_2048_keys_2048_values_Delete-12 118306 9531 ns/op 14704 B/op 48 allocs/op +Benchmark_MerkleDB_DBInterface/merkledb_16_1024_pairs_2048_keys_2048_values_BatchWrite-12 49 53954180 ns/op 62030841 B/op 84017 allocs/op +Benchmark_MerkleDB_DBInterface/merkledb_16_1024_pairs_2048_keys_2048_values_ParallelGet-12 1401019 729.4 ns/op 6704 B/op 10 allocs/op +Benchmark_MerkleDB_DBInterface/merkledb_16_1024_pairs_2048_keys_2048_values_ParallelPut-12 2164 490429 ns/op 166730 B/op 294 allocs/op +Benchmark_MerkleDB_DBInterface/merkledb_16_1024_pairs_2048_keys_2048_values_ParallelDelete-12 71875 14643 ns/op 16158 B/op 55 allocs/op +Benchmark_MerkleDB_DBInterface/merkledb_16_1024_pairs_2048_keys_2048_values_Get-12 638206 2353 ns/op 6704 B/op 10 allocs/op +Benchmark_MerkleDB_DBInterface/merkledb_256_1024_pairs_2048_keys_2048_values_ParallelGet-12 1376769 1202 ns/op 6703 B/op 10 allocs/op +Benchmark_MerkleDB_DBInterface/merkledb_256_1024_pairs_2048_keys_2048_values_ParallelPut-12 1777 1095206 ns/op 626614 B/op 479 allocs/op +Benchmark_MerkleDB_DBInterface/merkledb_256_1024_pairs_2048_keys_2048_values_ParallelDelete-12 7081 184202 ns/op 180245 B/op 627 allocs/op +Benchmark_MerkleDB_DBInterface/merkledb_256_1024_pairs_2048_keys_2048_values_Get-12 594469 
2907 ns/op 6703 B/op 10 allocs/op +Benchmark_MerkleDB_DBInterface/merkledb_256_1024_pairs_2048_keys_2048_values_Put-12 1898 794022 ns/op 913173 B/op 1614 allocs/op +Benchmark_MerkleDB_DBInterface/merkledb_256_1024_pairs_2048_keys_2048_values_Delete-12 3867 401730 ns/op 282690 B/op 402 allocs/op +Benchmark_MerkleDB_DBInterface/merkledb_256_1024_pairs_2048_keys_2048_values_BatchWrite-12 32 62429472 ns/op 103188607 B/op 175385 allocs/op +""" + # A second benchmark string; here we simulate some differences by tweaking the numbers. + bench_str2 = """ +Benchmark_MerkleDB_DBInterface/merkledb_2_1024_pairs_32_keys_32_values_Delete-12 606022 1919 ns/op 3450 B/op 25 allocs/op +Benchmark_MerkleDB_DBInterface/merkledb_2_1024_pairs_32_keys_32_values_BatchWrite-12 135 8711351 ns/op 3197731 B/op 31343 allocs/op +Benchmark_MerkleDB_DBInterface/merkledb_2_1024_pairs_32_keys_32_values_ParallelGet-12 2167849 559.4 ns/op 208 B/op 5 allocs/op +Benchmark_MerkleDB_DBInterface/merkledb_2_1024_pairs_32_keys_32_values_ParallelPut-12 23853 53867 ns/op 22321 B/op 220 allocs/op +Benchmark_MerkleDB_DBInterface/merkledb_2_1024_pairs_32_keys_32_values_ParallelDelete-12 276682 3835 ns/op 3484 B/op 25 allocs/op +Benchmark_MerkleDB_DBInterface/merkledb_2_1024_pairs_32_keys_32_values_Get-12 2020380 548.0 ns/op 208 B/op 5 allocs/op +Benchmark_MerkleDB_DBInterface/merkledb_2_1024_pairs_32_keys_32_values_Put-12 17073 68154 ns/op 23450 B/op 222 allocs/op +Benchmark_MerkleDB_DBInterface/merkledb_4_1024_pairs_32_keys_32_values_Put-12 26124 47576 ns/op 17612 B/op 166 allocs/op +Benchmark_MerkleDB_DBInterface/merkledb_4_1024_pairs_32_keys_32_values_Delete-12 541275 2180 ns/op 3446 B/op 25 allocs/op +Benchmark_MerkleDB_DBInterface/merkledb_4_1024_pairs_32_keys_32_values_BatchWrite-12 158 9549935 ns/op 2473991 B/op 22162 allocs/op +Benchmark_MerkleDB_DBInterface/merkledb_4_1024_pairs_32_keys_32_values_ParallelGet-12 1468522 1118 ns/op 208 B/op 5 allocs/op +Benchmark_MerkleDB_DBInterface/merkledb_4_1024_pairs_32_keys_32_values_ParallelPut-12 22179 56126 ns/op 17671 B/op 164 allocs/op +Benchmark_MerkleDB_DBInterface/merkledb_4_1024_pairs_32_keys_32_values_ParallelDelete-12 256494 4090 ns/op 3473 B/op 25 allocs/op +Benchmark_MerkleDB_DBInterface/merkledb_4_1024_pairs_32_keys_32_values_Get-12 2227004 557.0 ns/op 208 B/op 5 allocs/op +Benchmark_MerkleDB_DBInterface/merkledb_16_1024_pairs_32_keys_32_values_ParallelGet-12 1541641 779.2 ns/op 208 B/op 5 allocs/op +Benchmark_MerkleDB_DBInterface/merkledb_16_1024_pairs_32_keys_32_values_ParallelPut-12 21726 60658 ns/op 22165 B/op 181 allocs/op +Benchmark_MerkleDB_DBInterface/merkledb_16_1024_pairs_32_keys_32_values_ParallelDelete-12 259716 3920 ns/op 3494 B/op 25 allocs/op +Benchmark_MerkleDB_DBInterface/merkledb_16_1024_pairs_32_keys_32_values_Get-12 2310726 545.8 ns/op 208 B/op 5 allocs/op +Benchmark_MerkleDB_DBInterface/merkledb_16_1024_pairs_32_keys_32_values_Put-12 24762 52335 ns/op 22161 B/op 182 allocs/op +Benchmark_MerkleDB_DBInterface/merkledb_16_1024_pairs_32_keys_32_values_Delete-12 597252 2012 ns/op 3450 B/op 25 allocs/op +Benchmark_MerkleDB_DBInterface/merkledb_16_1024_pairs_32_keys_32_values_BatchWrite-12 285 4444315 ns/op 2026786 B/op 16166 allocs/op +Benchmark_MerkleDB_DBInterface/merkledb_256_1024_pairs_32_keys_32_values_ParallelPut-12 10000 152811 ns/op 95557 B/op 712 allocs/op +Benchmark_MerkleDB_DBInterface/merkledb_256_1024_pairs_32_keys_32_values_ParallelDelete-12 118893 9197 ns/op 6918 B/op 56 allocs/op 
+Benchmark_MerkleDB_DBInterface/merkledb_256_1024_pairs_32_keys_32_values_Get-12 2332216 522.9 ns/op 208 B/op 5 allocs/op +Benchmark_MerkleDB_DBInterface/merkledb_256_1024_pairs_32_keys_32_values_Put-12 10000 174139 ns/op 107905 B/op 826 allocs/op +Benchmark_MerkleDB_DBInterface/merkledb_256_1024_pairs_32_keys_32_values_Delete-12 540813 2041 ns/op 3591 B/op 26 allocs/op +Benchmark_MerkleDB_DBInterface/merkledb_256_1024_pairs_32_keys_32_values_BatchWrite-12 349 3377677 ns/op 1882185 B/op 14104 allocs/op +Benchmark_MerkleDB_DBInterface/merkledb_256_1024_pairs_32_keys_32_values_ParallelGet-12 2211526 552.3 ns/op 208 B/op 5 allocs/op +Benchmark_MerkleDB_DBInterface/merkledb_2_1024_pairs_256_keys_256_values_ParallelPut-12 18416 65799 ns/op 26024 B/op 227 allocs/op +Benchmark_MerkleDB_DBInterface/merkledb_2_1024_pairs_256_keys_256_values_ParallelDelete-12 277788 5135 ns/op 3492 B/op 25 allocs/op +Benchmark_MerkleDB_DBInterface/merkledb_2_1024_pairs_256_keys_256_values_Get-12 1918507 655.4 ns/op 656 B/op 5 allocs/op +Benchmark_MerkleDB_DBInterface/merkledb_2_1024_pairs_256_keys_256_values_Put-12 15189 81602 ns/op 26630 B/op 228 allocs/op +Benchmark_MerkleDB_DBInterface/merkledb_2_1024_pairs_256_keys_256_values_Delete-12 490267 2383 ns/op 3461 B/op 25 allocs/op +Benchmark_MerkleDB_DBInterface/merkledb_2_1024_pairs_256_keys_256_values_BatchWrite-12 100 19714630 ns/op 5935083 B/op 33060 allocs/op +Benchmark_MerkleDB_DBInterface/merkledb_2_1024_pairs_256_keys_256_values_ParallelGet-12 1554502 779.9 ns/op 656 B/op 5 allocs/op +Benchmark_MerkleDB_DBInterface/merkledb_4_1024_pairs_256_keys_256_values_Get-12 1872217 666.7 ns/op 656 B/op 5 allocs/op +Benchmark_MerkleDB_DBInterface/merkledb_4_1024_pairs_256_keys_256_values_Put-12 21265 56643 ns/op 21482 B/op 171 allocs/op +Benchmark_MerkleDB_DBInterface/merkledb_4_1024_pairs_256_keys_256_values_Delete-12 547382 2316 ns/op 3450 B/op 25 allocs/op +Benchmark_MerkleDB_DBInterface/merkledb_4_1024_pairs_256_keys_256_values_BatchWrite-12 100 12078021 ns/op 5334783 B/op 23765 allocs/op +Benchmark_MerkleDB_DBInterface/merkledb_4_1024_pairs_256_keys_256_values_ParallelGet-12 1553620 767.8 ns/op 656 B/op 5 allocs/op +Benchmark_MerkleDB_DBInterface/merkledb_4_1024_pairs_256_keys_256_values_ParallelPut-12 19024 65361 ns/op 21598 B/op 171 allocs/op +Benchmark_MerkleDB_DBInterface/merkledb_4_1024_pairs_256_keys_256_values_ParallelDelete-12 281832 5081 ns/op 3478 B/op 25 allocs/op +Benchmark_MerkleDB_DBInterface/merkledb_16_1024_pairs_256_keys_256_values_Get-12 1826692 698.5 ns/op 656 B/op 5 allocs/op +Benchmark_MerkleDB_DBInterface/merkledb_16_1024_pairs_256_keys_256_values_Put-12 19698 60110 ns/op 26518 B/op 187 allocs/op +Benchmark_MerkleDB_DBInterface/merkledb_16_1024_pairs_256_keys_256_values_Delete-12 474770 2314 ns/op 3464 B/op 25 allocs/op +Benchmark_MerkleDB_DBInterface/merkledb_16_1024_pairs_256_keys_256_values_BatchWrite-12 160 7347888 ns/op 4835370 B/op 17519 allocs/op +Benchmark_MerkleDB_DBInterface/merkledb_16_1024_pairs_256_keys_256_values_ParallelGet-12 2162096 562.0 ns/op 656 B/op 5 allocs/op +Benchmark_MerkleDB_DBInterface/merkledb_16_1024_pairs_256_keys_256_values_ParallelPut-12 23643 51271 ns/op 26179 B/op 186 allocs/op +Benchmark_MerkleDB_DBInterface/merkledb_16_1024_pairs_256_keys_256_values_ParallelDelete-12 442194 2964 ns/op 3471 B/op 25 allocs/op +Benchmark_MerkleDB_DBInterface/merkledb_256_1024_pairs_256_keys_256_values_ParallelDelete-12 102321 9826 ns/op 7898 B/op 63 allocs/op 
+Benchmark_MerkleDB_DBInterface/merkledb_256_1024_pairs_256_keys_256_values_Get-12 1953824 712.6 ns/op 656 B/op 5 allocs/op +Benchmark_MerkleDB_DBInterface/merkledb_256_1024_pairs_256_keys_256_values_Put-12 10000 167600 ns/op 128312 B/op 842 allocs/op +Benchmark_MerkleDB_DBInterface/merkledb_256_1024_pairs_256_keys_256_values_Delete-12 391153 2776 ns/op 3738 B/op 26 allocs/op +Benchmark_MerkleDB_DBInterface/merkledb_256_1024_pairs_256_keys_256_values_BatchWrite-12 184 6414198 ns/op 4670607 B/op 15333 allocs/op +Benchmark_MerkleDB_DBInterface/merkledb_256_1024_pairs_256_keys_256_values_ParallelGet-12 1550438 775.1 ns/op 656 B/op 5 allocs/op +Benchmark_MerkleDB_DBInterface/merkledb_256_1024_pairs_256_keys_256_values_ParallelPut-12 10000 169952 ns/op 131328 B/op 711 allocs/op +Benchmark_MerkleDB_DBInterface/merkledb_2_1024_pairs_2048_keys_2048_values_Get-12 201868 6690 ns/op 12167 B/op 17 allocs/op +Benchmark_MerkleDB_DBInterface/merkledb_2_1024_pairs_2048_keys_2048_values_Put-12 9700 236459 ns/op 117751 B/op 378 allocs/op +Benchmark_MerkleDB_DBInterface/merkledb_2_1024_pairs_2048_keys_2048_values_Delete-12 110016 11064 ns/op 15718 B/op 46 allocs/op +Benchmark_MerkleDB_DBInterface/merkledb_2_1024_pairs_2048_keys_2048_values_BatchWrite-12 51 141944308 ns/op 64781177 B/op 95989 allocs/op +Benchmark_MerkleDB_DBInterface/merkledb_2_1024_pairs_2048_keys_2048_values_ParallelGet-12 1155688 1003 ns/op 12088 B/op 15 allocs/op +Benchmark_MerkleDB_DBInterface/merkledb_2_1024_pairs_2048_keys_2048_values_ParallelPut-12 7076 177573 ns/op 60424 B/op 207 allocs/op +Benchmark_MerkleDB_DBInterface/merkledb_2_1024_pairs_2048_keys_2048_values_ParallelDelete-12 66601 15638 ns/op 15702 B/op 47 allocs/op +Benchmark_MerkleDB_DBInterface/merkledb_4_1024_pairs_2048_keys_2048_values_Get-12 208437 6907 ns/op 11930 B/op 16 allocs/op +Benchmark_MerkleDB_DBInterface/merkledb_4_1024_pairs_2048_keys_2048_values_Put-12 9548 190783 ns/op 120089 B/op 310 allocs/op +Benchmark_MerkleDB_DBInterface/merkledb_4_1024_pairs_2048_keys_2048_values_Delete-12 182011 5916 ns/op 9943 B/op 36 allocs/op +Benchmark_MerkleDB_DBInterface/merkledb_4_1024_pairs_2048_keys_2048_values_BatchWrite-12 60 76759443 ns/op 63336358 B/op 73185 allocs/op +Benchmark_MerkleDB_DBInterface/merkledb_4_1024_pairs_2048_keys_2048_values_ParallelGet-12 1249963 928.8 ns/op 11864 B/op 15 allocs/op +Benchmark_MerkleDB_DBInterface/merkledb_4_1024_pairs_2048_keys_2048_values_ParallelPut-12 11324 109749 ns/op 62461 B/op 185 allocs/op +Benchmark_MerkleDB_DBInterface/merkledb_4_1024_pairs_2048_keys_2048_values_ParallelDelete-12 183841 6576 ns/op 9559 B/op 35 allocs/op +Benchmark_MerkleDB_DBInterface/merkledb_16_1024_pairs_2048_keys_2048_values_Put-12 10000 144545 ns/op 139921 B/op 318 allocs/op +Benchmark_MerkleDB_DBInterface/merkledb_16_1024_pairs_2048_keys_2048_values_Delete-12 148218 6828 ns/op 10484 B/op 41 allocs/op +Benchmark_MerkleDB_DBInterface/merkledb_16_1024_pairs_2048_keys_2048_values_BatchWrite-12 79 49527325 ns/op 63238078 B/op 57649 allocs/op +Benchmark_MerkleDB_DBInterface/merkledb_16_1024_pairs_2048_keys_2048_values_ParallelGet-12 1064394 1126 ns/op 13621 B/op 19 allocs/op +Benchmark_MerkleDB_DBInterface/merkledb_16_1024_pairs_2048_keys_2048_values_ParallelPut-12 12357 104600 ns/op 86595 B/op 207 allocs/op +Benchmark_MerkleDB_DBInterface/merkledb_16_1024_pairs_2048_keys_2048_values_ParallelDelete-12 119232 8636 ns/op 11052 B/op 42 allocs/op +Benchmark_MerkleDB_DBInterface/merkledb_16_1024_pairs_2048_keys_2048_values_Get-12 181191 6251 ns/op 13680 B/op 20 
allocs/op +Benchmark_MerkleDB_DBInterface/merkledb_256_1024_pairs_2048_keys_2048_values_ParallelPut-12 10000 303806 ns/op 437555 B/op 697 allocs/op +Benchmark_MerkleDB_DBInterface/merkledb_256_1024_pairs_2048_keys_2048_values_ParallelDelete-12 35574 30576 ns/op 33505 B/op 175 allocs/op +Benchmark_MerkleDB_DBInterface/merkledb_256_1024_pairs_2048_keys_2048_values_Get-12 179374 7063 ns/op 13321 B/op 21 allocs/op +Benchmark_MerkleDB_DBInterface/merkledb_256_1024_pairs_2048_keys_2048_values_Put-12 7713 275772 ns/op 336306 B/op 987 allocs/op +Benchmark_MerkleDB_DBInterface/merkledb_256_1024_pairs_2048_keys_2048_values_Delete-12 95526 11573 ns/op 16093 B/op 58 allocs/op +Benchmark_MerkleDB_DBInterface/merkledb_256_1024_pairs_2048_keys_2048_values_BatchWrite-12 92 37342318 ns/op 62074654 B/op 50402 allocs/op +Benchmark_MerkleDB_DBInterface/merkledb_256_1024_pairs_2048_keys_2048_values_ParallelGet-12 1028581 1111 ns/op 13255 B/op 20 allocs/op 91224 11946 ns/op 16324 B/op 58 allocs/op 910558 ns/op 80031 B/op 1050 allocs/op +""" + # Parse benchmark strings. + benchmarks1 = parse_benchmarks_detailed(bench_str1) + benchmarks2 = parse_benchmarks_detailed(bench_str2) + + # Compute percentage differences for common benchmarks. + diff = compare_benchmarks_detailed(benchmarks1, benchmarks2) + + # Organize the differences by operation. + diff_by_op = organize_diff_by_operation(diff) + + # Plot one figure per operation, with subplots for each key/value size. + plot_diff_by_operation_grouped(diff_by_op) \ No newline at end of file diff --git a/cached_raw.prof b/cached_raw.prof new file mode 100644 index 0000000..99f72d7 Binary files /dev/null and b/cached_raw.prof differ diff --git a/cpu.prof b/cpu.prof new file mode 100644 index 0000000..b1a9396 Binary files /dev/null and b/cpu.prof differ diff --git a/database/dbtest/benchmark.go b/database/dbtest/benchmark.go index 8e50145..6f13014 100644 --- a/database/dbtest/benchmark.go +++ b/database/dbtest/benchmark.go @@ -19,8 +19,8 @@ var ( "Get": BenchmarkGet, "Put": BenchmarkPut, "Delete": BenchmarkDelete, - "BatchPut": BenchmarkBatchPut, - "BatchDelete": BenchmarkBatchDelete, + // "BatchPut": BenchmarkBatchPut, + // "BatchDelete": BenchmarkBatchDelete, "BatchWrite": BenchmarkBatchWrite, "ParallelGet": BenchmarkParallelGet, "ParallelPut": BenchmarkParallelPut, diff --git a/database/dbtest/dbtest.go b/database/dbtest/dbtest.go index 4e993a5..67308c6 100644 --- a/database/dbtest/dbtest.go +++ b/database/dbtest/dbtest.go @@ -52,9 +52,9 @@ var Tests = map[string]func(t *testing.T, db database.Database){ "MemorySafetyDatabase": TestMemorySafetyDatabase, "MemorySafetyBatch": TestMemorySafetyBatch, // "AtomicClear": TestAtomicClear, - //"Clear": TestClear, - //"AtomicClearPrefix": TestAtomicClearPrefix, - //"ClearPrefix": TestClearPrefix, + // "Clear": TestClear, + // "AtomicClearPrefix": TestAtomicClearPrefix, + // "ClearPrefix": TestClearPrefix, "ModifyValueAfterPut": TestModifyValueAfterPut, "ModifyValueAfterBatchPut": TestModifyValueAfterBatchPut, "ModifyValueAfterBatchPutReplay": TestModifyValueAfterBatchPutReplay, diff --git a/go.mod b/go.mod index 4714640..0170d2a 100644 --- a/go.mod +++ b/go.mod @@ -95,11 +95,13 @@ require ( github.com/crate-crypto/go-kzg-4844 v0.7.0 // indirect github.com/davecgh/go-spew v1.1.1 // indirect github.com/deckarep/golang-set/v2 v2.1.0 // indirect + github.com/dgraph-io/ristretto v0.2.0 // indirect github.com/distribution/reference v0.5.0 // indirect github.com/dlclark/regexp2 v1.7.0 // indirect github.com/docker/go-connections v0.4.0 
// indirect github.com/docker/go-units v0.5.0 // indirect github.com/dop251/goja v0.0.0-20230806174421-c933cf95e127 // indirect + github.com/dustin/go-humanize v1.0.1 // indirect github.com/emicklei/go-restful/v3 v3.11.0 // indirect github.com/ethereum/c-kzg-4844 v0.4.0 // indirect github.com/frankban/quicktest v1.14.4 // indirect diff --git a/go.sum b/go.sum index db42149..b0a8838 100644 --- a/go.sum +++ b/go.sum @@ -162,6 +162,8 @@ github.com/decred/dcrd/dcrec/secp256k1/v4 v4.1.0 h1:HbphB4TFFXpv7MNrT52FGrrgVXF1 github.com/decred/dcrd/dcrec/secp256k1/v4 v4.1.0/go.mod h1:DZGJHZMqrU4JJqFAWUS2UO1+lbSKsdiOoYi9Zzey7Fc= github.com/decred/dcrd/lru v1.0.0/go.mod h1:mxKOwFd7lFjN2GZYsiz/ecgqR6kkYAl+0pz0tEMk218= github.com/dgraph-io/badger v1.6.0/go.mod h1:zwt7syl517jmP8s94KqSxTlM6IMsdhYy6psNgSztDR4= +github.com/dgraph-io/ristretto v0.2.0 h1:XAfl+7cmoUDWW/2Lx8TGZQjjxIQ2Ley9DSf52dru4WE= +github.com/dgraph-io/ristretto v0.2.0/go.mod h1:8uBHCU/PBV4Ag0CJrP47b9Ofby5dqWNh4FicAdoqFNU= github.com/dgryski/go-farm v0.0.0-20190423205320-6a90982ecee2/go.mod h1:SqUrOPUnsFjfmXRMNPybcSiG0BgUW2AuFH8PAnS2iTw= github.com/distribution/reference v0.5.0 h1:/FUIFXtfc/x2gpa5/VGfiGLuOIdYa1t65IKK2OFGvA0= github.com/distribution/reference v0.5.0/go.mod h1:BbU0aIcezP1/5jX/8MP0YiH4SdvB5Y4f/wlDRiLyi3E= @@ -178,6 +180,8 @@ github.com/dop251/goja v0.0.0-20230806174421-c933cf95e127/go.mod h1:QMWlm50DNe14 github.com/dop251/goja_nodejs v0.0.0-20210225215109-d91c329300e7/go.mod h1:hn7BA7c8pLvoGndExHudxTDKZ84Pyvv+90pbBjbTz0Y= github.com/dop251/goja_nodejs v0.0.0-20211022123610-8dd9abb0616d/go.mod h1:DngW8aVqWbuLRMHItjPUyqdj+HWPvnQe8V8y1nDpIbM= github.com/dustin/go-humanize v1.0.0/go.mod h1:HtrtbFcZ19U5GC7JDqmcUSB87Iq5E25KnS6fMYU6eOk= +github.com/dustin/go-humanize v1.0.1 h1:GzkhY7T5VNhEkwH0PVJgjz+fX1rhBrR7pRT3mDkpeCY= +github.com/dustin/go-humanize v1.0.1/go.mod h1:Mu1zIs6XwVuF/gI1OepvI0qD18qycQx+mFykh5fBlto= github.com/eknkc/amber v0.0.0-20171010120322-cdade1c07385/go.mod h1:0vRUJqYpeSZifjYj7uP3BG/gKcuzL9xWVV/Y+cK33KM= github.com/emicklei/go-restful/v3 v3.11.0 h1:rAQeMHw1c7zTmncogyy8VvRZwtkmkZ4FxERmMY4rD+g= github.com/emicklei/go-restful/v3 v3.11.0/go.mod h1:6n3XBCmQQb25CM2LCACGz8ukIrRry+4bhvbpWn3mrbc= diff --git a/leveldb.prof b/leveldb.prof new file mode 100644 index 0000000..7e016d3 Binary files /dev/null and b/leveldb.prof differ diff --git a/merkledb.prof b/merkledb.prof new file mode 100644 index 0000000..36813f8 Binary files /dev/null and b/merkledb.prof differ diff --git a/merkledb.test.exe b/merkledb.test.exe new file mode 100644 index 0000000..ea17652 Binary files /dev/null and b/merkledb.test.exe differ diff --git a/profile.png b/profile.png new file mode 100644 index 0000000..95f2f7b Binary files /dev/null and b/profile.png differ diff --git a/profile.svg b/profile.svg new file mode 100644 index 0000000..d4d5f14 Binary files /dev/null and b/profile.svg differ diff --git a/regular.prof b/regular.prof new file mode 100644 index 0000000..9dfd5aa Binary files /dev/null and b/regular.prof differ diff --git a/x/merkledb/codec.go b/x/merkledb/codec.go index f24483b..5235d26 100644 --- a/x/merkledb/codec.go +++ b/x/merkledb/codec.go @@ -351,7 +351,7 @@ func (r *codecReader) Key() (Key, error) { } byteLen := bytesNeeded(result.length) if byteLen > len(r.b) { - return Key{}, io.ErrUnexpectedEOF + return Key{}, io.ErrUnexpectedEOF } if result.hasPartialByte() { // Confirm that the padding bits in the partial byte are 0. 
diff --git a/x/merkledb/db_test.go b/x/merkledb/db_test.go index f470566..8c00159 100644 --- a/x/merkledb/db_test.go +++ b/x/merkledb/db_test.go @@ -19,6 +19,7 @@ import ( "github.com/ava-labs/avalanchego/database" "github.com/ava-labs/avalanchego/database/dbtest" + // "github.com/ava-labs/avalanchego/database/memdb" "github.com/ava-labs/avalanchego/ids" "github.com/ava-labs/avalanchego/trace" @@ -128,16 +129,21 @@ func Test_MerkleDB_DB_Interface(t *testing.T) { func TestDelete(t *testing.T) { require := require.New(t) keys, values := dbtest.SetupBenchmark(t, 1024, 32, 32) + for _, bf := range validBranchFactors { - db, err := getBasicDB(t) - require.NoError(err) + db, err := getBasicDBWithBranchFactor(bf) + require.NoError(err) - for i, key := range keys { - value := values[i] - require.NoError(db.Put(key, value)) - } + for i, key := range keys { + value := values[i] + require.NoError(db.Put(key, value)) + } - require.NoError(db.Delete(keys[0])) + require.NoError(db.Delete(keys[0])) + t.Cleanup(func() { + db.Close() + }) + } } @@ -159,6 +165,36 @@ func Benchmark_MerkleDB_DBInterface(b *testing.B) { } } +func Benchmark_MerkleDB_DBInterface_Timed(b *testing.B) { + totalTime := time.Duration(0) + for _, size := range dbtest.BenchmarkSizes { + keys, values := dbtest.SetupBenchmark(b, size[0], size[1], size[2]) + for _, bf := range validBranchFactors { + for name, bench := range dbtest.Benchmarks { + // Run the benchmark and track its time + b.Run(fmt.Sprintf("merkledb_%d_%d_pairs_%d_keys_%d_values_%s", bf, size[0], size[1], size[2], name), func(b *testing.B) { + db, err := getBasicDBWithBranchFactor(bf) + require.NoError(b, err) + b.Cleanup(func() { + db.Close() + }) + + // Record the start time for each individual benchmark + start := time.Now() + bench(b, db, keys, values) + // Calculate the time taken for the specific benchmark + duration := time.Since(start) + totalTime += duration + // Log the time taken for the specific benchmark + }) + } + } + } + // Log the total time taken for all benchmarks + fmt.Printf("Total time taken for all benchmarks: %v\n", totalTime) + b.Log("Total time taken for all benchmarks: ", totalTime) +} + // PASSES func Test_MerkleDB_DB_Load_Root_From_DB(t *testing.T) { require := require.New(t) @@ -886,7 +922,7 @@ func TestMerkleDBClear(t *testing.T) { require, r, []database.Database{db}, - 1_000, + 95, 0.25, ) diff --git a/x/merkledb/disk_manager.go b/x/merkledb/disk_manager.go index 722cb56..d302728 100644 --- a/x/merkledb/disk_manager.go +++ b/x/merkledb/disk_manager.go @@ -85,7 +85,7 @@ func newDiskManager(metaData []byte, dir string, fileName string) (*diskMgr, err } // start freelist - maxSize := 4096 * 4 * 4 + maxSize := 4096 * 4 * 4 * 4 f := newFreeList(maxSize) f.load(dir) @@ -119,8 +119,6 @@ func (dm *diskMgr) putBack(addr diskAddress) error { } func (dm *diskMgr) writeRoot(rootNode dbNode) (diskAddress, error) { - - // first check the size of rootNode without the disk address bytes := encodeDBNode_disk(&rootNode) freeSpace, ok := dm.free.get(int64(len(bytes)) + 16) @@ -190,6 +188,22 @@ func (dm *diskMgr) writeRoot(rootNode dbNode) (diskAddress, error) { } +func (dm *diskMgr) fetch(byteLength int64) (diskAddress, error) { + freeSpace, ok := dm.free.get(int64(byteLength)) + if !ok { + endOffset, err := dm.endOfFile() + if err != nil { + log.Fatalf("failed to get end of file: %v", err) + return diskAddress{}, err + } + return diskAddress{offset: endOffset, size: int64(byteLength)}, nil + } else { + return diskAddress{offset: freeSpace.offset, size: 
int64(byteLength)}, nil
+	}
+
+
+}
+
 // returning diskaddress that it wrote to
 // if we write to freelist: diskaddress would be the size of freespace
 // if we dont write to freelist: append bytes to end, return endoffset and size
diff --git a/x/merkledb/freelist.go b/x/merkledb/freelist.go
index 9b9404c..32b77d1 100644
--- a/x/merkledb/freelist.go
+++ b/x/merkledb/freelist.go
@@ -49,8 +49,15 @@ func (f *freeList) get(size int64) (diskAddress, bool) {
 		if f.closed {
 			return diskAddress{}, false
 		}*/
+	if size == 0 {
+		return diskAddress{}, false
+	}
 	bucket := f.bucketIndex(size)
+	// first check whether or not the bucket would be out of bounds
+	if bucket >= len(f.buckets) && size > 0 {
+		return diskAddress{}, false
+	}
 	if len(f.buckets[bucket]) > 0 {
 		space := f.buckets[bucket][len(f.buckets[bucket])-1]
 		f.buckets[bucket] = f.buckets[bucket][:len(f.buckets[bucket])-1]
@@ -72,7 +79,7 @@ func (f *freeList) put(space diskAddress) {
 
 // bucketIndex returns the index of the bucket that the size belongs to.
 func (f *freeList) bucketIndex(size int64) int {
-	return int(math.Ceil(math.Log2(float64(size))))
+	return int(math.Floor(math.Log2(float64(size))))
 }
 
 // close writes the remaining diskAddresses in the freeList to a file and closes the file.
diff --git a/x/merkledb/helpers_test.go b/x/merkledb/helpers_test.go
index 2e2550a..b21f444 100644
--- a/x/merkledb/helpers_test.go
+++ b/x/merkledb/helpers_test.go
@@ -11,11 +11,18 @@ import (
 	"github.com/stretchr/testify/require"
 
 	"github.com/ava-labs/avalanchego/database/memdb"
+	"github.com/ava-labs/avalanchego/ids"
 	"github.com/ava-labs/avalanchego/utils/hashing"
 	"github.com/ava-labs/avalanchego/utils/maybe"
+
+
+	"github.com/ava-labs/avalanchego/database/leveldb"
+	"github.com/ava-labs/avalanchego/utils/logging"
+	"github.com/prometheus/client_golang/prometheus"
+
 )
 
-const disk = true
+const disk = false
 
 func getBasicDB(tb testing.TB) (*merkleDB, error) {
 	if disk{
 		return getBasicDB_disk(tb)
@@ -29,21 +36,24 @@ func getBasicDB(tb testing.TB) (*merkleDB, error) {
 }
 
 func getBasicDBWithBranchFactor(bf BranchFactor) (*merkleDB, error) {
-	config := newDefaultConfig()
-	config.BranchFactor = bf
-	t := &testing.T{}
-	dir := t.TempDir()
-	if disk{
-		return getBasicDBWithBranchFactor_disk(bf,dir)
-	}
-	return newDatabase(
-		context.Background(),
-		memdb.New(),
-		config,
-		&mockMetrics{},
-	)
-}
+	config := newDefaultConfig()
+	config.BranchFactor = bf
+	t := &testing.T{}
+
+	folder := t.TempDir()
+	db, err := leveldb.New(folder, nil, logging.NoLog{}, prometheus.NewRegistry())
+	require.NoError(t, err)
+	if disk {
+		return getBasicDBWithBranchFactor_disk(bf, folder)
+	}
+	return newDatabase(
+		context.Background(),
+		db,
+		config,
+		&mockMetrics{},
+	)
+}
 
 // Writes []byte{i} -> []byte{i} for i in [0, 4]
 func writeBasicBatch(t *testing.T, db *merkleDB) {
 	require := require.New(t)
diff --git a/x/merkledb/key.go b/x/merkledb/key.go
index dc4b070..02a6794 100644
--- a/x/merkledb/key.go
+++ b/x/merkledb/key.go
@@ -37,7 +37,7 @@ var (
 		BranchFactor2,
 		BranchFactor4,
 		BranchFactor16,
-		// BranchFactor256,
+		BranchFactor256,
 	}
 )
 
diff --git a/x/merkledb/raw_disk.go b/x/merkledb/raw_disk.go
index 308033c..77ea325 100644
--- a/x/merkledb/raw_disk.go
+++ b/x/merkledb/raw_disk.go
@@ -11,6 +11,10 @@ import (
 	"log"
 	"sort"
 
+	// "github.com/hashicorp/golang-lru"
+
+	"github.com/dgraph-io/ristretto"
+
 	"github.com/ava-labs/avalanchego/database"
 	"github.com/ava-labs/avalanchego/utils/maybe"
 )
@@ -52,9 +56,11 @@ type rawDisk struct {
 // [0] = shutdownType
 // [1,17] = rootKey raw file offset
// [18,] = node store - dm *diskMgr - config Config - hasher Hasher + dm *diskMgr + config Config + hasher Hasher + cache *ristretto.Cache + deletedCache *ristretto.Cache } func newRawDisk(dir string, fileName string, hasher Hasher, config Config) (*rawDisk, error) { @@ -62,8 +68,20 @@ func newRawDisk(dir string, fileName string, hasher Hasher, config Config) (*raw if err != nil { return nil, err } + cache, _ := ristretto.NewCache(&ristretto.Config{ + NumCounters: 1e6, // Number of keys to track frequency (higher = better hit rate) + MaxCost: 2 << 30, // Maximum cost in bytes (adjust as needed) + BufferItems: 64, // Number of keys per eviction buffer + }) + + deletedCache, _ := ristretto.NewCache(&ristretto.Config{ + NumCounters: 1e5, // Number of keys to track frequency (higher = better hit rate) + MaxCost: 1 << 30, // Maximum cost in bytes (adjust as needed) + BufferItems: 64, // Number of keys per eviction buffer + }) + // correctly read rootId from the header - return &rawDisk{dm: dm, hasher: hasher, config: config}, nil + return &rawDisk{dm: dm, hasher: hasher, config: config, cache: cache, deletedCache: deletedCache}, nil } func (r *rawDisk) getShutdownType() ([]byte, error) { @@ -130,8 +148,11 @@ func (r *rawDisk) printTree(rootDiskAddr diskAddress, changes *changeSummary) er newRootNodeBytes, _ := r.dm.get(rootDiskAddr) newRootNode := &dbNode{} decodeDBNode_disk(newRootNodeBytes, newRootNode) - // log.Printf("Root node %v with key {%v, %s}", rootDiskAddr, changes.rootChange.after.Value().key.length, changes.rootChange.after.Value().key.value) - parentKey := changes.rootChange.after.Value().key + parentKey := Key{} + if newRootNode.value.HasValue() { + log.Printf("Root node %v with key {%v, %s}", rootDiskAddr, changes.rootChange.after.Value().key.length, changes.rootChange.after.Value().key.value) + parentKey = changes.rootChange.after.Value().key + } for token, child := range newRootNode.children { totalKeyBytes := append(parentKey.Bytes(), token) totalKeyBytes = append(totalKeyBytes, child.compressedKey.Bytes()...) 
@@ -191,22 +212,35 @@ func (r *rawDisk) writeChanges(ctx context.Context, changes *changeSummary) erro return keys[i].length > keys[j].length }) - if len(keys) > 0 { - for i := 0; i < len(keys)-2; i += 2 { - pair := keys[i : i+2] - if pair[0].length < pair[1].length { - log.Printf("prev key shorter than next key") - } + totalLenBytes := 0 + for _, nodes := range changes.nodes { + if nodes.after != nil { + // Increase length by one for the padding byte per node + totalLenBytes += len(encodeDBNode_disk(&nodes.after.dbNode)) + 1 + totalLenBytes += 16 * len(nodes.after.children) } } + // every node in the tree has a diskaddress of its children except leaf nodes + // find how many leaf nodes there are - // Create a temporary map of remainingNodes to store the disk address and compressed key of the remainingNodes - childrenNodes := make(map[Key]diskAddress) + // log.Printf("Total length of bytes %d", totalLenBytes) + + // fetch the available disk address for totallenbytes + totalDiskAddress, err := r.dm.fetch(int64(totalLenBytes)) + if err != nil { + return err + } - // ITERATES THROUGH ALL NODES EXCEPT THE ROOT - // STARTS WITH CHILDRENS THEN MOVES TO PARENTS - // EACH PARENT CHECKS THEIR CHILDREN'S KEY THEN ENSURES POINTERS PROPERLY WORK + // Start partitioning the data within the totaldiskaddress + // Start with longest keys (children), then move up the tree + // Start with the leaf nodes // Iterate through the keys + totalOffset := 0 + childrenNodes := make(map[Key]diskAddress) + totalBytes := make([]byte, 0) + numWritten := 0 + rootDiskAddr := diskAddress{} + totalRootBytes := make([]byte, 0) for _, k := range keys { // find the nodechange associated with the key nodeChange := changes.nodes[k] @@ -216,11 +250,6 @@ func (r *rawDisk) writeChanges(ctx context.Context, changes *changeSummary) erro } // Ensure root is not being written twice - if changes.rootChange.after.HasValue() { - if nodeChange.after.key == changes.rootChange.after.Value().key { - continue - } - } // Iterate through node's children for token, child := range nodeChange.after.children { @@ -247,108 +276,124 @@ func (r *rawDisk) writeChanges(ctx context.Context, changes *changeSummary) erro return errors.New("regular node child disk address missing") } } - nodeBytes := encodeDBNode_disk(&nodeChange.after.dbNode) - diskAddr, err := r.dm.write(nodeBytes) - if err != nil { - return err - } - - // If there is not a node with the key in the map, create a new map with the key being the ch - if childrenNodes[k] == (diskAddress{}) { - // If the node is a leaf node, compress the key and store the disk address - key := Key{length: k.length, value: k.value} - childrenNodes[key] = diskAddr - - } - - } - if err := r.dm.file.Sync(); err != nil { - return err - } - - // ITERATES THROUGH THE ROOT NODE A FINAL TIME - // ENSURES THAT ROOTNODE - if changes.rootChange.after.HasValue() { - // Adding remainingNodes to the root node - k := changes.rootChange.after.Value().key - for token, child := range changes.rootChange.after.Value().children { - - // CURRENT IMPLEMENTATION - completeKey := k.Extend(ToToken(token, BranchFactorToTokenSize[r.config.BranchFactor])) - if child.compressedKey.length != 0 { - completeKey = completeKey.Extend(child.compressedKey) - } - // Check whether or not there exists a value for the child in the map - if childrenNodes[completeKey] != (diskAddress{}) { - // If there is a value, set the disk address of the child to the value in the map - child.diskAddr = childrenNodes[completeKey] + if nodeChange.after.key == 
changes.rootChange.after.Value().key {
+				// writing rootNode to header
+				if changes.rootChange.after.HasValue() {
+					rootNode := changes.rootChange.after.Value()
+					rootNodeBytes := encodeDBNode_disk(&rootNode.dbNode)
+					rootDiskAddr = diskAddress{totalDiskAddress.offset + int64(totalOffset) + int64(numWritten), int64(len(rootNodeBytes))}
+					totalOffset += len(rootNodeBytes)
+					numWritten++
+					totalBytes = append(totalBytes, rootNodeBytes...)
+					if err != nil {
+						return err
+					}
+
+					// iterate through cache and delete all nodes with same key value
+					// as the root node
+					changes.rootChange.after.Value().dbNode.diskAddr = rootDiskAddr
+					if changes.rootChange.after.HasValue() {
+						compositeKey := fmt.Sprintf("%s:%d", changes.rootChange.after.Value().key.value, changes.rootChange.after.Value().key.length)
+						r.cache.Set(compositeKey, changes.rootChange.after.Value().dbNode, changes.rootChange.after.Value().dbNode.diskAddr.size)
+						if val, _ := r.deletedCache.Get(compositeKey); val != nil {
+							r.deletedCache.Del(compositeKey)
+						}
+					}
+
+					// log.Print("Setting root node in cache", changes.rootChange.after.Value().dbNode.diskAddr)
+					// add function that would write the root node to the disk while also updating the disk address
+					if err != nil {
+						return err
+					}
+					rootDiskAddrBytes := rootDiskAddr.bytes()
+					totalRootBytes = append(totalRootBytes, rootDiskAddrBytes[:]...)
+					totalBytes = append(totalBytes, 0)
+					// r.dm.file.WriteAt(rootDiskAddrBytes[:], 1)
+
+					rootKey := rootNode.key
+					rootKeyByteArray := encodeKey(rootKey)
+
+					// need to set the end of file to something different - the current issue is that it's overlapping with the end of file
+
+					size, err := r.dm.file.WriteAt(rootKeyByteArray, int64(totalDiskAddress.size+totalDiskAddress.offset))
+					if err != nil {
+						return err
+					}
+					rootKeyDiskAddr := diskAddress{int64(totalDiskAddress.size + totalDiskAddress.offset), int64(size)}
+					// log.Print("wrote root key to disk", rootKeyDiskAddr)
+					// log.Print("total disk address", totalDiskAddress)
+					rootKeyDiskAddrBytes := rootKeyDiskAddr.bytes()
+					totalRootBytes = append(totalRootBytes, rootKeyDiskAddrBytes[:]...)
+					r.dm.file.WriteAt(totalRootBytes[:], 1)
+
+					// print the tree
+					changes.rootChange.after.Value().dbNode.diskAddr = rootDiskAddr
 				}
-		}
 		for _, child := range changes.rootChange.after.Value().children {
-			// Check remainingNodes actually have disk addresses
-			if child.diskAddr == (diskAddress{}) {
-				return errors.New("root node child disk address missing")
+			} else {
+				nodeBytes := encodeDBNode_disk(&nodeChange.after.dbNode)
+				diskAddr := diskAddress{totalDiskAddress.offset + int64(totalOffset) + int64(numWritten), int64(len(nodeBytes))}
+				totalOffset += len(nodeBytes)
+				totalBytes = append(totalBytes, nodeBytes...)
+ totalBytes = append(totalBytes, 0) + numWritten++ + if err != nil { + return err } - } - // writing rootNode to header - rootNode := changes.rootChange.after.Value() - rootNodeBytes := encodeDBNode_disk(&rootNode.dbNode) - rootDiskAddr, err := r.dm.write(rootNodeBytes) - // add function that would write the root node to the disk while also updating the disk address - if err != nil { - return err - } - rootDiskAddrBytes := rootDiskAddr.bytes() - r.dm.file.WriteAt(rootDiskAddrBytes[:], 1) - - rootKey := rootNode.key - rooyKeyByteArray := encodeKey(rootKey) - rootKeyDiskAddr, err := r.dm.write(rooyKeyByteArray) - if err != nil { - return err - } - rootKeyDiskAddrBytes := rootKeyDiskAddr.bytes() - r.dm.file.WriteAt(rootKeyDiskAddrBytes[:], 17) + nodeChange.after.dbNode.diskAddr = diskAddr + if nodeChange.after.value.HasValue() { + compositeKey := fmt.Sprintf("%s:%d", nodeChange.after.key.value, nodeChange.after.key.length) + r.cache.Set(compositeKey, nodeChange.after.dbNode, nodeChange.after.dbNode.diskAddr.size) + if val, _ := r.deletedCache.Get(compositeKey); val != nil { + r.deletedCache.Del(compositeKey) + } + } + // log.Print("Setting node in cache", nodeChange.after.dbNode.diskAddr) + // If there is not a node with the key in the map, create a new map with the key being the ch + if childrenNodes[k] == (diskAddress{}) { + // If the node is a leaf node, compress the key and store the disk address + key := Key{length: k.length, value: k.value} + childrenNodes[key] = diskAddr + } - // print the tree - // err = r.printTree(rootDiskAddr, changes) - if err != nil { - return err } - changes.rootChange.after.Value().dbNode.diskAddr = rootDiskAddr + } + // write the total bytes to the disk + _, err = r.dm.file.WriteAt(totalBytes, totalDiskAddress.offset) + if err != nil { + return err } - if err := r.dm.file.Sync(); err != nil { + // err = r.printTree(rootDiskAddr, changes) + if err != nil { return err } + + // if err := r.dm.file.Sync(); err != nil { + // return err + // } + + // } + // if err := r.dm.file.Sync(); err != nil { + // return err + // } // ensuring that there are two trees, then add old one to freelist for _, nodeChange := range changes.nodes { - if nodeChange.before != nil { - r.dm.free.put(nodeChange.before.diskAddr) - - } - if nodeChange.before != nil && nodeChange.after == nil { - // make a new node that is the same as the old node but with has value set to false - tempdBNode := dbNode{} - nextBytes, err := r.dm.get(nodeChange.before.diskAddr) - if err != nil { - return err - } - err = decodeDBNode_disk(nextBytes, &tempdBNode) - if err != nil { - return err + if nodeChange.before != nil && nodeChange.after == nil { // r.dm.free.put(nodeChange.before.diskAddr) + // check that node has a key value and a disk address + if nodeChange.before.key != (Key{}) { + if nodeChange.before.dbNode.diskAddr != (diskAddress{}) { + compositeKey := fmt.Sprintf("%s:%d", nodeChange.before.key.value, nodeChange.before.key.length) + r.deletedCache.Set(compositeKey, nodeChange.before.dbNode, nodeChange.before.dbNode.diskAddr.size) + if val, _ := r.cache.Get(compositeKey); val != nil { + r.cache.Del(compositeKey) + } + } } - tempdBNode.value = maybe.Nothing[[]byte]() - // write the new node to disk - nodeBytes := encodeDBNode_disk(&tempdBNode) - // write new node at the same disk address - _, err = r.dm.file.WriteAt(nodeBytes, nodeChange.before.diskAddr.offset) - if err != nil { - return err - } - } + + // } + } return r.dm.file.Sync() } @@ -358,6 +403,45 @@ func (r *rawDisk) Clear() error { } func (r 
*rawDisk) getNode(key Key, hasValue bool) (*node, error) {
+	// Check the node cache before reading from disk
+
+	if val, found := r.cache.Get(fmt.Sprintf("%s:%d", key.value, key.length)); found {
+		if val != nil {
+			// Cache hit: rebuild the node from the cached dbNode
+
+			// Assuming val is of type dbNode, create the return node
+			returnNode := &node{
+				dbNode:      val.(dbNode),
+				key:         key,
+				valueDigest: val.(dbNode).value,
+			}
+
+			// Set the disk address from the cache entry
+			returnNode.dbNode.diskAddr = val.(dbNode).diskAddr
+
+			// Return the cached node without touching the disk
+			return returnNode, nil
+		}
+	}
+
+	if val, found := r.deletedCache.Get(fmt.Sprintf("%s:%d", key.value, key.length)); found {
+		if val != nil {
+			// Deleted-cache hit: return the node with its value cleared
+			returnNode := &node{
+				dbNode:      val.(dbNode),
+				key:         key,
+				valueDigest: val.(dbNode).value,
+			}
+
+			// Set the disk address from the cache entry and clear the value
+			returnNode.dbNode.value = maybe.Nothing[[]byte]()
+			returnNode.dbNode.diskAddr = val.(dbNode).diskAddr
+
+			// Return the tombstoned node
+			return returnNode, nil
+		}
+	}
+
 	// log.Printf("Getting node for key %v", key)
 	metadata, err := r.dm.getHeader()
 	if err != nil {
@@ -405,31 +489,31 @@ func (r *rawDisk) getNode(key Key, hasValue bool) (*node, error) {
 	}
 
 	if !key.HasPrefix(currKey) {
-		// log.Printf("key %v %v, currkey %v %v", key.length, []byte(key.value), currKey.length, []byte(currKey.value))
-		return nil, database.ErrNotFound //errors.New("Key doesn't match rootkey")
+		// log.Printf("key %v %v, currKey %v %v", key.length, []byte(key.value), currKey.length, []byte(currKey.value))
+		return nil, database.ErrNotFound //errors.New("Key doesn't match rootKey")
 	}
-	keylen := currKey.length // keeps track of where to start comparing prefixes in the key i.e. the length of key iterated so far
+	keyLen := currKey.length // keeps track of where to start comparing prefixes in the key i.e.
the length of key iterated so far // tempDiskAddr := diskAddress{} // while the entire path hasn't been matched - for keylen < (key.length) { + for keyLen < (key.length) { // confirm that a child exists and grab its address before attempting to load it - // log.Printf("Token: %v", key.Token(keylen, tokenSize)) + // log.Printf("Token: %v", key.Token(keyLen, tokenSize)) // log.Printf("currentDbNode value %s", currentDbNode.value.Value()) // log.Printf("num of children %d", len(currentDbNode.children)) // for token, child := range currentDbNode.children { // log.Printf("Token: %v for Child: %x", (token), child.compressedKey.value) // } - // log.Printf("Checking key %x", key.Token(keylen, tokenSize)) - nextChildEntry, hasChild := currentDbNode.children[key.Token(keylen, tokenSize)] + // log.Printf("Checking key %x", key.Token(keyLen, tokenSize)) + nextChildEntry, hasChild := currentDbNode.children[key.Token(keyLen, tokenSize)] - keylen += tokenSize + keyLen += tokenSize if !hasChild { return nil, database.ErrNotFound } // log.Printf("nextChildEntry %v", nextChildEntry) - if !key.iteratedHasPrefix(nextChildEntry.compressedKey, keylen, tokenSize) { + if !key.iteratedHasPrefix(nextChildEntry.compressedKey, keyLen, tokenSize) { // there was no child along the path or the child that was there doesn't match the remaining path // return nil, errors.New("Key doesn't match an existing node") return nil, database.ErrNotFound @@ -437,10 +521,10 @@ func (r *rawDisk) getNode(key Key, hasValue bool) (*node, error) { } // get the next key from the current child - currKey := ToToken(key.Token(keylen-tokenSize, tokenSize), tokenSize) + currKey := ToToken(key.Token(keyLen-tokenSize, tokenSize), tokenSize) // log.Printf("currKey %x", currKey) currKey = currKey.Extend(nextChildEntry.compressedKey) - keylen += currKey.length - tokenSize + keyLen += currKey.length - tokenSize // grab the next node along the path nextBytes, err := r.dm.get(nextChildEntry.diskAddr) @@ -463,6 +547,8 @@ func (r *rawDisk) getNode(key Key, hasValue bool) (*node, error) { } returnNode.dbNode.diskAddr = currentDbNode.diskAddr + // log.Print("Found node in rawdisk", returnNode.dbNode.diskAddr, returnNode.key.value) + returnNode.setValueDigest(r.hasher) return returnNode, nil }