Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
3 changes: 0 additions & 3 deletions .gitmodules

This file was deleted.

10 changes: 4 additions & 6 deletions CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -7,10 +7,6 @@ set(CMAKE_EXPORT_COMPILE_COMMANDS ON)

include_directories(include third_party)

set(PA_OVERRIDE ON)
set(PA_BUILD_TESTS OFF CACHE BOOL "Disable palloc tests for embedded pomai_cache" FORCE)
add_subdirectory(third_party/palloc)

add_library(pomaicache SHARED
src/engine/engine.cpp
src/engine/ssd_store.cpp
Expand All @@ -28,8 +24,6 @@ add_library(pomaicache SHARED
src/bindings/c_api.cc
)

target_link_libraries(pomaicache PUBLIC palloc)

include(CTest)
if(BUILD_TESTING)
add_library(mini_catch_main third_party/catch2/catch_main.cpp)
Expand All @@ -40,8 +34,12 @@ if(BUILD_TESTING)
add_executable(test_ai_cache tests/test_ai_cache.cpp)
target_link_libraries(test_ai_cache PRIVATE pomaicache mini_catch_main)

add_executable(test_prompt_cache tests/test_prompt_cache.cpp)
target_link_libraries(test_prompt_cache PRIVATE pomaicache mini_catch_main)

add_test(NAME test_engine COMMAND test_engine)
add_test(NAME test_ai_cache COMMAND test_ai_cache)
add_test(NAME test_prompt_cache COMMAND test_prompt_cache)

if(NOT WIN32)
add_executable(test_integration tests/test_integration.cpp)
Expand Down
11 changes: 11 additions & 0 deletions ai_bench_summary.json
Original file line number Diff line number Diff line change
@@ -0,0 +1,11 @@
{
"workloads": [
{"name":"embedding_zipf","ops_s":459.52,"p50_us":130.28,"p95_us":11175.92,"p99_us":12889.39,"p999_us":14281.39,"hit_rate":0.82},
{"name":"prompt_response_churn","ops_s":155.11,"p50_us":842.28,"p95_us":21994.37,"p99_us":25153.57,"p999_us":45459.81,"hit_rate":0.73},
{"name":"rerank_ttl_storm","ops_s":323.85,"p50_us":549.67,"p95_us":10760.80,"p99_us":11009.46,"p999_us":11728.75,"hit_rate":0.75},
{"name":"mixed_rag_pipeline","ops_s":51.31,"p50_us":8994.29,"p95_us":61316.55,"p99_us":69901.89,"p999_us":113447.43,"hit_rate":0.74}
],
"ssd_mb_s": 0.0,
"engine_ctor_ms": 0.18,
"dedup_ratio": 0.0
}
21 changes: 15 additions & 6 deletions bench/prompt_cache_bench.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -213,9 +213,15 @@ run_prompt_workload_embedded(const std::string &name,
}

int main(int argc, char **argv) {
bool quick = false;
std::string json_out = "prompt_cache_bench.json";
if (argc > 1)
json_out = argv[1];
for (int i = 1; i < argc; ++i) {
if (std::string(argv[i]) == "--quick")
quick = true;
else
json_out = argv[i];
}
const int ops_per_workload = quick ? 500 : 5000;

EngineConfig cfg;
cfg.memory_limit_bytes = 64 * 1024 * 1024;
Expand All @@ -234,19 +240,22 @@ int main(int argc, char **argv) {
PromptCacheManager pcm(engine, ai, pcfg);

std::cout << std::fixed << std::setprecision(2);
std::cout << "Embedded token/prompt cache benchmark (in-process, no network)\n";
std::cout << "Embedded token/prompt cache benchmark (in-process, no network)";
if (quick)
std::cout << " [--quick]";
std::cout << "\n";

std::vector<PromptBenchResult> results;
results.push_back(run_prompt_workload("chatty_short_sessions",
pcm,
5000,
ops_per_workload,
200, // hot prefixes
128, // max tokens per prompt
0.30 // writes
));
results.push_back(run_prompt_workload("long_lived_system_prompts",
pcm,
5000,
ops_per_workload,
50, // fewer prefixes, more reuse
256, // max tokens per prompt
0.10 // mostly reads
Expand All @@ -259,7 +268,7 @@ int main(int argc, char **argv) {
pomaicache::PomaiCache cache(embedded_cfg);
results.push_back(run_prompt_workload_embedded("embedded_api_hot_prompts",
cache,
5000,
ops_per_workload,
100,
256,
0.25));
Expand Down
7 changes: 7 additions & 0 deletions prompt_cache_bench.json
Original file line number Diff line number Diff line change
@@ -0,0 +1,7 @@
{
"prompt_cache_workloads": [
{"name":"chatty_short_sessions","ops_s":142.875,"p50_us":5932.13,"p95_us":13378.5,"p99_us":20634.9,"hit_rate":0.143678,"avg_savings_ratio":0.00735079},
{"name":"long_lived_system_prompts","ops_s":67.667,"p50_us":14609.5,"p95_us":17571,"p99_us":24018.2,"hit_rate":0.569869,"avg_savings_ratio":0.0215384},
{"name":"embedded_api_hot_prompts","ops_s":137.045,"p50_us":8095.73,"p95_us":13802.2,"p99_us":14491.8,"hit_rate":0,"avg_savings_ratio":0}
]
}
25 changes: 25 additions & 0 deletions scripts/run_tests_and_benches.sh
Original file line number Diff line number Diff line change
@@ -0,0 +1,25 @@
#!/usr/bin/env bash
# Configure, build, and run all unit tests and embedded benchmarks in one pass.
#
# Usage:  scripts/run_tests_and_benches.sh
# Environment:
#   BUILD_DIR  build tree to configure/build into (default: <repo root>/build)
#
# No externally started server is required.
# prompt_cache_bench is run with --quick so the full run completes in
# reasonable time.
#
# Exits non-zero as soon as any step fails (set -e), so the final success
# message is only printed when every test and benchmark returned 0.
set -euo pipefail

# Repository root is the parent of the directory that contains this script.
ROOT="$(cd "$(dirname "$0")/.." && pwd)"
BUILD_DIR="${BUILD_DIR:-${ROOT}/build}"
mkdir -p "${BUILD_DIR}"

cmake -S "${ROOT}" -B "${BUILD_DIR}" -DBUILD_TESTING=ON -DBUILD_PYTHON_BINDINGS=OFF
cmake --build "${BUILD_DIR}" -j

echo "========== Running tests =========="
ctest --test-dir "${BUILD_DIR}" --output-on-failure

echo ""
echo "========== Running benchmarks =========="
# These benchmarks keep the caller's working directory so that any relative
# output paths (e.g. prompt_cache_bench's default prompt_cache_bench.json)
# land where they did before.
"${BUILD_DIR}/pomai_cache_bench"
"${BUILD_DIR}/ai_artifact_bench"
"${BUILD_DIR}/vector_cache_bench"
"${BUILD_DIR}/prompt_cache_bench" --quick

# The crash harness probes for ./pomai_cache_server relative to its working
# directory and prints SKIP (exit 0) when it is not there. Launching it by
# path from an arbitrary directory therefore skipped it silently every time.
# Run it from BUILD_DIR, in a subshell so this script's own cwd is unchanged.
# NOTE(review): assumes the server binary, when built, sits next to the
# harness in BUILD_DIR - confirm against CMakeLists.txt.
(cd "${BUILD_DIR}" && ./pomai_cache_crash_harness)

echo ""
echo "All tests and benchmarks completed successfully."
5 changes: 5 additions & 0 deletions tests/crash_harness.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,7 @@
#include <csignal>
#include <cstdlib>
#include <cstring>
#include <iostream>
#include <netinet/in.h>
#include <optional>
#include <string>
Expand Down Expand Up @@ -86,6 +87,10 @@ pid_t spawn_server(int port, const std::string &dir, const std::string &fsync) {
} // namespace

int main(int argc, char **argv) {
if (access("./pomai_cache_server", X_OK) != 0) {
std::cout << "SKIP: pomai_cache_server not found (build the server to run crash harness)\n";
return 0;
}
std::string fsync = "everysec";
if (argc > 1)
fsync = argv[1];
Expand Down
1 change: 0 additions & 1 deletion third_party/palloc
Submodule palloc deleted from 5dca0a
18 changes: 18 additions & 0 deletions vector_bench_results.json
Original file line number Diff line number Diff line change
@@ -0,0 +1,18 @@
{
"vector_benchmarks": [
{"label":"dim128-1K","dim":128,"dataset_size":1000,"insert_ops_s":51700.9,"search_ops_s":4701.12,"search_p50_us":204.181,"search_p99_us":318.114,"memory_mb":0.586435,"bytes_per_vector":614.922,"recall_at_k":0.346,"sim_hit_rate":0.346,"quant_f16_ratio":2,"quant_i8_ratio":4},
{"label":"dim128-10K","dim":128,"dataset_size":10000,"insert_ops_s":5217.21,"search_ops_s":410.475,"search_p50_us":2420.76,"search_p99_us":2793.61,"memory_mb":5.87361,"bytes_per_vector":615.892,"recall_at_k":0.352,"sim_hit_rate":0.352,"quant_f16_ratio":2,"quant_i8_ratio":4},
{"label":"dim384-1K","dim":384,"dataset_size":1000,"insert_ops_s":52397.8,"search_ops_s":1679.02,"search_p50_us":593.619,"search_p99_us":753.053,"memory_mb":1.563,"bytes_per_vector":1638.92,"recall_at_k":0.334,"sim_hit_rate":0.334,"quant_f16_ratio":2,"quant_i8_ratio":4},
{"label":"dim384-10K","dim":384,"dataset_size":10000,"insert_ops_s":5606.39,"search_ops_s":152.023,"search_p50_us":6556.22,"search_p99_us":7069.5,"memory_mb":15.6392,"bytes_per_vector":1639.89,"recall_at_k":0.334,"sim_hit_rate":0.334,"quant_f16_ratio":2,"quant_i8_ratio":4},
{"label":"dim768-1K","dim":768,"dataset_size":1000,"insert_ops_s":50942.3,"search_ops_s":841.376,"search_p50_us":1184.97,"search_p99_us":1373.65,"memory_mb":3.02784,"bytes_per_vector":3174.92,"recall_at_k":0.334,"sim_hit_rate":0.334,"quant_f16_ratio":2,"quant_i8_ratio":4},
{"label":"dim768-10K","dim":768,"dataset_size":10000,"insert_ops_s":5775.09,"search_ops_s":79.3694,"search_p50_us":12559.8,"search_p99_us":13241.6,"memory_mb":30.2877,"bytes_per_vector":3175.89,"recall_at_k":0.334,"sim_hit_rate":0.334,"quant_f16_ratio":2,"quant_i8_ratio":4},
{"label":"dim1536-1K","dim":1536,"dataset_size":1000,"insert_ops_s":43567.7,"search_ops_s":408.786,"search_p50_us":2442.17,"search_p99_us":2671.27,"memory_mb":5.95753,"bytes_per_vector":6246.92,"recall_at_k":0.334,"sim_hit_rate":0.334,"quant_f16_ratio":2,"quant_i8_ratio":4},
{"label":"dim1536-10K","dim":1536,"dataset_size":10000,"insert_ops_s":5547.31,"search_ops_s":39.6758,"search_p50_us":24957.7,"search_p99_us":29559.9,"memory_mb":59.5845,"bytes_per_vector":6247.89,"recall_at_k":0.334,"sim_hit_rate":0.334,"quant_f16_ratio":2,"quant_i8_ratio":4}
],
"e2e_comparisons": [
{"scenario":"identical_prompts","exact_hit_rate":0,"sim_hit_rate":1,"hit_rate_boost":1,"avg_latency_us":296.367,"dollar_saved":15,"tokens_saved":500000},
{"scenario":"slight_rephrase","exact_hit_rate":0,"sim_hit_rate":1,"hit_rate_boost":1,"avg_latency_us":356.408,"dollar_saved":15,"tokens_saved":500000},
{"scenario":"moderate_rephrase","exact_hit_rate":0,"sim_hit_rate":1,"hit_rate_boost":1,"avg_latency_us":302.969,"dollar_saved":15,"tokens_saved":500000},
{"scenario":"heavy_rephrase","exact_hit_rate":0,"sim_hit_rate":0.878,"hit_rate_boost":0.878,"avg_latency_us":271.672,"dollar_saved":13.17,"tokens_saved":439000}
]
}
Loading