diff --git a/.gitmodules b/.gitmodules deleted file mode 100644 index 889e4cf..0000000 --- a/.gitmodules +++ /dev/null @@ -1,3 +0,0 @@ -[submodule "third_party/palloc"] - path = third_party/palloc - url = https://github.com/AutoCookies/palloc.git diff --git a/CMakeLists.txt b/CMakeLists.txt index 0de9154..a24db3e 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -7,10 +7,6 @@ set(CMAKE_EXPORT_COMPILE_COMMANDS ON) include_directories(include third_party) -set(PA_OVERRIDE ON) -set(PA_BUILD_TESTS OFF CACHE BOOL "Disable palloc tests for embedded pomai_cache" FORCE) -add_subdirectory(third_party/palloc) - add_library(pomaicache SHARED src/engine/engine.cpp src/engine/ssd_store.cpp @@ -28,8 +24,6 @@ add_library(pomaicache SHARED src/bindings/c_api.cc ) -target_link_libraries(pomaicache PUBLIC palloc) - include(CTest) if(BUILD_TESTING) add_library(mini_catch_main third_party/catch2/catch_main.cpp) @@ -40,8 +34,12 @@ if(BUILD_TESTING) add_executable(test_ai_cache tests/test_ai_cache.cpp) target_link_libraries(test_ai_cache PRIVATE pomaicache mini_catch_main) + add_executable(test_prompt_cache tests/test_prompt_cache.cpp) + target_link_libraries(test_prompt_cache PRIVATE pomaicache mini_catch_main) + add_test(NAME test_engine COMMAND test_engine) add_test(NAME test_ai_cache COMMAND test_ai_cache) + add_test(NAME test_prompt_cache COMMAND test_prompt_cache) if(NOT WIN32) add_executable(test_integration tests/test_integration.cpp) diff --git a/ai_bench_summary.json b/ai_bench_summary.json new file mode 100644 index 0000000..d97bfcc --- /dev/null +++ b/ai_bench_summary.json @@ -0,0 +1,11 @@ +{ + "workloads": [ + {"name":"embedding_zipf","ops_s":459.52,"p50_us":130.28,"p95_us":11175.92,"p99_us":12889.39,"p999_us":14281.39,"hit_rate":0.82}, + {"name":"prompt_response_churn","ops_s":155.11,"p50_us":842.28,"p95_us":21994.37,"p99_us":25153.57,"p999_us":45459.81,"hit_rate":0.73}, + {"name":"rerank_ttl_storm","ops_s":323.85,"p50_us":549.67,"p95_us":10760.80,"p99_us":11009.46,"p999_us":11728.75,"hit_rate":0.75}, + {"name":"mixed_rag_pipeline","ops_s":51.31,"p50_us":8994.29,"p95_us":61316.55,"p99_us":69901.89,"p999_us":113447.43,"hit_rate":0.74} + ], + "ssd_mb_s": 0.0, + "engine_ctor_ms": 0.18, + "dedup_ratio": 0.0 +} diff --git a/bench/prompt_cache_bench.cpp b/bench/prompt_cache_bench.cpp index fa9a5bd..7d838dc 100644 --- a/bench/prompt_cache_bench.cpp +++ b/bench/prompt_cache_bench.cpp @@ -213,9 +213,15 @@ run_prompt_workload_embedded(const std::string &name, } int main(int argc, char **argv) { + bool quick = false; std::string json_out = "prompt_cache_bench.json"; - if (argc > 1) - json_out = argv[1]; + for (int i = 1; i < argc; ++i) { + if (std::string(argv[i]) == "--quick") + quick = true; + else + json_out = argv[i]; + } + const int ops_per_workload = quick ? 500 : 5000; EngineConfig cfg; cfg.memory_limit_bytes = 64 * 1024 * 1024; @@ -234,19 +240,22 @@ int main(int argc, char **argv) { PromptCacheManager pcm(engine, ai, pcfg); std::cout << std::fixed << std::setprecision(2); - std::cout << "Embedded token/prompt cache benchmark (in-process, no network)\n"; + std::cout << "Embedded token/prompt cache benchmark (in-process, no network)"; + if (quick) + std::cout << " [--quick]"; + std::cout << "\n"; std::vector results; results.push_back(run_prompt_workload("chatty_short_sessions", pcm, - 5000, + ops_per_workload, 200, // hot prefixes 128, // max tokens per prompt 0.30 // writes )); results.push_back(run_prompt_workload("long_lived_system_prompts", pcm, - 5000, + ops_per_workload, 50, // fewer prefixes, more reuse 256, // max tokens per prompt 0.10 // mostly reads @@ -259,7 +268,7 @@ int main(int argc, char **argv) { pomaicache::PomaiCache cache(embedded_cfg); results.push_back(run_prompt_workload_embedded("embedded_api_hot_prompts", cache, - 5000, + ops_per_workload, 100, 256, 0.25)); diff --git a/prompt_cache_bench.json b/prompt_cache_bench.json new file mode 100644 index 0000000..0c92e65 --- /dev/null +++ b/prompt_cache_bench.json @@ -0,0 +1,7 @@ +{ + "prompt_cache_workloads": [ + {"name":"chatty_short_sessions","ops_s":142.875,"p50_us":5932.13,"p95_us":13378.5,"p99_us":20634.9,"hit_rate":0.143678,"avg_savings_ratio":0.00735079}, + {"name":"long_lived_system_prompts","ops_s":67.667,"p50_us":14609.5,"p95_us":17571,"p99_us":24018.2,"hit_rate":0.569869,"avg_savings_ratio":0.0215384}, + {"name":"embedded_api_hot_prompts","ops_s":137.045,"p50_us":8095.73,"p95_us":13802.2,"p99_us":14491.8,"hit_rate":0,"avg_savings_ratio":0} + ] +} diff --git a/scripts/run_tests_and_benches.sh b/scripts/run_tests_and_benches.sh new file mode 100755 index 0000000..e047259 --- /dev/null +++ b/scripts/run_tests_and_benches.sh @@ -0,0 +1,25 @@ +#!/usr/bin/env bash +# Run all unit tests and embedded benchmarks (no server required). +# Use prompt_cache_bench --quick so the full run completes in reasonable time. +set -euo pipefail + +ROOT="$(cd "$(dirname "$0")/.." && pwd)" +BUILD_DIR="${BUILD_DIR:-${ROOT}/build}" +mkdir -p "${BUILD_DIR}" + +cmake -S "${ROOT}" -B "${BUILD_DIR}" -DBUILD_TESTING=ON -DBUILD_PYTHON_BINDINGS=OFF +cmake --build "${BUILD_DIR}" -j + +echo "========== Running tests ==========" +ctest --test-dir "${BUILD_DIR}" --output-on-failure + +echo "" +echo "========== Running benchmarks ==========" +"${BUILD_DIR}/pomai_cache_bench" +"${BUILD_DIR}/ai_artifact_bench" +"${BUILD_DIR}/vector_cache_bench" +"${BUILD_DIR}/prompt_cache_bench" --quick +"${BUILD_DIR}/pomai_cache_crash_harness" + +echo "" +echo "All tests and benchmarks completed successfully." diff --git a/tests/crash_harness.cpp b/tests/crash_harness.cpp index 72b4439..a6f44e8 100644 --- a/tests/crash_harness.cpp +++ b/tests/crash_harness.cpp @@ -2,6 +2,7 @@ #include #include #include +#include #include #include #include @@ -86,6 +87,10 @@ pid_t spawn_server(int port, const std::string &dir, const std::string &fsync) { } // namespace int main(int argc, char **argv) { + if (access("./pomai_cache_server", X_OK) != 0) { + std::cout << "SKIP: pomai_cache_server not found (build the server to run crash harness)\n"; + return 0; + } std::string fsync = "everysec"; if (argc > 1) fsync = argv[1]; diff --git a/third_party/palloc b/third_party/palloc deleted file mode 160000 index 5dca0af..0000000 --- a/third_party/palloc +++ /dev/null @@ -1 +0,0 @@ -Subproject commit 5dca0af3c6d59853e5a41d6b79d5f9955867e28c diff --git a/vector_bench_results.json b/vector_bench_results.json new file mode 100644 index 0000000..1637d33 --- /dev/null +++ b/vector_bench_results.json @@ -0,0 +1,18 @@ +{ + "vector_benchmarks": [ + {"label":"dim128-1K","dim":128,"dataset_size":1000,"insert_ops_s":51700.9,"search_ops_s":4701.12,"search_p50_us":204.181,"search_p99_us":318.114,"memory_mb":0.586435,"bytes_per_vector":614.922,"recall_at_k":0.346,"sim_hit_rate":0.346,"quant_f16_ratio":2,"quant_i8_ratio":4}, + {"label":"dim128-10K","dim":128,"dataset_size":10000,"insert_ops_s":5217.21,"search_ops_s":410.475,"search_p50_us":2420.76,"search_p99_us":2793.61,"memory_mb":5.87361,"bytes_per_vector":615.892,"recall_at_k":0.352,"sim_hit_rate":0.352,"quant_f16_ratio":2,"quant_i8_ratio":4}, + {"label":"dim384-1K","dim":384,"dataset_size":1000,"insert_ops_s":52397.8,"search_ops_s":1679.02,"search_p50_us":593.619,"search_p99_us":753.053,"memory_mb":1.563,"bytes_per_vector":1638.92,"recall_at_k":0.334,"sim_hit_rate":0.334,"quant_f16_ratio":2,"quant_i8_ratio":4}, + {"label":"dim384-10K","dim":384,"dataset_size":10000,"insert_ops_s":5606.39,"search_ops_s":152.023,"search_p50_us":6556.22,"search_p99_us":7069.5,"memory_mb":15.6392,"bytes_per_vector":1639.89,"recall_at_k":0.334,"sim_hit_rate":0.334,"quant_f16_ratio":2,"quant_i8_ratio":4}, + {"label":"dim768-1K","dim":768,"dataset_size":1000,"insert_ops_s":50942.3,"search_ops_s":841.376,"search_p50_us":1184.97,"search_p99_us":1373.65,"memory_mb":3.02784,"bytes_per_vector":3174.92,"recall_at_k":0.334,"sim_hit_rate":0.334,"quant_f16_ratio":2,"quant_i8_ratio":4}, + {"label":"dim768-10K","dim":768,"dataset_size":10000,"insert_ops_s":5775.09,"search_ops_s":79.3694,"search_p50_us":12559.8,"search_p99_us":13241.6,"memory_mb":30.2877,"bytes_per_vector":3175.89,"recall_at_k":0.334,"sim_hit_rate":0.334,"quant_f16_ratio":2,"quant_i8_ratio":4}, + {"label":"dim1536-1K","dim":1536,"dataset_size":1000,"insert_ops_s":43567.7,"search_ops_s":408.786,"search_p50_us":2442.17,"search_p99_us":2671.27,"memory_mb":5.95753,"bytes_per_vector":6246.92,"recall_at_k":0.334,"sim_hit_rate":0.334,"quant_f16_ratio":2,"quant_i8_ratio":4}, + {"label":"dim1536-10K","dim":1536,"dataset_size":10000,"insert_ops_s":5547.31,"search_ops_s":39.6758,"search_p50_us":24957.7,"search_p99_us":29559.9,"memory_mb":59.5845,"bytes_per_vector":6247.89,"recall_at_k":0.334,"sim_hit_rate":0.334,"quant_f16_ratio":2,"quant_i8_ratio":4} + ], + "e2e_comparisons": [ + {"scenario":"identical_prompts","exact_hit_rate":0,"sim_hit_rate":1,"hit_rate_boost":1,"avg_latency_us":296.367,"dollar_saved":15,"tokens_saved":500000}, + {"scenario":"slight_rephrase","exact_hit_rate":0,"sim_hit_rate":1,"hit_rate_boost":1,"avg_latency_us":356.408,"dollar_saved":15,"tokens_saved":500000}, + {"scenario":"moderate_rephrase","exact_hit_rate":0,"sim_hit_rate":1,"hit_rate_boost":1,"avg_latency_us":302.969,"dollar_saved":15,"tokens_saved":500000}, + {"scenario":"heavy_rephrase","exact_hit_rate":0,"sim_hit_rate":0.878,"hit_rate_boost":0.878,"avg_latency_us":271.672,"dollar_saved":13.17,"tokens_saved":439000} + ] +}