bitcoin-core
diff --git a/benchmark_analysis.ipynb b/benchmark_analysis.ipynb
Lines changed: 102 additions & 0 deletions
diff --git a/simd-bench.sh b/simd-bench.sh
Lines changed: 57 additions & 0 deletions
diff --git a/simd-build.sh b/simd-build.sh
Lines changed: 30 additions & 0 deletions
diff --git a/simd-test.sh b/simd-test.sh
Lines changed: 30 additions & 0 deletions
diff --git a/src/field_10x26_impl.h b/src/field_10x26_impl.h
Lines changed: 55 additions & 40 deletions
@@ -0,0 +1,102 @@
+{
+ "cells": [
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "b49ae6d6",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "import pandas as pd\n",
+    "import numpy as np\n",
+    "import matplotlib.pyplot as plt\n",
+    "import seaborn as sns\n",
+    "\n",
+    "plt.rcParams['figure.figsize'] = (16, 10)\n",
+    "plt.rcParams['font.size'] = 11"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "d236980d",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "def parse_csv(filepath):\n",
+    "    with open(filepath, 'r') as f:\n",
+    "        lines = f.readlines()[1:]  # drop the first line of the log\n",
+    "    \n",
+    "    data = []\n",
+    "    for line in lines:\n",
+    "        line = line.strip()\n",
+    "        if line and ',' in line and not line.endswith(','):\n",
+    "            parts = line.split(',')\n",
+    "            if len(parts) >= 3:\n",
+    "                try:\n",
+    "                    data.append({'Benchmark': parts[0].strip(), 'Time': float(parts[2])})\n",
+    "                except ValueError:  # third field is not numeric; skip the row\n",
+    "                    continue\n",
+    "    return pd.DataFrame(data, columns=['Benchmark', 'Time'])  # fixed columns so an empty result still merges\n",
+    "\n",
+    "baseline = parse_csv('BASELINE_bench.csv')\n",
+    "custom = parse_csv('CUSTOM_AVX2_bench.csv')\n",
+    "\n",
+    "merged = baseline.merge(custom, on='Benchmark', suffixes=('_baseline', '_custom'))\n",
+    "merged['improvement'] = ((merged['Time_baseline'] - merged['Time_custom']) / merged['Time_baseline']) * 100"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "8442b12d",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "sorted_data = merged.sort_values('improvement', ascending=False)\n",
+    "top10 = sorted_data.head(10)\n",
+    "bottom10 = sorted_data.tail(10)\n",
+    "filtered = pd.concat([top10, bottom10])"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "aa07550a",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "heatmap_data = filtered.set_index('Benchmark')[['improvement']]\n",
+    "\n",
+    "plt.figure(figsize=(8, 12))\n",
+    "sns.heatmap(heatmap_data, annot=True, fmt='.1f', cmap='RdYlGn', center=0, \n",
+    "            cbar_kws={'label': 'Performance Improvement (%)'})\n",
+    "plt.title('CUSTOM_AVX2 vs BASELINE Performance (Top/Bottom 10)', fontsize=14, fontweight='bold')\n",
+    "plt.ylabel('')\n",
+    "plt.tight_layout()\n",
+    "plt.show()"
+   ]
+  }
+ ],
+ "metadata": {
+  "kernelspec": {
+   "display_name": "Python 3",
+   "language": "python",
+   "name": "python3"
+  },
+  "language_info": {
+   "codemirror_mode": {
+    "name": "ipython",
+    "version": 3
+   },
+   "file_extension": ".py",
+   "mimetype": "text/x-python",
+   "name": "python",
+   "nbconvert_exporter": "python",
+   "pygments_lexer": "ipython3",
+   "version": "3.12.3"
+  }
+ },
+ "nbformat": 4,
+ "nbformat_minor": 5
+}
@@ -0,0 +1,57 @@
+#!/bin/bash
+# Benchmark driver: runs the bench binaries of the BASELINE and CUSTOM_AVX2 builds, one log per config.
+set -e
+
+BENCH_ITERS=${SECP256K1_BENCH_ITERS:-20000}
+
+GREEN='\033[0;32m'
+RED='\033[0;31m'
+YELLOW='\033[1;33m'
+NC='\033[0m'
+
+[[ ! -e /sys/devices/system/cpu/intel_pstate/no_turbo ]] || echo 1 | sudo tee /sys/devices/system/cpu/intel_pstate/no_turbo > /dev/null  # skip when the intel_pstate knob is absent
+sudo cpupower -c 0 frequency-set -g performance > /dev/null
+command -v taskset > /dev/null && TASKSET_CMD="taskset -c 0"
+
+run_bench() {  # usage: run_bench DIR BIN LOG -- runs DIR/bin/BIN, appending its output to LOG
+  local dir=$1 bin=$2 log=$3
+  (
+    cd "$dir" || exit
+    $TASKSET_CMD env SECP256K1_BENCH_ITERS="$BENCH_ITERS" nice -n 0 "./bin/$bin" >> "../../$log" 2>&1 || exit  # otherwise the echo below masks a failure
+    echo "" >> "../../$log"
+  )
+}
+
+bench_all() {  # usage: bench_all CONFIG -- benchmarks build/CONFIG into CONFIG_bench.csv
+  local config="$1"
+  local dir="build/$config"
+  local log="${config}_bench.csv"
+
+  if [[ ! -d "$dir" ]]; then
+    echo -e "${RED}✖ $config${NC} (no dir)"
+    return 1
+  fi
+  
+  {  # '>' below truncates any previous log before the header is written
+    echo "Benchmark results for $config"
+    echo "Generated on $(date)"
+    echo "Iterations: $BENCH_ITERS"
+    echo ""
+  } > "$log"
+
+  for bin in bench bench_ecmult bench_internal; do  # stops at the first binary reported as failed
+    if run_bench "$dir" "$bin" "$log"; then
+      echo -e "  ${GREEN}✔ $bin${NC}"
+    else
+      echo -e "  ${RED}✖ $bin${NC}"
+      return 1
+    fi
+  done
+
+  echo -e "${GREEN}✔ $config${NC} (log: $log)"
+}
+
+bench_all "BASELINE"
+bench_all "CUSTOM_AVX2"
+
+echo -e "\n${YELLOW}All benchmarks successful. Logs in project root${NC}"
@@ -0,0 +1,30 @@
+#!/bin/bash
+set -e
+
+mkdir -p build
+
+GREEN='\033[0;32m'
+RED='\033[0;31m'
+YELLOW='\033[1;33m'
+NC='\033[0m'
+
+run_build() {  # usage: run_build CONFIG EXTRA_CFLAGS -- configures and builds into build/CONFIG
+  local config="$1"
+  local flags="-O3 -mavx2 $2"  # every config gets -O3 -mavx2; $2 appends the per-config flag
+  local dir="build/$config"
+  local log="${config}_build.log"
+  
+  mkdir -p "$dir"
+  
+  if (cd "$dir" && cmake ../.. -G Ninja -DCMAKE_BUILD_TYPE=Release -DSECP256K1_APPEND_CFLAGS="$flags" >"../../$log" 2>&1 && ninja >>"../../$log" 2>&1); then
+    echo -e "${GREEN}✔ $config${NC}"
+  else
+    echo -e "${RED}✖ $config failed${NC}"
+    return 1  # the script runs under set -e, so a failed config stops the whole run
+  fi
+}
+
+run_build "BASELINE"    "-U__AVX2__"
+run_build "CUSTOM_AVX2" "-D__AVX2__"
+
+echo -e "\n${YELLOW}All builds done. Logs in project root${NC}"
@@ -0,0 +1,30 @@
+#!/bin/bash
+set -e
+
+GREEN='\033[0;32m'
+RED='\033[0;31m'
+YELLOW='\033[1;33m'
+NC='\033[0m'
+
+run_test() {  # usage: run_test CONFIG -- runs ctest in build/CONFIG, logging to CONFIG_test.log
+  local config="$1"
+  local dir="build/$config"
+  local log="${config}_test.log"
+
+  if [[ ! -d "$dir" ]]; then
+    echo -e "${RED}✖ $config${NC} (no dir)"
+    return 1
+  fi
+
+  if (cd "$dir" && ctest --output-on-failure -j"$(nproc)" &> "../../$log"); then  # &> captures stdout and stderr
+    echo -e "${GREEN}✔ $config${NC} (log: $log)"
+  else
+    echo -e "${RED}✖ $config${NC} (log: $log)"
+    return 1
+  fi
+}
+
+run_test "BASELINE"
+run_test "CUSTOM_AVX2"
+
+echo -e "\n${YELLOW}All tests passed. Logs in project root${NC}"
@@ -12,6 +12,10 @@
 #include "field.h"
 #include "modinv32_impl.h"
 
+#if defined(__x86_64__) || defined(__i386__) || defined(_M_X64) || defined(_M_IX86)
+# include <immintrin.h>
+#endif /* x86 target; a bare X86 macro is not predefined by GCC, Clang or MSVC */
+
 #ifdef VERIFY
 static void secp256k1_fe_impl_verify(const secp256k1_fe *a) {
     const uint32_t *d = a->n;
@@ -38,16 +42,20 @@ static void secp256k1_fe_impl_verify(const secp256k1_fe *a) {
 #endif
 
 static void secp256k1_fe_impl_get_bounds(secp256k1_fe *r, int m) {
-    r->n[0] = 0x3FFFFFFUL * 2 * m;
-    r->n[1] = 0x3FFFFFFUL * 2 * m;
-    r->n[2] = 0x3FFFFFFUL * 2 * m;
-    r->n[3] = 0x3FFFFFFUL * 2 * m;
-    r->n[4] = 0x3FFFFFFUL * 2 * m;
-    r->n[5] = 0x3FFFFFFUL * 2 * m;
-    r->n[6] = 0x3FFFFFFUL * 2 * m;
-    r->n[7] = 0x3FFFFFFUL * 2 * m;
-    r->n[8] = 0x3FFFFFFUL * 2 * m;
-    r->n[9] = 0x03FFFFFUL * 2 * m;
+    const uint32_t two_m = 2 * m; /* uint32_t matches the limb type, so the stores below do not narrow */
+    const uint32_t bound1 = 0x3FFFFFFUL * two_m; /* value stored in limbs 0..8 */
+    const uint32_t bound2 = 0x03FFFFFUL * two_m; /* value stored in limb 9 */
+
+    r->n[0] = bound1;
+    r->n[1] = bound1;
+    r->n[2] = bound1;
+    r->n[3] = bound1;
+    r->n[4] = bound1;
+    r->n[5] = bound1;
+    r->n[6] = bound1;
+    r->n[7] = bound1;
+    r->n[8] = bound1;
+    r->n[9] = bound2;
 }
 
 static void secp256k1_fe_impl_normalize(secp256k1_fe *r) {
@@ -257,8 +265,8 @@ static int secp256k1_fe_impl_normalizes_to_zero_var(const secp256k1_fe *r) {
 }
 
 SECP256K1_INLINE static void secp256k1_fe_impl_set_int(secp256k1_fe *r, int a) {
+    memset(r->n, 0, sizeof(r->n)); /* zero every limb; limb 0 is overwritten on the next line */
     r->n[0] = a;
-    r->n[1] = r->n[2] = r->n[3] = r->n[4] = r->n[5] = r->n[6] = r->n[7] = r->n[8] = r->n[9] = 0;
 }
 
 SECP256K1_INLINE static int secp256k1_fe_impl_is_zero(const secp256k1_fe *a) {
@@ -272,12 +280,11 @@ SECP256K1_INLINE static int secp256k1_fe_impl_is_odd(const secp256k1_fe *a) {
 
 static int secp256k1_fe_impl_cmp_var(const secp256k1_fe *a, const secp256k1_fe *b) {
     int i;
+    int diff;
     for (i = 9; i >= 0; i--) {
-        if (a->n[i] > b->n[i]) {
-            return 1;
-        }
-        if (a->n[i] < b->n[i]) {
-            return -1;
+        diff = (a->n[i] > b->n[i]) - (a->n[i] < b->n[i]);
+        if (diff != 0) {
+            return diff;
         }
     }
     return 0;
@@ -338,24 +345,32 @@ static void secp256k1_fe_impl_get_b32(unsigned char *r, const secp256k1_fe *a) {
 }
 
 SECP256K1_INLINE static void secp256k1_fe_impl_negate_unchecked(secp256k1_fe *r, const secp256k1_fe *a, int m) {
+    const uint32_t two_m1 = 2 * (m + 1); /* common factor of all four minuends below */
+
+    const uint32_t bound1 = 0x3FFFC2FUL * two_m1; /* minuend for limb 0 */
+    const uint32_t bound2 = 0x3FFFFBFUL * two_m1; /* minuend for limb 1 */
+    const uint32_t bound3 = 0x3FFFFFFUL * two_m1; /* minuend for limbs 2..8 */
+    const uint32_t bound4 = 0x03FFFFFUL * two_m1; /* minuend for limb 9 */
+
     /* For all legal values of m (0..31), the following properties hold: */
-    VERIFY_CHECK(0x3FFFC2FUL * 2 * (m + 1) >= 0x3FFFFFFUL * 2 * m);
-    VERIFY_CHECK(0x3FFFFBFUL * 2 * (m + 1) >= 0x3FFFFFFUL * 2 * m);
-    VERIFY_CHECK(0x3FFFFFFUL * 2 * (m + 1) >= 0x3FFFFFFUL * 2 * m);
-    VERIFY_CHECK(0x03FFFFFUL * 2 * (m + 1) >= 0x03FFFFFUL * 2 * m);
+    VERIFY_CHECK(bound1 >= 0x3FFFFFFUL * 2 * m);
+    VERIFY_CHECK(bound2 >= 0x3FFFFFFUL * 2 * m);
+    VERIFY_CHECK(bound3 >= 0x3FFFFFFUL * 2 * m);
+    VERIFY_CHECK(bound4 >= 0x03FFFFFUL * 2 * m);
 
     /* Due to the properties above, the left hand in the subtractions below is never less than
      * the right hand. */
-    r->n[0] = 0x3FFFC2FUL * 2 * (m + 1) - a->n[0];
-    r->n[1] = 0x3FFFFBFUL * 2 * (m + 1) - a->n[1];
-    r->n[2] = 0x3FFFFFFUL * 2 * (m + 1) - a->n[2];
-    r->n[3] = 0x3FFFFFFUL * 2 * (m + 1) - a->n[3];
-    r->n[4] = 0x3FFFFFFUL * 2 * (m + 1) - a->n[4];
-    r->n[5] = 0x3FFFFFFUL * 2 * (m + 1) - a->n[5];
-    r->n[6] = 0x3FFFFFFUL * 2 * (m + 1) - a->n[6];
-    r->n[7] = 0x3FFFFFFUL * 2 * (m + 1) - a->n[7];
-    r->n[8] = 0x3FFFFFFUL * 2 * (m + 1) - a->n[8];
-    r->n[9] = 0x03FFFFFUL * 2 * (m + 1) - a->n[9];
+
+    r->n[0] = bound1 - a->n[0];
+    r->n[1] = bound2 - a->n[1];
+    r->n[2] = bound3 - a->n[2];
+    r->n[3] = bound3 - a->n[3];
+    r->n[4] = bound3 - a->n[4];
+    r->n[5] = bound3 - a->n[5];
+    r->n[6] = bound3 - a->n[6];
+    r->n[7] = bound3 - a->n[7];
+    r->n[8] = bound3 - a->n[8];
+    r->n[9] = bound4 - a->n[9];
 }
 
 SECP256K1_INLINE static void secp256k1_fe_impl_mul_int_unchecked(secp256k1_fe *r, int a) {
@@ -1111,26 +1126,26 @@ static SECP256K1_INLINE void secp256k1_fe_storage_cmov(secp256k1_fe_storage *r,
 }
 
 static void secp256k1_fe_impl_to_storage(secp256k1_fe_storage *r, const secp256k1_fe *a) {
-    r->n[0] = a->n[0] | a->n[1] << 26;
-    r->n[1] = a->n[1] >> 6 | a->n[2] << 20;
+    r->n[0] = a->n[0]       | a->n[1] << 26; /* NOTE(review): presumably 32-bit words, so only the low 6 bits of limb 1 land here - confirm */
+    r->n[1] = a->n[1] >> 6  | a->n[2] << 20; /* the rest of limb 1 (>> 6) is picked up here */
     r->n[2] = a->n[2] >> 12 | a->n[3] << 14;
     r->n[3] = a->n[3] >> 18 | a->n[4] << 8;
     r->n[4] = a->n[4] >> 24 | a->n[5] << 2 | a->n[6] << 28;
-    r->n[5] = a->n[6] >> 4 | a->n[7] << 22;
+    r->n[5] = a->n[6] >> 4  | a->n[7] << 22;
     r->n[6] = a->n[7] >> 10 | a->n[8] << 16;
     r->n[7] = a->n[8] >> 16 | a->n[9] << 10;
 }
 
 static SECP256K1_INLINE void secp256k1_fe_impl_from_storage(secp256k1_fe *r, const secp256k1_fe_storage *a) {
     r->n[0] = a->n[0] & 0x3FFFFFFUL;
-    r->n[1] = a->n[0] >> 26 | ((a->n[1] << 6) & 0x3FFFFFFUL);
-    r->n[2] = a->n[1] >> 20 | ((a->n[2] << 12) & 0x3FFFFFFUL);
-    r->n[3] = a->n[2] >> 14 | ((a->n[3] << 18) & 0x3FFFFFFUL);
-    r->n[4] = a->n[3] >> 8 | ((a->n[4] << 24) & 0x3FFFFFFUL);
+    r->n[1] = a->n[0] >> 26  | ((a->n[1] << 6)   & 0x3FFFFFFUL); /* the 0x3FFFFFF mask keeps the low 26 bits of the shifted word */
+    r->n[2] = a->n[1] >> 20  | ((a->n[2] << 12)  & 0x3FFFFFFUL);
+    r->n[3] = a->n[2] >> 14  | ((a->n[3] << 18)  & 0x3FFFFFFUL);
+    r->n[4] = a->n[3] >> 8   | ((a->n[4] << 24)  & 0x3FFFFFFUL);
     r->n[5] = (a->n[4] >> 2) & 0x3FFFFFFUL;
-    r->n[6] = a->n[4] >> 28 | ((a->n[5] << 4) & 0x3FFFFFFUL);
-    r->n[7] = a->n[5] >> 22 | ((a->n[6] << 10) & 0x3FFFFFFUL);
-    r->n[8] = a->n[6] >> 16 | ((a->n[7] << 16) & 0x3FFFFFFUL);
+    r->n[6] = a->n[4] >> 28  | ((a->n[5] << 4)   & 0x3FFFFFFUL);
+    r->n[7] = a->n[5] >> 22  | ((a->n[6] << 10)  & 0x3FFFFFFUL);
+    r->n[8] = a->n[6] >> 16  | ((a->n[7] << 16)  & 0x3FFFFFFUL);
     r->n[9] = a->n[7] >> 10;
 }