-
Notifications
You must be signed in to change notification settings - Fork 1
Expand file tree
/
Copy pathrun_sql_tests.py
More file actions
executable file
·811 lines (720 loc) · 34.1 KB
/
run_sql_tests.py
File metadata and controls
executable file
·811 lines (720 loc) · 34.1 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
470
471
472
473
474
475
476
477
478
479
480
481
482
483
484
485
486
487
488
489
490
491
492
493
494
495
496
497
498
499
500
501
502
503
504
505
506
507
508
509
510
511
512
513
514
515
516
517
518
519
520
521
522
523
524
525
526
527
528
529
530
531
532
533
534
535
536
537
538
539
540
541
542
543
544
545
546
547
548
549
550
551
552
553
554
555
556
557
558
559
560
561
562
563
564
565
566
567
568
569
570
571
572
573
574
575
576
577
578
579
580
581
582
583
584
585
586
587
588
589
590
591
592
593
594
595
596
597
598
599
600
601
602
603
604
605
606
607
608
609
610
611
612
613
614
615
616
617
618
619
620
621
622
623
624
625
626
627
628
629
630
631
632
633
634
635
636
637
638
639
640
641
642
643
644
645
646
647
648
649
650
651
652
653
654
655
656
657
658
659
660
661
662
663
664
665
666
667
668
669
670
671
672
673
674
675
676
677
678
679
680
681
682
683
684
685
686
687
688
689
690
691
692
693
694
695
696
697
698
699
700
701
702
703
704
705
706
707
708
709
710
711
712
713
714
715
716
717
718
719
720
721
722
723
724
725
726
727
728
729
730
731
732
733
734
735
736
737
738
739
740
741
742
743
744
745
746
747
748
749
750
751
752
753
754
755
756
757
758
759
760
761
762
763
764
765
766
767
768
769
770
771
772
773
774
775
776
777
778
779
780
781
782
783
784
785
786
787
788
789
790
791
792
793
794
795
796
797
798
799
800
801
802
803
804
805
806
807
808
809
810
811
#!/usr/bin/env python3
"""
MemCP SQL Test Runner (Optimized)
Runs structured SQL/SPARQL tests from YAML files.
- Declarative tests
- Unified execution path
- Compact success logging, verbose failure logging
Requirements (pip): requests, PyYAML
If missing, create a venv and install, e.g.:
python3 -m venv .venv && . .venv/bin/activate && pip install -U requests PyYAML
"""
import sys
import os
# Dependency checks with clear install hints
try:
import yaml # PyYAML
except Exception as e:
print("Missing dependency: PyYAML (module 'yaml').")
print("Install with: pip install -U PyYAML")
sys.exit(2)
try:
import requests
except Exception as e:
print("Missing dependency: requests.")
print("Install with: pip install -U requests")
sys.exit(2)
import json
import subprocess
import time
import threading
import multiprocessing
from pathlib import Path
from base64 import b64encode
from typing import Dict, List, Any, Optional, Tuple
from urllib.parse import quote
# CPU measurement helpers
# Logical CPU count; used to report CPU load relative to total capacity.
NUM_CPUS = multiprocessing.cpu_count()
def find_memcp_pid() -> Optional[int]:
    """Find the PID of the memcp process.

    Uses ``pgrep -f memcp`` and returns the first non-empty PID reported, or
    None when nothing matches, pgrep is unavailable, times out, or emits
    non-numeric output.
    """
    try:
        result = subprocess.run(['pgrep', '-f', 'memcp'],
                                capture_output=True, text=True, timeout=2)
        for pid_str in result.stdout.strip().split('\n'):
            if pid_str.strip():
                return int(pid_str.strip())
    except (OSError, ValueError, subprocess.SubprocessError):
        # pgrep missing, lookup timed out, or unparsable output — treat all
        # of these as "not found" (was a bare except that also swallowed
        # KeyboardInterrupt/SystemExit).
        pass
    return None
def get_process_cpu_times(pid: int) -> Optional[float]:
    """Return total CPU time consumed by *pid* (user + system + children), in seconds.

    Reads ``/proc/[pid]/stat`` and sums utime, stime, cutime and cstime,
    converting clock ticks to seconds via SC_CLK_TCK (typically 100 Hz).
    Returns None when the process does not exist or /proc is unavailable
    (e.g. non-Linux systems).

    Note: the previous annotation/docstring promised a (user, system) tuple,
    but a single float total was always returned; the signature now matches
    the actual behavior.
    """
    try:
        with open(f'/proc/{pid}/stat', 'r') as f:
            parts = f.read().split()
        # stat fields (0-indexed here; 1-indexed in proc(5) docs):
        # utime=13, stime=14, cutime=15, cstime=16
        utime = int(parts[13])   # user time
        stime = int(parts[14])   # system time
        cutime = int(parts[15])  # children user time
        cstime = int(parts[16])  # children system time
        hz = os.sysconf(os.sysconf_names['SC_CLK_TCK'])
        return (utime + stime + cutime + cstime) / hz
    except (OSError, IndexError, ValueError, KeyError):
        # Missing process, truncated stat line, or platform without
        # /proc or SC_CLK_TCK.
        return None
def measure_cpu_load(pid: int, start_cpu: Optional[float], end_cpu: Optional[float], elapsed_sec: float) -> Optional[float]:
    """Calculate CPU load as percentage of total CPU capacity.

    Returns a percentage where 100% = one core fully utilized and
    NUM_CPUS*100% = all cores, or None when either CPU sample is missing or
    no wall time elapsed.

    The *pid* parameter is unused; it is kept for signature compatibility
    with existing callers.
    """
    if start_cpu is None or end_cpu is None or elapsed_sec <= 0:
        return None
    cpu_used = end_cpu - start_cpu
    # CPU load as percentage of wall time (100% = 1 core, 200% = 2 cores, etc.)
    return (cpu_used / elapsed_sec) * 100
# Global flag for connect-only mode
is_connect_only_mode = False
# Performance test configuration
PERF_TEST_ENABLED = os.environ.get("PERF_TEST", "0") == "1"
PERF_CALIBRATE = os.environ.get("PERF_CALIBRATE", "0") == "1" # reset baselines to current times
PERF_NORECALIBRATE = os.environ.get("PERF_NORECALIBRATE", "0") == "1" # freeze row counts for bisecting
PERF_EXPLAIN = os.environ.get("PERF_EXPLAIN", "0") == "1" # show query plans
PERF_BASELINE_FILE = ".perf_baseline.json"
PERF_THRESHOLD_FACTOR = 1.1 # 10% tolerance over baseline
PERF_TARGET_MIN_MS = 10000 # target minimum query time (10s)
PERF_TARGET_MAX_MS = 20000 # target maximum query time (20s)
PERF_SCALE_FACTOR = 1.3 # scale up/down by 30%
PERF_DEFAULT_ROWS = 10000 # default starting row count
PERF_MAX_ROWS = 10_000_000 # allow large datasets for proper calibration
PERF_MAX_RAM_FRACTION = 0.3 # max 30% of RAM for table data
def get_max_rows_for_ram(bytes_per_row: int = 100) -> int:
    """Calculate max rows based on available RAM (PERF_MAX_RAM_FRACTION limit).

    Parses MemTotal from /proc/meminfo; falls back to 10M rows when the file
    is missing (non-Linux) or unparsable.
    """
    try:
        with open('/proc/meminfo', 'r') as f:
            for line in f:
                if line.startswith('MemTotal:'):
                    total_kb = int(line.split()[1])
                    max_bytes = int(total_kb * 1024 * PERF_MAX_RAM_FRACTION)
                    return max_bytes // bytes_per_row
    except (OSError, ValueError, IndexError):
        # No /proc/meminfo or unexpected format — use the fallback below.
        # (Was a bare except that also masked KeyboardInterrupt.)
        pass
    return 10_000_000  # fallback: 10M rows
class SQLTestRunner:
    def __init__(self, base_url="http://localhost:4321", username="root", password="admin", default_database="memcp-tests", suite_db_cleanup=True):
        """Create a test runner bound to a MemCP HTTP endpoint.

        base_url: MemCP HTTP API base URL.
        username/password: default Basic-auth credentials.
        default_database: database used for suite isolation.
        suite_db_cleanup: when True, drop/recreate the suite database around
            a run; disabled for shared-server parallel runs.
        """
        self.base_url = base_url
        self.username = username
        self.password = password
        self.default_database = default_database
        self.suite_db_cleanup = suite_db_cleanup
        self.auth_header = self._create_auth_header()
        # Counters and bookkeeping used for the final report
        self.test_count = 0
        self.test_passed = 0
        self.failed_tests = []  # list of (name, is_noncritical)
        self.failed_critical = 0
        self.failed_noncritical = 0
        self.noncritical_count = 0
        self.noncritical_passed = 0
        self.setup_operations = []
        self.current_database = None
        # Databases already created/verified in this process (skip re-checks)
        self._ensured_dbs = set()
        self.suite_metadata = {}
        self._restart_handler = None  # callable to restart memcp between tests
        self.suite_syntax = None  # suite-wide SQL dialect; None = MySQL default
        self.perf_baselines = {}  # test_name -> {"time_ms": float, "rows": int}
        self.perf_results = {}  # test_name -> {"time_ms": float, "rows": int}
def set_restart_handler(self, fn):
"""Install a restart handler callable that restarts MemCP (returns True on success)."""
self._restart_handler = fn
def load_perf_baselines(self):
"""Load performance baselines from config file."""
try:
with open(PERF_BASELINE_FILE, 'r') as f:
self.perf_baselines = json.load(f)
except (FileNotFoundError, json.JSONDecodeError):
self.perf_baselines = {}
    def save_perf_baselines(self):
        """Save updated performance baselines to config file.

        All tests in a suite share the same row count (based on slowest test)
        to avoid issues with DELETE operations.
        """
        if not self.perf_results:
            return
        max_rows = get_max_rows_for_ram()
        if PERF_NORECALIBRATE:
            # Just update times, keep existing rows (useful when bisecting a
            # performance regression — the dataset size must stay frozen)
            for name, result in self.perf_results.items():
                current_rows = result["rows"]
                if name in self.perf_baselines:
                    self.perf_baselines[name]["time_ms"] = round(result["time_ms"], 1)
                else:
                    self.perf_baselines[name] = {"time_ms": round(result["time_ms"], 1), "rows": current_rows}
        else:
            # Scale each test independently to target 10-20s
            for name, result in self.perf_results.items():
                current_rows = result["rows"]
                test_time = result["time_ms"]
                if test_time < PERF_TARGET_MIN_MS:
                    # Too fast: grow the dataset by PERF_SCALE_FACTOR
                    new_rows = int(current_rows * PERF_SCALE_FACTOR)
                elif test_time > PERF_TARGET_MAX_MS:
                    # Too slow: shrink, but never below 1000 rows
                    new_rows = max(1000, int(current_rows / PERF_SCALE_FACTOR))
                else:
                    new_rows = current_rows
                # Apply RAM limit and hard cap
                new_rows = min(new_rows, max_rows, PERF_MAX_ROWS)
                self.perf_baselines[name] = {
                    "time_ms": round(test_time, 1),
                    "rows": new_rows
                }
        with open(PERF_BASELINE_FILE, 'w') as f:
            json.dump(self.perf_baselines, f, indent=2)
        print(f"📏 Updated performance baselines in {PERF_BASELINE_FILE}")
# ----------------------
# SQL identifier quoting
# ----------------------
def _quote_ident(self, name: str) -> str:
if name is None:
return "``"
# Escape backticks by doubling them
return f"`{str(name).replace('`', '``')}`"
# ----------------------
# Helpers
# ----------------------
def _create_auth_header(self):
credentials = f"{self.username}:{self.password}"
encoded = b64encode(credentials.encode()).decode()
return {"Authorization": f"Basic {encoded}"}
    def _record_success(self, name: str, is_noncritical: bool = False, elapsed_ms: float = None, threshold_ms: float = None, rows: int = None, heap_mb: float = None, cpu_pct: float = None):
        """Record and print a passing test; perf details shown when provided."""
        self.test_passed += 1
        if elapsed_ms is not None and threshold_ms is not None:
            rows_info = f", {rows:,} rows" if rows else ""
            # Calculate time per row in microseconds
            if rows and rows > 0:
                us_per_row = (elapsed_ms * 1000) / rows
                rate_info = f", {us_per_row:.2f}µs/row"
            else:
                rate_info = ""
            # Show heap memory if available
            mem_info = f", {heap_mb:.1f}MB heap" if heap_mb else ""
            # Show CPU load as percentage of total capacity (100%/Ncores = one core)
            cpu_info = f", {cpu_pct:.0f}%/{NUM_CPUS*100}% CPU" if cpu_pct is not None else ""
            print(f"✅ {name} ({elapsed_ms:.1f}ms / {threshold_ms:.0f}ms{rows_info}{rate_info}{mem_info}{cpu_info})")
        else:
            print(f"✅ {name}")
        if is_noncritical:
            self.noncritical_passed += 1
            print(f" ⚠️ Passed but flagged noncritical — enable soon")
def _record_fail(self, name: str, reason: str, query: str, response: Optional[requests.Response], expect, is_noncritical: bool = False, elapsed_ms: float = None, threshold_ms: float = None):
self.failed_tests.append((name, is_noncritical))
if is_noncritical:
self.failed_noncritical += 1
else:
self.failed_critical += 1
time_info = f" ({elapsed_ms:.1f}ms / {threshold_ms:.0f}ms)" if elapsed_ms is not None else ""
print(f"❌ {name}{' (noncritical)' if is_noncritical else ''}{time_info}")
print(f" Reason: {reason}")
if query:
print(f" Query: {query[:200]}{'...' if len(query) > 200 else ''}")
if response is not None:
print(f" HTTP {response.status_code}: {response.text[:500]}{'...' if len(response.text) > 500 else ''}")
if expect is not None:
print(f" Expected: {expect}\n")
# ----------------------
# Core execution
# ----------------------
    def ensure_database(self, database: str) -> None:
        """Create *database* if needed and wait briefly until it is queryable.

        No-ops for the system database and for databases already verified in
        this process. All failures are swallowed: callers treat this as
        best-effort and surface real errors on the subsequent query.
        """
        if not database or database == "system" or database in self._ensured_dbs:
            return
        try:
            url = f"{self.base_url}/sql/system"
            create_db_sql = f"CREATE DATABASE IF NOT EXISTS {self._quote_ident(database)}"
            requests.post(url, data=create_db_sql, headers=self.auth_header, timeout=10)
            # verify availability with a lightweight call
            check_url = f"{self.base_url}/sql/{quote(database, safe='')}"
            for _ in range(3):
                resp = requests.post(check_url, data="SHOW TABLES", headers=self.auth_header, timeout=10)
                # A body mentioning "database" is taken as a missing-DB error
                # (NOTE(review): heuristic — a legit result containing that
                # word would also trigger a retry; confirm server error format)
                if resp is not None and "database" not in resp.text.lower():
                    self._ensured_dbs.add(database)
                    break
                time.sleep(0.1)
        except Exception:
            pass
def execute_sql(self, database: str, query: str, auth_header: Optional[Dict[str, str]] = None, syntax: Optional[str] = None, session_id: Optional[str] = None, timeout: int = 10) -> Optional[requests.Response]:
# proactively ensure database exists (works for connect-only too)
self.ensure_database(database)
encoded_db = quote(database, safe='')
normalized = self._normalize_syntax(syntax)
route = "psql" if normalized == "postgresql" else "sql"
url = f"{self.base_url}/{route}/{encoded_db}"
headers = dict(auth_header if auth_header is not None else self.auth_header)
if session_id:
headers["X-Session-Id"] = session_id
# Try request; if connection fails, wait for memcp to be ready and retry a few times
for attempt in range(5):
try:
return requests.post(url, data=query, headers=headers, timeout=timeout)
except Exception:
# parse port from base_url
try:
port = int(self.base_url.rsplit(':', 1)[1])
except Exception:
port = 4321
wait_for_memcp(port, timeout=5)
return None
def _normalize_syntax(self, syntax: Optional[str]) -> Optional[str]:
if not syntax:
return None
syntax_lower = str(syntax).strip().lower()
if syntax_lower in ["mysql", "default"]:
return None
if syntax_lower in ["postgres", "postgresql", "psql"]:
return "postgresql"
return syntax_lower
def execute_sparql(self, database: str, query: str, auth_header: Optional[Dict[str, str]] = None, timeout: int = 10) -> Optional[requests.Response]:
try:
encoded_db = quote(database, safe='')
url = f"{self.base_url}/rdf/{encoded_db}"
headers = auth_header if auth_header is not None else self.auth_header
return requests.post(url, data=query, headers=headers, timeout=timeout)
except Exception as e:
print(f"Error executing SPARQL: {e}")
return None
def load_ttl(self, database: str, ttl_data: str) -> bool:
try:
self.ensure_database(database)
url = f"{self.base_url}/rdf/{quote(database, safe='')}/load_ttl"
response = requests.post(url, data=ttl_data, headers=self.auth_header, timeout=10)
return response is not None and response.status_code == 200
except Exception as e:
print(f"Error loading TTL data: {e}")
return False
def parse_jsonl_response(self, response: requests.Response) -> Optional[List[Dict]]:
if response is None:
return None
text = response.text.strip()
if not text:
return []
results = []
for line in text.splitlines():
try:
results.append(json.loads(line))
except:
continue
return results
# ----------------------
# Test execution
# ----------------------
    def run_test_case(self, test_case: Dict, database: str) -> bool:
        """Execute one declarative test case (SQL, SPARQL, or Scheme).

        Handles perf-test scaling/thresholds, per-test setup steps, auth and
        dialect overrides, the special SHUTDOWN restart flow, and expectation
        validation. Returns True on success; failures return the falsy result
        of _record_fail.
        """
        self.test_count += 1
        name = test_case.get("name", f"Test {self.test_count}")
        is_noncritical = bool(test_case.get("noncritical"))
        if is_noncritical:
            self.noncritical_count += 1
        # Performance test handling: presence of threshold_ms marks a perf test
        yaml_threshold_ms = test_case.get("threshold_ms")
        is_perf_test = yaml_threshold_ms is not None
        perf_rows = PERF_DEFAULT_ROWS  # default row count
        if is_perf_test:
            # Get baseline data if available
            baseline = self.perf_baselines.get(name, {})
            if isinstance(baseline, dict):
                baseline_time = baseline.get("time_ms")
                baseline_rows = baseline.get("rows", PERF_DEFAULT_ROWS)
            else:
                # Legacy format: just a number
                baseline_time = baseline
                baseline_rows = PERF_DEFAULT_ROWS
            # Use baseline time × 1.3 as threshold ONLY if in target range
            # During scaling, use YAML threshold (generous)
            if baseline_time and not PERF_CALIBRATE and baseline_time >= PERF_TARGET_MIN_MS:
                threshold_ms = baseline_time * PERF_THRESHOLD_FACTOR
            else:
                threshold_ms = yaml_threshold_ms
            # Use baseline rows (which may have been scaled)
            perf_rows = baseline_rows
            if not PERF_TEST_ENABLED:
                print(f"⏭️ {name} (skipped - set PERF_TEST=1 to run)")
                self.test_count -= 1  # don't count skipped perf tests
                return True
        # Per-test setup steps (SQL and/or Scheme, e.g. perf data generation)
        # Supports {rows} and {database} template placeholders for perf tests
        test_setup_steps = test_case.get("setup")
        heap_bytes = 0
        if test_setup_steps and isinstance(test_setup_steps, list):
            for step in test_setup_steps:
                if "sql" in step:
                    sql_code = step["sql"]
                    if is_perf_test:
                        sql_code = sql_code.replace("{rows}", str(perf_rows)).replace("{database}", database)
                    resp = self.execute_sql(database, sql_code, syntax=self.suite_syntax)
                elif "scm" in step:
                    scm = step["scm"]
                    if is_perf_test:
                        scm = scm.replace("{rows}", str(perf_rows)).replace("{database}", database)
                    url = f"{self.base_url}/scm"
                    try:
                        resp = requests.post(url, data=scm, headers=self.auth_header, timeout=600)
                    except Exception as e:
                        return self._record_fail(name, f"Setup SCM error: {e}", scm, None, None, is_noncritical)
                    if resp is None or resp.status_code != 200:
                        return self._record_fail(name, "Setup SCM failed", scm, resp, None, is_noncritical)
                    # Extract heap stats from response if available
                    try:
                        result = json.loads(resp.text.strip())
                        if isinstance(result, list) and len(result) >= 2:
                            heap_bytes = result[1]
                        except:
                            pass
        # Scheme code execution via /scm endpoint
        scm_code = test_case.get("scm")
        if scm_code:
            if is_perf_test:
                scm_code = scm_code.replace("{rows}", str(perf_rows)).replace("{database}", database)
            expect = test_case.get("expect", {})
            expect_error = expect.get("error", False)
            try:
                url = f"{self.base_url}/scm"
                resp = requests.post(url, data=scm_code, headers=self.auth_header, timeout=600)
            except Exception as e:
                if expect_error:
                    self._record_success(name, is_noncritical)
                    return True
                return self._record_fail(name, f"SCM exception: {e}", scm_code, None, None, is_noncritical)
            if resp is None:
                if expect_error:
                    self._record_success(name, is_noncritical)
                    return True
                return self._record_fail(name, "No response from /scm", scm_code, None, None, is_noncritical)
            if expect_error:
                if resp.status_code != 200:
                    self._record_success(name, is_noncritical)
                    return True
                return self._record_fail(name, f"Expected error but got 200: {resp.text[:200]}", scm_code, None, None, is_noncritical)
            if resp.status_code != 200:
                return self._record_fail(name, f"SCM error ({resp.status_code}): {resp.text[:200]}", scm_code, None, None, is_noncritical)
            self._record_success(name, is_noncritical)
            return True
        query = test_case.get("sql") or test_case.get("sparql")
        if query and is_perf_test:
            query = query.replace("{rows}", str(perf_rows)).replace("{database}", database)
        is_sparql = "sparql" in test_case
        # auth: allow per-test overrides, fallback to suite metadata, then runner defaults
        tc_user = test_case.get("username") or self.suite_metadata.get("username") or self.username
        tc_pass = test_case.get("password") or self.suite_metadata.get("password") or self.password
        creds = f"{tc_user}:{tc_pass}".encode()
        auth_header = {"Authorization": f"Basic {b64encode(creds).decode()}"}
        test_syntax = test_case.get("syntax")
        active_syntax = self._normalize_syntax(test_syntax) if test_syntax is not None else self.suite_syntax
        session_id = test_case.get("session_id")
        sql_timeout = int(test_case.get("timeout", 10))
        # TTL preload if SPARQL
        if is_sparql and "ttl_data" in test_case:
            if not self.load_ttl(database, test_case["ttl_data"]):
                return self._record_fail(name, "TTL load failed", query, None, None)
        # Special handling: SHUTDOWN command triggers graceful restart flow
        response: Optional[requests.Response]
        cpu_pct = None  # CPU load percentage, measured during query execution
        if query and query.strip().upper() == "SHUTDOWN":
            # Issue shutdown
            resp = self.execute_sql(database, query, auth_header, active_syntax)
            if resp is not None and resp.status_code >= 500:
                # Server errored on shutdown — fall through to validation below
                response = resp
            else:
                # Treat SHUTDOWN as successful regardless of response body, even if the connection closed.
                if self._restart_handler is not None:
                    self._restart_handler()
                else:
                    # No restart handler (--connect-only): wait for external supervisor to restart
                    import time as _time
                    for _i in range(60):
                        _time.sleep(1)
                        try:
                            if requests.get(self.base_url, timeout=2).status_code < 500:
                                break
                        except Exception:
                            pass
                self._record_success(name, is_noncritical)
                return True
            response = resp
        else:
            # Show query plan if PERF_EXPLAIN is enabled
            if is_perf_test and PERF_EXPLAIN and not is_sparql:
                explain_resp = self.execute_sql(database, f"DESCRIBE {query}", auth_header, active_syntax)
                if explain_resp and explain_resp.status_code == 200:
                    print(f" 📋 Query plan for {name}:")
                    for line in explain_resp.text.strip().split('\n')[:10]:
                        print(f" {line[:120]}")
            # Warmup runs for performance tests (2 unmeasured runs before the measured one)
            if is_perf_test and test_case.get("warmup", True):
                for _ in range(2):
                    self.execute_sparql(database, query, auth_header, timeout=sql_timeout) if is_sparql else self.execute_sql(database, query, auth_header, active_syntax, timeout=sql_timeout)
            # Get memcp PID and start CPU measurement for perf tests
            memcp_pid = find_memcp_pid() if is_perf_test else None
            start_cpu = get_process_cpu_times(memcp_pid) if memcp_pid else None
            # Execute query (with timing for perf tests)
            start_time = time.monotonic()
            response = self.execute_sparql(database, query, auth_header, timeout=sql_timeout) if is_sparql else self.execute_sql(database, query, auth_header, active_syntax, session_id=session_id, timeout=sql_timeout)
            elapsed_ms = (time.monotonic() - start_time) * 1000
            elapsed_sec = elapsed_ms / 1000
            # End CPU measurement
            end_cpu = get_process_cpu_times(memcp_pid) if memcp_pid else None
            cpu_pct = measure_cpu_load(memcp_pid, start_cpu, end_cpu, elapsed_sec) if memcp_pid else None
        if response is None:
            return self._record_fail(name, "No response", query, None, None, is_noncritical)
        results = self.parse_jsonl_response(response)
        # Check performance threshold
        if is_perf_test and elapsed_ms > threshold_ms:
            return self._record_fail(name, f"Too slow: {elapsed_ms:.1f}ms > {threshold_ms:.0f}ms", query, response,
                                     test_case.get("expect"), is_noncritical, elapsed_ms, threshold_ms)
        if self.validate_expectation(test_case, response, results):
            if is_perf_test:
                heap_mb = heap_bytes / (1024 * 1024) if heap_bytes else None
                self._record_success(name, is_noncritical, elapsed_ms, threshold_ms, perf_rows, heap_mb, cpu_pct)
                # Store result for baseline update (time and row count)
                self.perf_results[name] = {"time_ms": elapsed_ms, "rows": perf_rows}
            else:
                self._record_success(name, is_noncritical)
            return True
        else:
            return self._record_fail(name, "Expectation mismatch", query, response, test_case.get("expect"), is_noncritical)
def validate_expectation(self, test_case: Dict, response: requests.Response, results: Optional[List[Dict]]) -> bool:
expect = test_case.get("expect", {})
if expect.get("error"):
return response.status_code != 200 or "Error" in response.text
if "Error" in response.text or response.status_code != 200:
return False
if "affected_rows" in expect:
expected = expect["affected_rows"]
if results and results and "affected_rows" in results[0]:
return results[0]["affected_rows"] == expected
return True
if results is None:
return False
if expect.get("rows") is not None:
if len(results) != expect["rows"]:
return False
if expect.get("data"):
for i, row in enumerate(expect["data"]):
if i >= len(results):
return False
for k, v in row.items():
if isinstance(v, float) and isinstance(results[i].get(k), (int, float)):
if abs(results[i][k] - v) > 0.01:
return False
elif results[i].get(k) != v:
return False
return True
# ----------------------
# Setup & Cleanup
# ----------------------
def run_setup(self, setup_steps: List[Dict], database: str) -> bool:
self.setup_operations = []
self.current_database = database
for step in setup_steps:
self.setup_operations.append(step)
resp = self.execute_sql(database, step['sql'], syntax=self.suite_syntax)
if resp is None or resp.status_code not in [200, 500]:
return False
return True
def run_cleanup(self, cleanup_steps: List[Dict], database: str) -> None:
for step in cleanup_steps:
self.execute_sql(database, step['sql'], syntax=self.suite_syntax)
def cleanup_test_database(self, database: str) -> None:
try:
global is_connect_only_mode
if is_connect_only_mode:
resp = self.execute_sql(database, "SHOW TABLES")
if resp is not None and resp.status_code == 200:
try:
tables = resp.json().get('data', [])
for row in tables:
tbl = list(row.values())[0]
self.execute_sql(database, f"DROP TABLE IF EXISTS {self._quote_ident(tbl)}")
except:
pass
return
self.execute_sql("system", f"DROP DATABASE IF EXISTS {self._quote_ident(database)}")
# drop ensures next ensure_database will recreate
if database in self._ensured_dbs:
try:
self._ensured_dbs.remove(database)
except KeyError:
pass
except:
pass
# ----------------------
# Parallel test groups
# ----------------------
    def _run_parallel_group(self, tests: List[Dict], database: str) -> None:
        """Run a list of test cases concurrently using threads.

        Each test records its own pass/fail via run_test_case; the collected
        `results` list is currently only filled, not consumed by callers.
        """
        results = [None] * len(tests)  # True/False per test
        lock = threading.Lock()
        def run_one(idx, tc):
            # Worker: run a single test and store its outcome under the lock
            ok = self.run_test_case(tc, database)
            with lock:
                results[idx] = ok
        threads = []
        for idx, tc in enumerate(tests):
            t = threading.Thread(target=run_one, args=(idx, tc), daemon=True)
            threads.append(t)
            t.start()
        for t in threads:
            # Bounded join so a single hung test cannot stall the whole suite
            t.join(timeout=120)
# ----------------------
# Spec Runner
# ----------------------
    def run_test_spec(self, spec_file: str) -> bool:
        """Run every test in a YAML spec file and print a summary.

        Returns True when no *critical* test failed (noncritical failures do
        not fail the suite). Consecutive tests sharing a 'parallel' key run
        concurrently as a group.
        """
        with open(spec_file, 'r') as f:
            spec = yaml.safe_load(f)
        metadata = spec.get('metadata', {})
        self.suite_metadata = metadata or {}
        self.suite_syntax = self._normalize_syntax(self.suite_metadata.get("syntax"))
        database = self.default_database
        if metadata.get('disabled'):
            print(f"⏭️ Suite disabled: {metadata.get('description', spec_file)}")
            return True
        # Load performance baselines for this machine
        if PERF_TEST_ENABLED:
            self.load_perf_baselines()
            if PERF_CALIBRATE:
                self.perf_baselines = {}  # reset rows to PERF_DEFAULT_ROWS
                print("🔧 Calibration mode: resetting baselines to current times")
        print(f"🎯 Running suite: {metadata.get('description', spec_file)}")
        print(f"💾 Database: {database}")
        # Optional suite-level DB isolation. In shared-server parallel runs we
        # disable this to avoid cross-suite DROP/CREATE races.
        if self.suite_db_cleanup:
            self.cleanup_test_database(database)
        self.ensure_database(database)
        if spec.get('setup') and not self.run_setup(spec['setup'], database):
            print("❌ Setup failed")
            return False
        # Group consecutive test cases by 'parallel' key and run groups concurrently
        test_cases = spec.get('test_cases', [])
        i = 0
        while i < len(test_cases):
            tc = test_cases[i]
            group = tc.get('parallel')
            if group:
                # Collect all consecutive tests with the same parallel group
                group_tests = [tc]
                j = i + 1
                while j < len(test_cases) and test_cases[j].get('parallel') == group:
                    group_tests.append(test_cases[j])
                    j += 1
                print(f"⚡ Running {len(group_tests)} tests in parallel group '{group}'")
                self._run_parallel_group(group_tests, database)
                i = j
            else:
                self.run_test_case(tc, database)
                i += 1
        if spec.get('cleanup'):
            self.run_cleanup(spec['cleanup'], database)
        if self.suite_db_cleanup:
            self.cleanup_test_database(database)
        print("="*60)
        total = self.test_count
        passed = self.test_passed
        failed_total = len(self.failed_tests)
        failed_noncrit = self.failed_noncritical
        failed_crit = self.failed_critical
        print(f"📊 Results: {passed}/{total} passed | Failures: {failed_total} | Noncritical failures: {failed_noncrit}")
        if self.failed_tests:
            print("❌ Failed:")
            for name, is_noncrit in self.failed_tests:
                suffix = " (noncritical)" if is_noncrit else ""
                print(f" - {name}{suffix}")
        else:
            print("🎉 All tests passed!")
        print("="*60)
        # Update performance baselines on success (or in calibration mode)
        if PERF_TEST_ENABLED and self.perf_results and (failed_crit == 0 or PERF_CALIBRATE):
            self.save_perf_baselines()
        # Suite success is determined solely by critical tests
        return failed_crit == 0
def wait_for_memcp(port=4321, timeout=30) -> bool:
    """Poll http://localhost:<port> until it answers; True once reachable.

    Polls roughly once per second for up to *timeout* attempts.
    """
    for _ in range(timeout):
        try:
            requests.get(f"http://localhost:{port}", timeout=2)
            return True
        except Exception:
            # Narrowed from a bare except: a bare except also swallowed
            # KeyboardInterrupt, making Ctrl-C hang during startup polling.
            time.sleep(1)
    return False
def start_memcp_process(port: int) -> subprocess.Popen | None:
    """Launch a local ./memcp binary and wait until its HTTP API answers.

    Returns the Popen handle, or None when the process could not be started
    or never became reachable — in which case the child is killed so it does
    not linger.
    """
    try:
        datadir = os.environ.get("MEMCP_TEST_DATADIR", f"/tmp/memcp-sql-tests-{port}")
        env = os.environ.copy()
        # subprocess.DEVNULL instead of open(os.devnull): the old file handle
        # was never closed and leaked one fd per (re)start.
        proc = subprocess.Popen([
            "./memcp", "-data", datadir,
            f"--api-port={port}", f"--mysql-port={port+1000}",
            "--disable-mysql", "lib/main.scm"
        ], cwd=os.path.dirname(os.path.abspath(__file__)),
            env=env, stdin=subprocess.PIPE,
            stdout=subprocess.DEVNULL, stderr=subprocess.DEVNULL, text=True)
        if not wait_for_memcp(port):
            # Don't leak an unreachable child process
            try:
                proc.kill()
            except Exception:
                pass
            return None
        return proc
    except Exception:
        return None
def stop_memcp_process(proc: subprocess.Popen) -> None:
    """Gracefully stop a memcp child.

    Closing stdin signals shutdown; if the process has not exited within 10
    seconds (or anything else goes wrong), it is killed outright.
    """
    try:
        proc.stdin.close()
        proc.wait(timeout=10)
        return
    except Exception:
        pass
    try:
        proc.kill()
    except Exception:
        pass
def kill_memcp_by_port(port: int) -> None:
    """Best-effort pkill of any memcp instance bound to *port*; never raises."""
    try:
        subprocess.run(["pkill", "-f", f"memcp.*--api-port={port}"], check=False)
    except Exception:
        pass
def main():
    """CLI entry point: parse arguments, ensure a MemCP server, run the suite.

    Usage: run_sql_tests.py <spec.yaml> [port] [--connect-only] [--no-db-cleanup]
    Exits 0 when no critical test failed, 1 otherwise, 2 on missing deps.
    """
    if len(sys.argv) < 2:
        print("Usage: python3 run_sql_tests.py <test_spec.yaml> [port] [--connect-only] [--no-db-cleanup]")
        sys.exit(1)
    spec_file = sys.argv[1]
    port = 4321
    connect_only = False
    suite_db_cleanup = True
    for arg in sys.argv[2:]:
        if arg == "--connect-only":
            connect_only = True
        elif arg == "--no-db-cleanup":
            suite_db_cleanup = False
        elif arg.isdigit():
            port = int(arg)
    base_url = f"http://localhost:{port}"
    global is_connect_only_mode
    is_connect_only_mode = connect_only
    memcp_process = None
    if connect_only:
        # Connect-only: a server must already be running; never spawn one
        try:
            requests.get(base_url, timeout=2)
        except:
            print(f"❌ Cannot connect to MemCP on port {port}")
            sys.exit(1)
    else:
        # Reuse a running server if reachable, otherwise start our own
        try:
            requests.get(base_url, timeout=2)
        except:
            memcp_process = start_memcp_process(port)
            if not memcp_process:
                print("❌ Failed to start MemCP")
                sys.exit(1)
    runner = SQLTestRunner(base_url, suite_db_cleanup=suite_db_cleanup)
    if not connect_only:
        def restart_handler() -> bool:
            # Used by the SHUTDOWN test flow: stop our child and spawn a new one
            nonlocal memcp_process
            if memcp_process:
                stop_memcp_process(memcp_process)
                memcp_process = None
            memcp_process = start_memcp_process(port)
            return memcp_process is not None
        runner.set_restart_handler(restart_handler)
    success = runner.run_test_spec(spec_file)
    if not connect_only and memcp_process:
        stop_memcp_process(memcp_process)
    sys.exit(0 if success else 1)
if __name__ == "__main__":
main()