-
Notifications
You must be signed in to change notification settings - Fork 0
Expand file tree
/
Copy pathinit_db.sql
More file actions
179 lines (161 loc) · 8.66 KB
/
init_db.sql
File metadata and controls
179 lines (161 loc) · 8.66 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
-- init_db.sql
-- Postgres schema for ML Monitoring.
-- Executed once by the postgres container on first start
-- (mounted at /docker-entrypoint-initdb.d/).
--
-- Tables
-- ──────
-- prediction_logs : Every prediction the API serves
-- drift_reports : Summary row per drift check run
-- drift_feature_results: Per-feature drift metrics
-- quality_reports : Summary row per quality check run
-- quality_checks : Per-check results per quality run
-- model_registry : Mirrors artifacts/model_registry.json in SQL
-- alert_log : Append-only record of every alert fired
-- ── Extensions ────────────────────────────────────────────────────────────────
CREATE EXTENSION IF NOT EXISTS "uuid-ossp";
-- ── Schemas ───────────────────────────────────────────────────────────────────
CREATE SCHEMA IF NOT EXISTS ml;
SET search_path TO ml, public;
-- ── prediction_logs ───────────────────────────────────────────────────────────
CREATE TABLE IF NOT EXISTS prediction_logs (
id BIGSERIAL PRIMARY KEY,
request_id TEXT NOT NULL,
ts TIMESTAMPTZ NOT NULL DEFAULT NOW(),
model_version TEXT NOT NULL,
predicted_class TEXT NOT NULL,
confidence DOUBLE PRECISION NOT NULL,
-- raw feature dict stored as JSONB for ad-hoc queries
features JSONB,
-- optional ground-truth label joined later for accuracy tracking
label TEXT,
latency_ms DOUBLE PRECISION,
CONSTRAINT uq_request_id UNIQUE (request_id)
);
CREATE INDEX IF NOT EXISTS idx_pred_logs_ts
ON prediction_logs (ts DESC);
CREATE INDEX IF NOT EXISTS idx_pred_logs_model
ON prediction_logs (model_version, ts DESC);
CREATE INDEX IF NOT EXISTS idx_pred_logs_class
ON prediction_logs (predicted_class, ts DESC);
-- ── drift_reports ─────────────────────────────────────────────────────────────
CREATE TABLE IF NOT EXISTS drift_reports (
id BIGSERIAL PRIMARY KEY,
report_id TEXT NOT NULL UNIQUE,
generated_at TIMESTAMPTZ NOT NULL,
model_version TEXT NOT NULL,
reference_size INT NOT NULL,
current_size INT NOT NULL,
window_start TIMESTAMPTZ,
window_end TIMESTAMPTZ,
overall_drifted BOOLEAN NOT NULL,
drifted_features TEXT[],
drifted_count INT NOT NULL DEFAULT 0,
critical_count INT NOT NULL DEFAULT 0,
warning_count INT NOT NULL DEFAULT 0,
drift_rate_pct DOUBLE PRECISION,
prediction_drifted BOOLEAN NOT NULL DEFAULT FALSE,
prediction_psi DOUBLE PRECISION,
summary JSONB
);
CREATE INDEX IF NOT EXISTS idx_drift_reports_ts
ON drift_reports (generated_at DESC);
CREATE INDEX IF NOT EXISTS idx_drift_reports_model
ON drift_reports (model_version, generated_at DESC);
-- ── drift_feature_results ─────────────────────────────────────────────────────
CREATE TABLE IF NOT EXISTS drift_feature_results (
id BIGSERIAL PRIMARY KEY,
report_id TEXT NOT NULL REFERENCES drift_reports(report_id) ON DELETE CASCADE,
feature TEXT NOT NULL,
dtype TEXT NOT NULL, -- numerical | categorical
method TEXT NOT NULL, -- ks | chi2
statistic DOUBLE PRECISION,
p_value DOUBLE PRECISION,
psi DOUBLE PRECISION,
drifted BOOLEAN NOT NULL,
severity TEXT NOT NULL, -- none | warning | critical
ref_mean DOUBLE PRECISION,
cur_mean DOUBLE PRECISION,
ref_std DOUBLE PRECISION,
cur_std DOUBLE PRECISION
);
CREATE INDEX IF NOT EXISTS idx_drift_feat_report
ON drift_feature_results (report_id);
CREATE INDEX IF NOT EXISTS idx_drift_feat_feature
ON drift_feature_results (feature, drifted);
-- ── quality_reports ───────────────────────────────────────────────────────────
CREATE TABLE IF NOT EXISTS quality_reports (
id BIGSERIAL PRIMARY KEY,
report_id TEXT NOT NULL UNIQUE,
generated_at TIMESTAMPTZ NOT NULL,
model_version TEXT NOT NULL,
window_size INT NOT NULL,
window_start TIMESTAMPTZ,
window_end TIMESTAMPTZ,
overall_passed BOOLEAN NOT NULL,
hard_failures TEXT[],
soft_warnings TEXT[],
hard_fail_count INT NOT NULL DEFAULT 0,
soft_warn_count INT NOT NULL DEFAULT 0,
summary JSONB
);
CREATE INDEX IF NOT EXISTS idx_quality_reports_ts
ON quality_reports (generated_at DESC);
-- ── quality_checks ────────────────────────────────────────────────────────────
CREATE TABLE IF NOT EXISTS quality_checks (
id BIGSERIAL PRIMARY KEY,
report_id TEXT NOT NULL REFERENCES quality_reports(report_id) ON DELETE CASCADE,
check_name TEXT NOT NULL,
severity TEXT NOT NULL, -- hard | soft
passed BOOLEAN NOT NULL,
message TEXT,
details JSONB
);
CREATE INDEX IF NOT EXISTS idx_quality_checks_report
ON quality_checks (report_id, passed);
-- ── model_registry ────────────────────────────────────────────────────────────
CREATE TABLE IF NOT EXISTS model_registry (
id BIGSERIAL PRIMARY KEY,
version_tag TEXT NOT NULL UNIQUE,
registered_at TIMESTAMPTZ NOT NULL DEFAULT NOW(),
status TEXT NOT NULL DEFAULT 'candidate',
-- status: candidate | staging | production | retired | failed
model_path TEXT,
pipeline_path TEXT,
eval_report_path TEXT,
val_accuracy DOUBLE PRECISION,
f1 DOUBLE PRECISION,
roc_auc DOUBLE PRECISION,
promoted_at TIMESTAMPTZ,
promoted_by TEXT, -- 'auto' | 'manual:<user>'
retired_at TIMESTAMPTZ,
notes TEXT,
metrics JSONB
);
CREATE INDEX IF NOT EXISTS idx_model_registry_status
ON model_registry (status, registered_at DESC);
-- ── alert_log ─────────────────────────────────────────────────────────────────
CREATE TABLE IF NOT EXISTS alert_log (
id BIGSERIAL PRIMARY KEY,
ts TIMESTAMPTZ NOT NULL DEFAULT NOW(),
title TEXT NOT NULL,
message TEXT,
severity TEXT NOT NULL, -- info | warning | critical
channel TEXT NOT NULL,
sent BOOLEAN NOT NULL DEFAULT TRUE,
labels JSONB,
environment TEXT
);
CREATE INDEX IF NOT EXISTS idx_alert_log_ts
ON alert_log (ts DESC);
CREATE INDEX IF NOT EXISTS idx_alert_log_severity
ON alert_log (severity, ts DESC);
-- ── Airflow metadata DB (separate DB; created by airflow db upgrade) ──────────
-- Nothing to create here — Airflow manages its own schema.
-- The POSTGRES_AIRFLOW_DB is set in .env and created by the postgres image
-- via POSTGRES_MULTIPLE_DATABASES if you add that extension, or manually:
-- (handled by docker-compose airflow-init command)
-- ── Seed: initial model_registry placeholder ──────────────────────────────────
-- Uncomment to pre-populate after your first training run.
-- INSERT INTO ml.model_registry (version_tag, status, notes)
-- VALUES ('baseline', 'retired', 'Initial seed — replace with real training run');