Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
15 changes: 10 additions & 5 deletions .github/workflows/build.yml
Original file line number Diff line number Diff line change
Expand Up @@ -43,11 +43,16 @@ jobs:

- name: Check SQLite Alembic Migrations
run: |
uv run alembic upgrade heads
uv run alembic check
uv run alembic downgrade base
uv run alembic upgrade heads
uv run alembic check
uv run alembic -n data_db upgrade heads
uv run alembic -n meta_db upgrade heads
uv run alembic -n data_db check
uv run alembic -n meta_db check
uv run alembic -n data_db downgrade base
uv run alembic -n meta_db downgrade base
uv run alembic -n data_db upgrade heads
uv run alembic -n meta_db upgrade heads
uv run alembic -n data_db check
uv run alembic -n meta_db check

build:
needs: test
Expand Down
12 changes: 6 additions & 6 deletions alembic.ini
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
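# Alembic configuration for two databases. Shared options live in [DEFAULT];
# select a database with `alembic -n data_db ...` or `alembic -n meta_db ...`.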

[alembic]
[DEFAULT]
# path to migration scripts.
# this is typically a path given in POSIX (e.g. forward slashes)
# format, relative to the token %(here)s which refers to the location of this
Expand All @@ -19,6 +19,7 @@ script_location = %(here)s/alembic
prepend_sys_path = .



# timezone to use when rendering the date within the migration file
# as well as the filename.
# If specified, requires the tzdata library which can be installed by adding
Expand All @@ -44,7 +45,6 @@ prepend_sys_path = .
# directories, initial revisions must be specified with --version-path.
# The path separator used here should be the separator specified by "path_separator"
# below.
# version_locations = %(here)s/bar:%(here)s/bat:%(here)s/alembic/versions

# path_separator; This indicates what character is used to split lists of file
# paths, including version_locations and prepend_sys_path within configparser
Expand Down Expand Up @@ -81,11 +81,11 @@ path_separator = os
# are written from script.py.mako
# output_encoding = utf-8

# database URL. This is consumed by the user-maintained env.py script only.
# other means of configuring database URLs may be customized within the env.py
# file.
sqlalchemy.url = driver://user:pass@localhost/dbname
[data_db]
version_locations = %(here)s/alembic/versions

[meta_db]
version_locations = %(here)s/alembic/meta_versions

[post_write_hooks]
# post_write_hooks defines scripts or Python functions that are run
Expand Down
76 changes: 64 additions & 12 deletions alembic/env.py
Original file line number Diff line number Diff line change
@@ -1,10 +1,13 @@
import pathlib
from logging.config import fileConfig
from pathlib import Path

from sqlalchemy import MetaData
from sqlmodel import SQLModel

from alembic import context
from api.util.db import engine
from api.env import Settings
from api.models import BaseModel
from api.models import BaseModel, MetadataModel
from api.util.db import engine, meta_engine

# this is the Alembic Config object, which provides
# access to the values within the .ini file in use.
Expand All @@ -14,18 +17,67 @@
fileConfig(config.config_file_name)


def _get_table_names(base_cls: type) -> set[str]:
"""Recursively collect __tablename__ from all table-model subclasses."""
names: set[str] = set()
for cls in base_cls.__subclasses__():
tablename = getattr(cls, "__tablename__", None)
if isinstance(tablename, str) and hasattr(cls, "__table__"):
names.add(tablename)
names |= _get_table_names(cls)
return names


def _build_filtered_metadata(table_names: set[str]) -> MetaData:
    """Copy the named tables out of ``SQLModel.metadata`` into a fresh MetaData.

    SQLModel ignores the ``metadata=`` kwarg and registers every table in the
    single shared ``SQLModel.metadata``. Handing Alembic a MetaData that holds
    only one database's tables lets it detect additions, changes, and removals
    for that database alone, including drops for tables that are missing.

    Args:
        table_names: Keys of ``SQLModel.metadata.tables`` to include.

    Returns:
        A new MetaData containing copies of the selected tables only.
    """
    subset = MetaData()
    selected = [
        table
        for key, table in SQLModel.metadata.tables.items()
        if key in table_names
    ]
    for table in selected:
        table.to_metadata(subset)
    return subset


# Each database gets its own table-name set (found by walking the subclasses of
# its base model) and a MetaData restricted to exactly those tables.

# Tables of models derived from BaseModel.
_base_table_names = _get_table_names(BaseModel)
_base_metadata = _build_filtered_metadata(_base_table_names)

# Tables of models derived from MetadataModel.
_meta_table_names = _get_table_names(MetadataModel)
_meta_metadata = _build_filtered_metadata(_meta_table_names)


def _targets_meta_db() -> bool:
    """Return True when the active config selects the metadata migrations.

    The decision is based on the configured ``version_locations``: the
    metadata database is chosen when any location's final path component is
    exactly ``meta_versions``. Comparing path components (instead of the tail
    of the concatenated string) tolerates trailing slashes and multiple
    locations, and does not match directories that merely end in that name.
    """
    locations = config.get_version_locations_list() or []
    return any(Path(location).name == "meta_versions" for location in locations)


def run_migrations() -> None:
    """Run the migrations against the database selected by the Alembic config.

    Picks the metadata database when the configured version location is the
    ``meta_versions`` directory and the data database otherwise. The parent
    directory of the database file is created if missing, then the migrations
    run inside a transaction with batch rendering enabled.
    """
    settings = Settings()
    if _targets_meta_db():
        bind, db_path, metadata = meta_engine, settings.meta_db_path, _meta_metadata
    else:
        bind, db_path, metadata = engine, settings.db_path, _base_metadata

    # Ensure the directory that holds the database file exists before connecting.
    Path(db_path).parent.mkdir(parents=True, exist_ok=True)

    with bind.connect() as connection:
        context.configure(
            connection=connection,
            target_metadata=metadata,
            render_as_batch=True,
        )

        with context.begin_transaction():
            context.run_migrations()


run_migrations()
43 changes: 43 additions & 0 deletions alembic/meta_versions/357b241a4250_add_flagged.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,43 @@
"""add flagged

Revision ID: 357b241a4250
Revises: c28cde0a90db
Create Date: 2026-02-27 22:17:37.706745

"""

from typing import Sequence, Union

import sqlalchemy as sa

from alembic import op

# revision identifiers, used by Alembic.
revision: str = "357b241a4250"
down_revision: Union[str, Sequence[str], None] = "c28cde0a90db"
branch_labels: Union[str, Sequence[str], None] = None
depends_on: Union[str, Sequence[str], None] = None


def upgrade() -> None:
    """Add a non-nullable boolean ``flagged`` column to ``httpcache``."""
    flagged_column = sa.Column(
        "flagged", sa.Boolean(), nullable=False, server_default=sa.false()
    )

    # First batch: add the column with a server-side default of false.
    with op.batch_alter_table("httpcache", schema=None) as batch:
        batch.add_column(flagged_column)

    # Second batch: remove the server default again, leaving the column NOT NULL.
    with op.batch_alter_table("httpcache", schema=None) as batch:
        batch.alter_column("flagged", server_default=None)


def downgrade() -> None:
    """Remove the ``flagged`` column from ``httpcache``."""
    with op.batch_alter_table("httpcache", schema=None) as batch:
        batch.drop_column("flagged")
37 changes: 37 additions & 0 deletions alembic/meta_versions/c28cde0a90db_move_over_cleanup_table.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,37 @@
"""move over cleanup table

Revision ID: c28cde0a90db
Revises: fd345d2b7d78
Create Date: 2026-02-27 19:08:43.207368

"""
from typing import Sequence, Union

from alembic import op
import sqlalchemy as sa
import sqlmodel


# revision identifiers, used by Alembic.
revision: str = 'c28cde0a90db'
down_revision: Union[str, Sequence[str], None] = 'fd345d2b7d78'
branch_labels: Union[str, Sequence[str], None] = None
depends_on: Union[str, Sequence[str], None] = None


def upgrade() -> None:
    """Create the ``lastcleanup`` table with an ``id`` key and a ``timestamp``."""
    table_items = (
        sa.Column("id", sa.Integer(), nullable=False),
        sa.Column("timestamp", sa.INTEGER(), nullable=False),
        sa.PrimaryKeyConstraint("id"),
    )
    op.create_table("lastcleanup", *table_items)


def downgrade() -> None:
    """Drop the ``lastcleanup`` table created by this revision's upgrade."""
    op.drop_table("lastcleanup")
42 changes: 42 additions & 0 deletions alembic/meta_versions/fd345d2b7d78_httpcache_table.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,42 @@
"""httpcache table

Revision ID: fd345d2b7d78
Revises:
Create Date: 2026-02-27 17:45:49.644585

"""

from typing import Sequence, Union

import sqlalchemy as sa
import sqlmodel

from alembic import op

# revision identifiers, used by Alembic.
revision: str = "fd345d2b7d78"
down_revision: Union[str, Sequence[str], None] = None
branch_labels: Union[str, Sequence[str], None] = None
depends_on: Union[str, Sequence[str], None] = None


def upgrade() -> None:
    """Create the ``httpcache`` table, keyed by ``url``."""
    table_items = (
        sa.Column("url", sqlmodel.sql.sqltypes.AutoString(), nullable=False),
        sa.Column("status_code", sa.Integer(), nullable=False),
        sa.Column("body", sa.LargeBinary(), nullable=True),
        sa.Column("headers", sa.JSON(), nullable=True),
        sa.Column("scraped_at", sa.INTEGER(), nullable=False),
        sa.PrimaryKeyConstraint("url"),
    )
    op.create_table("httpcache", *table_items)


def downgrade() -> None:
    """Drop the ``httpcache`` table created by this revision's upgrade."""
    op.drop_table("httpcache")
37 changes: 37 additions & 0 deletions alembic/versions/3b1a337a1fe5_move_over_cleanup_table.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,37 @@
"""move over cleanup table

Revision ID: 3b1a337a1fe5
Revises: aa860aba0a9f
Create Date: 2026-02-27 19:18:10.668579

"""
from typing import Sequence, Union

from alembic import op
import sqlalchemy as sa
import sqlmodel


# revision identifiers, used by Alembic.
revision: str = '3b1a337a1fe5'
down_revision: Union[str, Sequence[str], None] = 'aa860aba0a9f'
branch_labels: Union[str, Sequence[str], None] = None
depends_on: Union[str, Sequence[str], None] = None


def upgrade() -> None:
    """Drop the ``lastcleanup`` table; any rows it holds are not copied."""
    op.drop_table("lastcleanup")


def downgrade() -> None:
    """Recreate the ``lastcleanup`` table that this revision's upgrade drops."""
    table_items = (
        sa.Column("id", sa.INTEGER(), nullable=False),
        sa.Column("timestamp", sa.INTEGER(), nullable=False),
        sa.PrimaryKeyConstraint("id"),
    )
    op.create_table("lastcleanup", *table_items)
1 change: 1 addition & 0 deletions api/env.py
Original file line number Diff line number Diff line change
Expand Up @@ -10,6 +10,7 @@ class Settings(BaseSettings):
)

db_path: str = "data/db.sqlite"
meta_db_path: str = "data/meta_db.sqlite"
cache_expiry: int = 60 * 60 * 24 * 30 # in seconds (30 days)
sitemap_expiry: int = 86400 # in seconds
plausible_url: str | None = None
Expand Down
Loading