diff --git a/README.md b/README.md index a696139..3704363 100644 --- a/README.md +++ b/README.md @@ -17,6 +17,7 @@ Features: and check if they are allowed in the current context. - Can identify "unsafe" operations, e.g. operations that acquire locks that can be dangerous for production database. +- Checks if newly created tables have primary keys to ensure better performance and data integrity. ## Installation @@ -30,4 +31,4 @@ pip install "migration-lint" ## Documentation -Read the docs on [GitHub Pages](https://pandadoc.github.io/migration-lint/) \ No newline at end of file +Read the docs on [GitHub Pages](https://pandadoc.github.io/migration-lint/) diff --git a/docs/classification.md b/docs/classification.md index 5564a5a..de819bc 100644 --- a/docs/classification.md +++ b/docs/classification.md @@ -125,9 +125,17 @@ Backward-compatible migration > **WARNING**: If there are foreign keys, table creation requires > `ShareRowExclusiveLock` on the child tables, so use `lock_timeout` > if the table to create contains foreign keys. `ADD FOREIGN KEY ... NOT VALID` -> does require the same lock, so it doesnโ€™t make much sense +> does require the same lock, so it doesn't make much sense > to create foreign keys separately. +> **BEST PRACTICE**: Always include a primary key when creating tables. +> Tables without primary keys can lead to performance issues and complicate +> replication. The migration-lint primary key linter will warn about tables +> created without primary keys. Common patterns include: +> * `id SERIAL PRIMARY KEY` (auto-incrementing integer) +> * `id UUID PRIMARY KEY DEFAULT gen_random_uuid()` (UUID) +> * Composite primary keys for junction tables + ### Drop Table Backward-incompatible migration @@ -522,6 +530,10 @@ Backward-compatible migration * Drop foreign keys in other tables * `ALTER TABLE DROP CONSTRAINT IF EXISTS `. +> **NOTE**: The migration-lint primary key linter only checks for missing +> primary keys in newly created tables. It does not validate operations +> that modify or drop existing primary keys. + ### Add Check Constraints Backward-compatible migration diff --git a/docs/index.md b/docs/index.md index ca1bd2b..22a6f1a 100644 --- a/docs/index.md +++ b/docs/index.md @@ -13,6 +13,8 @@ Features: and check if they are allowed in the current context. - Can identify "unsafe" operations, e.g. operations that acquire locks that can be dangerous for production database. +- Primary Key Validation: Checks if newly created tables have primary keys + to ensure better performance and data integrity. ## Installation @@ -31,7 +33,8 @@ pip install "migration-lint" so it depends on the framework you use for migrations. - **Linter** - class that checks migration's SQL and context and returns errors if any. We have implemented our linter - for backward incompatible migrations as well as integrated `squawk` linter. + for backward incompatible migrations, integrated `squawk` linter, + and added a primary key validation linter. ## Run @@ -74,6 +77,65 @@ migration-lint --loader=gitlab_branch --extractor= I uses env variable CI_MERGE_REQUEST_ID or option --mr-id. +## Configuration Options + +### Primary Key Validation + +The primary key linter can be configured using the following option: + +```shell +--primary-key-check-dry-run +``` + +- **Default**: `True` (dry-run mode enabled) +- **Environment variable**: `MIGRATION_LINTER_PRIMARY_KEY_CHECK_DRY_RUN` +- **Description**: When enabled (default), the primary key linter runs in dry-run mode, + logging warnings about tables created without primary keys but not failing the overall + linting process. When disabled, missing primary keys are treated as errors. + +**Example usage:** + +```shell +# Run with primary key validation in dry-run mode (default) +migration-lint --loader=local_git --extractor=django + +# Disable dry-run mode to treat missing primary keys as errors +migration-lint --loader=local_git --extractor=django --primary-key-check-dry-run=false +``` + +## Linters + +### Primary Key Linter + +The Primary Key Linter checks if newly created tables have primary keys defined. +This is important for database performance and data integrity. + +**What it checks:** +- Analyzes `CREATE TABLE` statements in migrations +- Detects tables created without a `PRIMARY KEY` constraint +- Provides helpful warnings or errors depending on configuration + +**Example output in dry-run mode:** +``` +๐Ÿ” Primary Key Check (dry-run mode): + โš ๏ธ Table 'user_sessions' created without a primary key: + CREATE TABLE user_sessions (session_id VARCHAR(255), user_id INTEGER, created_at TIMESTAMP) + ๐Ÿ’ก Consider adding a primary key for better performance and data integrity. +``` + +**Example output when dry-run is disabled:** +``` +Errors found in migrations: + +- Table 'user_sessions' is created without a primary key: CREATE TABLE user_sessions (session_id VARCHAR(255), user_id INTEGER, created_at TIMESTAMP) + Consider adding a primary key for better performance and data integrity. +``` + +**Best practices:** +- Every table should have a primary key for optimal database performance +- Primary keys ensure row uniqueness and enable efficient indexing +- Common primary key patterns include auto-incrementing integers or UUIDs + ## Feedback We value feedback and are committed to supporting engineers throughout diff --git a/migration_lint/analyzer/__init__.py b/migration_lint/analyzer/__init__.py index 1e382b6..a43be94 100644 --- a/migration_lint/analyzer/__init__.py +++ b/migration_lint/analyzer/__init__.py @@ -1,9 +1,11 @@ from migration_lint.analyzer.base import Analyzer from migration_lint.analyzer.compat import CompatibilityLinter +from migration_lint.analyzer.primary_key import PrimaryKeyLinter from migration_lint.analyzer.squawk import SquawkLinter __all__ = ( "Analyzer", "CompatibilityLinter", "SquawkLinter", + "PrimaryKeyLinter", ) diff --git a/migration_lint/analyzer/primary_key.py b/migration_lint/analyzer/primary_key.py new file mode 100644 index 0000000..38b4971 --- /dev/null +++ b/migration_lint/analyzer/primary_key.py @@ -0,0 +1,120 @@ +from __future__ import annotations + +from typing import List + +from migration_lint import logger +from migration_lint.analyzer.base import BaseLinter +from migration_lint.extractor.model import ExtendedSourceDiff +from migration_lint.sql.parser import classify_migration +from migration_lint.util.colors import blue, yellow + + +class PrimaryKeyLinter(BaseLinter): + """ + Linter that checks if newly created tables have primary keys. + + This linter operates in dry-run mode by default, meaning it only logs + warnings but doesn't cause the overall linting process to fail. + """ + + def __init__(self, dry_run: bool = True): + """ + Initialize the PrimaryKeyLinter. + + Args: + dry_run: If True, only log warnings without failing the linter. + If False, treat missing primary keys as errors. + """ + self.dry_run = dry_run + + def lint( + self, + migration_sql: str, + changed_files: List[ExtendedSourceDiff], + ) -> List[str]: + """Perform primary key validation on CREATE TABLE statements.""" + + errors = [] + + try: + classification_result = classify_migration(migration_sql) + + create_table_statements = [ + (statement_sql, statement_type) + for statement_sql, statement_type in classification_result + if self._is_create_table_statement(statement_sql) + ] + + tables_without_pk = [] + for statement_sql, _ in create_table_statements: + table_name = self._extract_table_name_from_sql(statement_sql) + if not self._sql_has_primary_key(statement_sql): + tables_without_pk.append({ + 'table_name': table_name or 'unknown', + 'sql': statement_sql + }) + + if tables_without_pk: + if self.dry_run: + logger.info(yellow("\n๐Ÿ” Primary Key Check (dry-run mode):")) + for table_info in tables_without_pk: + logger.info( + yellow( + f" โš ๏ธ Table '{table_info['table_name']}' " + f"created without a primary key:" + ) + ) + logger.info(blue(f" {table_info['sql']}")) + logger.info( + yellow( + " ๐Ÿ’ก Consider adding a primary key for better " + "performance and data integrity." + ) + ) + else: + for table_info in tables_without_pk: + errors.append( + f"- Table '{table_info['table_name']}' is created " + f"without a primary key: {table_info['sql']}\n" + f" Consider adding a primary key for better " + f"performance and data integrity." + ) + else: + logger.info( + blue("โœ… Primary Key Check: All created tables have primary keys.") + ) + + except Exception as e: + logger.info( + yellow(f"Primary Key Check: Could not analyze SQL - {str(e)}") + ) + + return errors + + def _is_create_table_statement(self, statement_sql: str) -> bool: + """Check if a SQL statement is a CREATE TABLE statement.""" + normalized_sql = statement_sql.strip().upper() + return normalized_sql.startswith("CREATE TABLE") + + def _extract_table_name_from_sql(self, statement_sql: str) -> str: + """Extract table name from CREATE TABLE SQL statement.""" + try: + import re + + # Match CREATE TABLE [IF NOT EXISTS] table_name + match = re.search( + r'CREATE\s+TABLE\s+(?:IF\s+NOT\s+EXISTS\s+)?([^\s\(]+)', + statement_sql, + re.IGNORECASE + ) + if match: + return match.group(1).strip('"`') + return "unknown" + except Exception: + return "unknown" + + def _sql_has_primary_key(self, statement_sql: str) -> bool: + """Check if CREATE TABLE SQL statement includes a primary key.""" + normalized_sql = statement_sql.upper() + + return "PRIMARY KEY" in normalized_sql diff --git a/migration_lint/main.py b/migration_lint/main.py index fd8accc..e7fec46 100644 --- a/migration_lint/main.py +++ b/migration_lint/main.py @@ -3,9 +3,14 @@ import click from migration_lint import logger -from migration_lint.analyzer import Analyzer, CompatibilityLinter, SquawkLinter -from migration_lint.extractor import Extractor, DjangoExtractor -from migration_lint.source_loader import SourceLoader, LocalLoader +from migration_lint.analyzer import ( + Analyzer, + CompatibilityLinter, + PrimaryKeyLinter, + SquawkLinter, +) +from migration_lint.extractor import DjangoExtractor, Extractor +from migration_lint.source_loader import LocalLoader, SourceLoader from migration_lint.util.env import get_bool_env @@ -87,30 +92,43 @@ "--ignore-extractor-not-found", "ignore_extractor_not_found", is_flag=True, - help="Don't fail the whole linter if extraction went fine, but info about particular migration couldn't be found", + help="Don't fail the whole linter if extraction went fine, but info " + "about particular migration couldn't be found", default=os.getenv("MIGRATION_LINTER_IGNORE_EXTRACTOR_NOT_FOUND", False), ) +@click.option( + "--primary-key-check-dry-run", + "primary_key_check_dry_run", + is_flag=True, + help="Primary key validation dry-run mode", + default=os.getenv("MIGRATION_LINTER_PRIMARY_KEY_CHECK_DRY_RUN", True), +) def main( loader_type: str, extractor_type: str, squawk_config_path: str, squawk_pg_version: str, + primary_key_check_dry_run: bool, **kwargs, ) -> None: logger.info("Start analysis..") loader = SourceLoader.get(loader_type)(**kwargs) extractor = Extractor.get(extractor_type)(**kwargs) + + linters = [ + CompatibilityLinter(), + SquawkLinter( + config_path=squawk_config_path, + pg_version=squawk_pg_version, + ), + PrimaryKeyLinter(dry_run=primary_key_check_dry_run), + ] + analyzer = Analyzer( loader=loader, extractor=extractor, - linters=[ - CompatibilityLinter(), - SquawkLinter( - config_path=squawk_config_path, - pg_version=squawk_pg_version, - ), - ], + linters=linters, ) analyzer.analyze() diff --git a/tests/test_primary_key_linter.py b/tests/test_primary_key_linter.py new file mode 100644 index 0000000..aac66c8 --- /dev/null +++ b/tests/test_primary_key_linter.py @@ -0,0 +1,103 @@ +from unittest.mock import patch + +import pytest + +from migration_lint.analyzer.primary_key import PrimaryKeyLinter + + +@pytest.mark.parametrize( + "sql, has_pk", + [ + ("CREATE TABLE users (id INT PRIMARY KEY, name VARCHAR(100))", True), + ("CREATE TABLE users (id INT, name VARCHAR(100), PRIMARY KEY (id))", True), + ("CREATE TABLE users (id INT primary key)", True), # case insensitive + ("CREATE TABLE users (id INT, name VARCHAR(100))", False), + ("CREATE TABLE logs (message TEXT, created_at TIMESTAMP)", False), + ], +) +def test_sql_has_primary_key(sql: str, has_pk: bool): + linter = PrimaryKeyLinter() + assert linter._sql_has_primary_key(sql) == has_pk + + +@pytest.mark.parametrize( + "sql, is_create_table", + [ + ("CREATE TABLE users (id INT, name VARCHAR(100))", True), + ("create table products (id serial, name text)", True), + ("CREATE TABLE IF NOT EXISTS categories (id INT)", True), + ("ALTER TABLE users ADD COLUMN email VARCHAR(255)", False), + ("DROP TABLE users", False), + ("SELECT * FROM users", False), + ], +) +def test_is_create_table_statement(sql: str, is_create_table: bool): + linter = PrimaryKeyLinter() + assert linter._is_create_table_statement(sql) == is_create_table + + +@patch('migration_lint.analyzer.primary_key.classify_migration') +@patch('migration_lint.analyzer.primary_key.logger') +def test_lint_dry_run_mode_missing_pk(mock_logger, mock_classify): + """Test dry-run mode logs warnings but returns no errors.""" + mock_classify.return_value = [ + ("CREATE TABLE users (id INT, name VARCHAR(100))", "CREATE_TABLE"), + ] + + linter = PrimaryKeyLinter(dry_run=True) + errors = linter.lint("CREATE TABLE users (id INT, name VARCHAR(100))", []) + + assert errors == [] + mock_logger.info.assert_any_call( + "\x1b[33;21m\n๐Ÿ” Primary Key Check (dry-run mode):\x1b[0m" + ) + + +@patch('migration_lint.analyzer.primary_key.classify_migration') +def test_lint_strict_mode_missing_pk(mock_classify): + """Test strict mode returns errors for missing primary keys.""" + mock_classify.return_value = [ + ("CREATE TABLE users (id INT, name VARCHAR(100))", "CREATE_TABLE"), + ] + + linter = PrimaryKeyLinter(dry_run=False) + errors = linter.lint("CREATE TABLE users (id INT, name VARCHAR(100))", []) + + assert len(errors) == 1 + assert "Table 'users' is created without a primary key" in errors[0] + + +@patch('migration_lint.analyzer.primary_key.classify_migration') +@patch('migration_lint.analyzer.primary_key.logger') +def test_lint_all_tables_have_pk(mock_logger, mock_classify): + """Test when all tables have primary keys.""" + mock_classify.return_value = [ + ("CREATE TABLE users (id INT PRIMARY KEY)", "CREATE_TABLE"), + ] + + linter = PrimaryKeyLinter() + errors = linter.lint("CREATE TABLE users (id INT PRIMARY KEY)", []) + + assert errors == [] + expected_msg = ( + "\x1b[1;34mโœ… Primary Key Check: All created tables " + "have primary keys.\x1b[0m" + ) + mock_logger.info.assert_called_with(expected_msg) + + +@patch('migration_lint.analyzer.primary_key.classify_migration') +@patch('migration_lint.analyzer.primary_key.logger') +def test_lint_exception_handling(mock_logger, mock_classify): + """Test graceful exception handling.""" + mock_classify.side_effect = Exception("Classification failed") + + linter = PrimaryKeyLinter() + errors = linter.lint("CREATE TABLE users (id INT)", []) + + assert errors == [] + expected_msg = ( + "\x1b[33;21mPrimary Key Check: Could not analyze SQL - " + "Classification failed\x1b[0m" + ) + mock_logger.info.assert_called_with(expected_msg)