From 87a0813b0df0a333d4eb94683bc4f78e0a40a9e4 Mon Sep 17 00:00:00 2001 From: Norman Hooper Date: Wed, 21 Jan 2026 21:01:27 +0000 Subject: [PATCH 1/9] Replace stderr intercept with error logging --- commcare_export/__init__.py | 17 ------- commcare_export/cli.py | 95 ++++++++++++++++--------------------- 2 files changed, 40 insertions(+), 72 deletions(-) diff --git a/commcare_export/__init__.py b/commcare_export/__init__.py index a34779a..7ee6a0c 100644 --- a/commcare_export/__init__.py +++ b/commcare_export/__init__.py @@ -6,36 +6,19 @@ __all__ = [ '__version__', 'get_logger', - 'get_error_logger', 'logger_name_from_filepath', - 'Logger', 'repo_root', ] repo_root = os.path.abspath(os.path.join(__file__, os.pardir, os.pardir)) -class Logger: - def __init__(self, logger, level): - self.logger = logger - self.level = level - self.linebuf = '' - - def write(self, buf): - for line in buf.rstrip().splitlines(): - self.logger.log(self.level, line.rstrip()) - - def logger_name_from_filepath(filepath): relative_path = os.path.relpath(filepath, start=repo_root) cleaned_path = relative_path.replace('/', '.') return re.sub(r'\.py$', '', cleaned_path) -def get_error_logger(): - return Logger(logging.getLogger(), logging.ERROR) - - def get_logger(filepath=None): if filepath: logger = logging.getLogger( diff --git a/commcare_export/cli.py b/commcare_export/cli.py index 0a2421d..a31610e 100644 --- a/commcare_export/cli.py +++ b/commcare_export/cli.py @@ -28,7 +28,7 @@ from commcare_export.repeatable_iterator import RepeatableIterator from commcare_export.utils import get_checkpoint_manager from commcare_export.version import __version__ -from commcare_export import get_logger, get_error_logger +from commcare_export import get_logger EXIT_STATUS_SUCCESS = 0 EXIT_STATUS_ERROR = 1 @@ -190,7 +190,7 @@ def set_up_logging(log_dir=None): :param log_dir: Directory where the log file will be written. If None, uses the current working directory. - :returns tuple: (success, log_file_path, error_msg) + :returns tuple: (success, log_file_path, error_msg, file_handler) """ if log_dir is None: log_dir = os.getcwd() @@ -203,15 +203,10 @@ def set_up_logging(log_dir=None): with open(log_file, 'a'): # Test write permissions pass - logging.basicConfig( - filename=log_file, - format='%(asctime)s %(name)-12s %(levelname)-8s %(message)s', - filemode='a', - ) - sys.stderr = get_error_logger() - return True, log_file, None + file_handler = logging.FileHandler(log_file, mode='a') + return True, log_file, None, file_handler except (OSError, IOError, PermissionError) as err: - return False, log_file, f"{type(err).__name__}: {err}" + return False, log_file, f"{type(err).__name__}: {err}", None def main(argv): @@ -229,24 +224,22 @@ def main(argv): if errors: raise Exception(f"Could not proceed. Following issues were found: {', '.join(errors)}.") + handlers = [logging.StreamHandler()] if not args.no_logfile: - success, log_file, error = set_up_logging(args.log_dir) + success, log_file, error, file_handler = set_up_logging(args.log_dir) if success: + handlers.append(file_handler) print(f'Writing logs to {log_file}') else: print(f'Warning: Unable to write to log file {log_file}: {error}') print('Logging to console only.') - if args.verbose: - logging.basicConfig( - level=logging.DEBUG, - format='%(asctime)s %(name)-12s %(levelname)-8s %(message)s' - ) - else: - logging.basicConfig( - level=logging.WARN, - format='%(asctime)s %(name)-12s %(levelname)-8s %(message)s' - ) + log_level = logging.DEBUG if args.verbose else logging.WARN + logging.basicConfig( + level=log_level, + format='%(asctime)s %(name)-12s %(levelname)-8s %(message)s', + handlers=handlers + ) logging.getLogger('alembic').setLevel(logging.WARN) logging.getLogger('backoff').setLevel(logging.FATAL) @@ -258,13 +251,7 @@ def main(argv): if not args.project: error_msg = "commcare-export: error: argument --project is required" - # output to log file through sys.stderr - print( - error_msg, - file=sys.stderr - ) - # Output to console for debugging - print(error_msg) + logger.error(error_msg) sys.exit(1) print("Running export...") @@ -364,11 +351,9 @@ def _get_writer(output_format, output, strict_types): elif output_format == 'csv': if not output.endswith(".zip"): print( - "WARNING: csv output is a zip file, but " - f"will be written to {output}" - ) - print( - "Consider appending .zip to the file name to avoid confusion." + 'WARNING: CSV output is a zip file, but will be written to ' + f"'{output}'.\n" + "Consider appending '.zip' to the file name to avoid confusion." ) return writers.CsvTableWriter(output) elif output_format == 'json': @@ -383,8 +368,9 @@ def _get_writer(output_format, output, strict_types): charset_split = output.split('charset=') if len(charset_split) > 1 and charset_split[1] != 'utf8mb4': raise Exception( - f"The charset '{charset_split[1]}' might cause problems with the export. " - f"It is recommended that you use 'utf8mb4' instead." + f"The charset '{charset_split[1]}' might cause problems " + "with the export. It is recommended that you use " + "'utf8mb4' instead." ) return writers.SqlTableWriter(output, strict_types) @@ -414,8 +400,7 @@ def _get_checkpoint_manager(args): args.query ): logger.warning( - "Checkpointing disabled for non builtin, " - "non file-based query" + "Checkpointing disabled for non builtin, non file-based query" ) elif args.since or args.until: logger.warning( @@ -442,29 +427,30 @@ def evaluate_query(env, query): lazy_result = query.eval(env) force_lazy_result(lazy_result) return 0 - except requests.exceptions.RequestException as e: - if e.response and e.response.status_code == 401: - print( - "\nAuthentication failed. Please check your credentials.", - file=sys.stderr + except requests.exceptions.RequestException as err: + if err.response and err.response.status_code == 401: + logger.error( + "Authentication failed. Please check your credentials." ) return EXIT_STATUS_ERROR else: raise - except ResourceRepeatException as e: - print('Stopping because the export is stuck') - print(e.message) - print('Try increasing --batch-size to overcome the error') + except ResourceRepeatException as err: + logger.error( + 'Stopping because the export is stuck.\n' + f'{err.message}\n' + f'Try increasing --batch-size to overcome the error' + ) return EXIT_STATUS_ERROR except ( sqlalchemy.exc.DataError, sqlalchemy.exc.InternalError, sqlalchemy.exc.ProgrammingError - ) as e: - print('Stopping because of database error:\n', e) + ) as err: + logger.error(f'Stopping because of database error:\n{err}') return EXIT_STATUS_ERROR except KeyboardInterrupt: - print('\nExport aborted', file=sys.stderr) + logger.error("Export aborted") return EXIT_STATUS_ERROR @@ -473,10 +459,9 @@ def main_with_args(args): writer = _get_writer(args.output_format, args.output, args.strict_types) if args.query is None and args.users is False and args.locations is False: - print( - 'At least one the following arguments is required: ' - '--query, --users, --locations', - file=sys.stderr + logger.error( + "At least one the following arguments is required: " + "--query, --users, --locations" ) return EXIT_STATUS_ERROR @@ -500,8 +485,8 @@ def main_with_args(args): lp = LocationInfoProvider(api_client, page_size=args.batch_size) try: query = get_queries(args, writer, lp, column_enforcer) - except DataExportException as e: - print(e.message, file=sys.stderr) + except DataExportException as err: + logger.error(err.message) return EXIT_STATUS_ERROR if args.dump_query: From 417a7339b301fff0c308e7b541fdb20b8a1318b1 Mon Sep 17 00:00:00 2001 From: Norman Hooper Date: Wed, 21 Jan 2026 21:11:47 +0000 Subject: [PATCH 2/9] Use simple formatting for stderr --- commcare_export/cli.py | 47 +++++++++++++++++++++++++++--------------- 1 file changed, 30 insertions(+), 17 deletions(-) diff --git a/commcare_export/cli.py b/commcare_export/cli.py index a31610e..0622d7f 100644 --- a/commcare_export/cli.py +++ b/commcare_export/cli.py @@ -184,7 +184,7 @@ def add_to_parser(self, parser, **additional_kwargs): ] -def set_up_logging(log_dir=None): +def set_up_file_logging(log_dir=None): """ Set up file-based logging. @@ -209,6 +209,34 @@ def set_up_logging(log_dir=None): return False, log_file, f"{type(err).__name__}: {err}", None +def set_up_logging(args): + stream_handler = logging.StreamHandler() + stream_handler.setFormatter(logging.Formatter('%(message)s')) + handlers = [stream_handler] + if not args.no_logfile: + success, log_file, error, file_handler = set_up_file_logging( + args.log_dir + ) + if success: + file_handler.setFormatter( + logging.Formatter( + '%(asctime)s %(name)-12s %(levelname)-8s %(message)s' + ) + ) + handlers.append(file_handler) + print(f'Writing logs to {log_file}') + else: + print(f'Warning: Unable to write to log file {log_file}: {error}') + print('Logging to console only.') + + log_level = logging.DEBUG if args.verbose else logging.WARN + root_logger = logging.getLogger() + root_logger.handlers.clear() + root_logger.setLevel(log_level) + for handler in handlers: + root_logger.addHandler(handler) + + def main(argv): parser = argparse.ArgumentParser( 'commcare-export', 'Output a customized export of CommCareHQ data.' @@ -224,22 +252,7 @@ def main(argv): if errors: raise Exception(f"Could not proceed. Following issues were found: {', '.join(errors)}.") - handlers = [logging.StreamHandler()] - if not args.no_logfile: - success, log_file, error, file_handler = set_up_logging(args.log_dir) - if success: - handlers.append(file_handler) - print(f'Writing logs to {log_file}') - else: - print(f'Warning: Unable to write to log file {log_file}: {error}') - print('Logging to console only.') - - log_level = logging.DEBUG if args.verbose else logging.WARN - logging.basicConfig( - level=log_level, - format='%(asctime)s %(name)-12s %(levelname)-8s %(message)s', - handlers=handlers - ) + set_up_logging(args) logging.getLogger('alembic').setLevel(logging.WARN) logging.getLogger('backoff').setLevel(logging.FATAL) From 1ec25bbde29b1961e4645676d3239a0cd2f113e1 Mon Sep 17 00:00:00 2001 From: Norman Hooper Date: Wed, 21 Jan 2026 21:23:00 +0000 Subject: [PATCH 3/9] Tests --- pyproject.toml | 1 + tests/test_cli.py | 102 +++++++++++++++++++++++++++++++++------------- 2 files changed, 75 insertions(+), 28 deletions(-) diff --git a/pyproject.toml b/pyproject.toml index 15ddad4..e7811a1 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -50,6 +50,7 @@ dependencies = [ [project.optional-dependencies] test = [ "pytest", + "pytest-unmagic", "psycopg2-binary", "mock", ] diff --git a/tests/test_cli.py b/tests/test_cli.py index 8931a23..89ddaef 100644 --- a/tests/test_cli.py +++ b/tests/test_cli.py @@ -1,4 +1,5 @@ import csv +import logging import os import re import unittest @@ -11,6 +12,7 @@ from tests.utils import SqlWriterWithTearDown import pytest +from unmagic import fixture from commcare_export.checkpoint import ( Checkpoint, CheckpointManager, @@ -48,6 +50,18 @@ def make_args(project='test', username='test', password='test', **kwargs): return namespace +@fixture +def restore_root_logger(): + root_logger = logging.getLogger() + previous_handlers = list(root_logger.handlers) + previous_level = root_logger.level + yield + root_logger.handlers.clear() + root_logger.setLevel(previous_level) + for handler in previous_handlers: + root_logger.addHandler(handler) + + def mock_hq_client(include_parent): return MockCommCareHqClient({ 'form': [({ @@ -315,92 +329,108 @@ def test_cli_table_plus_locations(self, mock_client): + get_expected_locations_results(True) ) - @mock.patch('logging.basicConfig') @mock.patch('os.getcwd', return_value='/mock/cwd') @mock.patch('os.makedirs') @mock.patch('builtins.open', new_callable=mock.mock_open) + @mock.patch('logging.FileHandler') def test_log_dir_default( self, + mock_file_handler, mock_open, mock_makedirs, mock_getcwd, - mock_basicConfig, ): - from commcare_export.cli import set_up_logging + from commcare_export.cli import set_up_file_logging - success, log_file, error = set_up_logging() + handler = mock.Mock() + mock_file_handler.return_value = handler + success, log_file, error, file_handler = set_up_file_logging() assert success is True assert log_file == '/mock/cwd/commcare_export.log' assert error is None + assert file_handler is handler mock_makedirs.assert_called_once_with('/mock/cwd', exist_ok=True) - mock_basicConfig.assert_called_once() - call_kwargs = mock_basicConfig.call_args[1] - assert call_kwargs['filename'] == '/mock/cwd/commcare_export.log' - assert call_kwargs['filemode'] == 'a' + mock_file_handler.assert_called_once_with( + '/mock/cwd/commcare_export.log', + mode='a' + ) - @mock.patch('logging.basicConfig') @mock.patch('os.makedirs') @mock.patch('builtins.open', new_callable=mock.mock_open) + @mock.patch('logging.FileHandler') def test_log_dir_custom( self, + mock_file_handler, mock_open, mock_makedirs, - mock_basicConfig, ): - from commcare_export.cli import set_up_logging + from commcare_export.cli import set_up_file_logging - success, log_file, error = set_up_logging('/custom/log/path') + handler = mock.Mock() + mock_file_handler.return_value = handler + success, log_file, error, file_handler = set_up_file_logging( + '/custom/log/path' + ) assert success is True assert log_file == '/custom/log/path/commcare_export.log' assert error is None + assert file_handler is handler mock_makedirs.assert_called_once_with('/custom/log/path', exist_ok=True) - mock_basicConfig.assert_called_once() - call_kwargs = mock_basicConfig.call_args[1] - assert call_kwargs['filename'] == '/custom/log/path/commcare_export.log' + mock_file_handler.assert_called_once_with( + '/custom/log/path/commcare_export.log', + mode='a' + ) @mock.patch('os.makedirs', side_effect=PermissionError("Permission denied")) def test_log_dir_permission_error( self, mock_makedirs, ): - from commcare_export.cli import set_up_logging + from commcare_export.cli import set_up_file_logging - success, log_file, error = set_up_logging('/restricted/path') + success, log_file, error, file_handler = set_up_file_logging( + '/restricted/path' + ) assert success is False assert log_file == '/restricted/path/commcare_export.log' assert error == "PermissionError: Permission denied" + assert file_handler is None mock_makedirs.assert_called_once_with('/restricted/path', exist_ok=True) - @mock.patch('logging.basicConfig') @mock.patch('os.getcwd', return_value='/test/dir') @mock.patch('os.makedirs') @mock.patch('builtins.open', new_callable=mock.mock_open) + @mock.patch('logging.FileHandler') def test_log_append_mode( self, + mock_file_handler, mock_open, mock_makedirs, mock_getcwd, - mock_basicConfig, ): - from commcare_export.cli import set_up_logging + from commcare_export.cli import set_up_file_logging - success, log_file, error = set_up_logging() + handler = mock.Mock() + mock_file_handler.return_value = handler + success, log_file, error, file_handler = set_up_file_logging() assert success is True - mock_basicConfig.assert_called_once() - call_kwargs = mock_basicConfig.call_args[1] - assert call_kwargs['filemode'] == 'a' + assert file_handler is handler + mock_file_handler.assert_called_once_with( + '/test/dir/commcare_export.log', + mode='a' + ) @mock.patch('commcare_export.cli._get_api_client', return_value=mock_hq_client(True)) - @mock.patch('commcare_export.cli.set_up_logging') + @mock.patch('commcare_export.cli.set_up_file_logging') @mock.patch('sys.exit') - def test_no_logfile_still_works( + def test_no_logfile( self, mock_exit, - mock_set_up_logging, + mock_set_up_file_logging, mock_client, ): from commcare_export.cli import main @@ -415,7 +445,23 @@ def test_no_logfile_still_works( '--log-dir', '/some/path' # Should be ignored ]) - mock_set_up_logging.assert_not_called() + mock_set_up_file_logging.assert_not_called() + + @restore_root_logger + def test_set_up_logging_uses_message_only_formatter(self): + from commcare_export.cli import set_up_logging + + args = Namespace( + verbose=False, + no_logfile=True, + log_dir=None, + ) + root_logger = logging.getLogger() + set_up_logging(args) + assert len(root_logger.handlers) == 1 + formatter = root_logger.handlers[0].formatter + assert formatter is not None + assert formatter._style._fmt == '%(message)s' @pytest.fixture(scope='function') From 9054caa53795681f0840b16813006bccfa3b5bbb Mon Sep 17 00:00:00 2001 From: Norman Hooper Date: Wed, 21 Jan 2026 21:53:22 +0000 Subject: [PATCH 4/9] Replace get_logger with standard Python --- commcare_export/__init__.py | 22 ---------------------- commcare_export/checkpoint.py | 5 +++-- commcare_export/cli.py | 5 ++--- commcare_export/commcare_hq_client.py | 3 +-- commcare_export/commcare_minilinq.py | 4 ++-- commcare_export/location_info_provider.py | 5 +++-- commcare_export/minilinq.py | 4 ++-- commcare_export/writers.py | 4 ++-- 8 files changed, 15 insertions(+), 37 deletions(-) diff --git a/commcare_export/__init__.py b/commcare_export/__init__.py index 7ee6a0c..53422e9 100644 --- a/commcare_export/__init__.py +++ b/commcare_export/__init__.py @@ -1,31 +1,9 @@ -import logging import os -import re from .version import __version__ __all__ = [ '__version__', - 'get_logger', - 'logger_name_from_filepath', 'repo_root', ] repo_root = os.path.abspath(os.path.join(__file__, os.pardir, os.pardir)) - - -def logger_name_from_filepath(filepath): - relative_path = os.path.relpath(filepath, start=repo_root) - cleaned_path = relative_path.replace('/', '.') - return re.sub(r'\.py$', '', cleaned_path) - - -def get_logger(filepath=None): - if filepath: - logger = logging.getLogger( - logger_name_from_filepath(filepath) - ) - else: - logger = logging.getLogger() - - logger.setLevel(logging.DEBUG) - return logger diff --git a/commcare_export/checkpoint.py b/commcare_export/checkpoint.py index 62c76e9..e604ce8 100644 --- a/commcare_export/checkpoint.py +++ b/commcare_export/checkpoint.py @@ -1,4 +1,5 @@ import datetime +import logging import os import uuid from contextlib import contextmanager @@ -12,9 +13,9 @@ from commcare_export.commcare_minilinq import PaginationMode from commcare_export.exceptions import DataExportException from commcare_export.writers import SqlMixin -from commcare_export import get_logger, repo_root +from commcare_export import repo_root -logger = get_logger(__file__) +logger = logging.getLogger(__name__) Base = declarative_base() diff --git a/commcare_export/cli.py b/commcare_export/cli.py index 0622d7f..5591d37 100644 --- a/commcare_export/cli.py +++ b/commcare_export/cli.py @@ -4,7 +4,6 @@ import json import os import sys -import logging import dateutil.parser import requests import sqlalchemy @@ -28,11 +27,11 @@ from commcare_export.repeatable_iterator import RepeatableIterator from commcare_export.utils import get_checkpoint_manager from commcare_export.version import __version__ -from commcare_export import get_logger +import logging EXIT_STATUS_SUCCESS = 0 EXIT_STATUS_ERROR = 1 -logger = get_logger(__file__) +logger = logging.getLogger(__name__) commcare_hq_aliases = { 'local': 'http://localhost:8000', diff --git a/commcare_export/commcare_hq_client.py b/commcare_export/commcare_hq_client.py index 685f8ae..318c112 100644 --- a/commcare_export/commcare_hq_client.py +++ b/commcare_export/commcare_hq_client.py @@ -11,7 +11,6 @@ import commcare_export from commcare_export.repeatable_iterator import RepeatableIterator -from commcare_export import get_logger AUTH_MODE_PASSWORD = 'password' AUTH_MODE_APIKEY = 'apikey' @@ -20,7 +19,7 @@ LATEST_KNOWN_VERSION = '0.5' RESOURCE_REPEAT_LIMIT = 10 -logger = get_logger(__file__) +logger = logging.getLogger(__name__) def on_wait(details): diff --git a/commcare_export/commcare_minilinq.py b/commcare_export/commcare_minilinq.py index 72d4954..30fee2d 100644 --- a/commcare_export/commcare_minilinq.py +++ b/commcare_export/commcare_minilinq.py @@ -5,6 +5,7 @@ API directly. """ import json +import logging from enum import Enum from typing import Any from urllib.parse import parse_qs, urlparse @@ -14,9 +15,8 @@ from commcare_export.env import CannotBind, CannotReplace, DictEnv from commcare_export.misc import unwrap -from commcare_export import get_logger -logger = get_logger(__file__) +logger = logging.getLogger(__name__) SUPPORTED_RESOURCES = { 'form', diff --git a/commcare_export/location_info_provider.py b/commcare_export/location_info_provider.py index e18eb93..495f6f8 100644 --- a/commcare_export/location_info_provider.py +++ b/commcare_export/location_info_provider.py @@ -1,9 +1,10 @@ +import logging + from commcare_export.commcare_minilinq import SimplePaginator from commcare_export.misc import unwrap_val -from commcare_export import get_logger -logger = get_logger(__file__) +logger = logging.getLogger(__name__) # LocationInfoProvider uses the /location_type/ endpoint of the API to # retrieve location type data, stores that information in a dictionary diff --git a/commcare_export/minilinq.py b/commcare_export/minilinq.py index 794a0c0..9c16566 100644 --- a/commcare_export/minilinq.py +++ b/commcare_export/minilinq.py @@ -1,3 +1,4 @@ +import logging from typing import Any, Dict from typing import List as ListType from typing import Optional @@ -6,9 +7,8 @@ from commcare_export.misc import unwrap, unwrap_val from commcare_export.repeatable_iterator import RepeatableIterator from commcare_export.specs import TableSpec -from commcare_export import get_logger -logger = get_logger(__file__) +logger = logging.getLogger(__name__) class MiniLinq: diff --git a/commcare_export/writers.py b/commcare_export/writers.py index 4426292..e143bbc 100644 --- a/commcare_export/writers.py +++ b/commcare_export/writers.py @@ -1,5 +1,6 @@ import csv import datetime +import logging from tempfile import NamedTemporaryFile import zipfile from itertools import zip_longest @@ -12,9 +13,8 @@ from alembic.operations import Operations from commcare_export.data_types import UnknownDataType, get_sqlalchemy_type from commcare_export.specs import TableSpec -from commcare_export import get_logger -logger = get_logger(__file__) +logger = logging.getLogger(__name__) MAX_COLUMN_SIZE = 2000 From c3a05f20108f26d67de126a91c66bc1e471d2a19 Mon Sep 17 00:00:00 2001 From: Norman Hooper Date: Thu, 22 Jan 2026 22:55:12 +0000 Subject: [PATCH 5/9] Drop repo_root Unused outside of tests --- commcare_export/__init__.py | 4 ---- tests/test_commcare_export.py | 33 --------------------------------- 2 files changed, 37 deletions(-) delete mode 100644 tests/test_commcare_export.py diff --git a/commcare_export/__init__.py b/commcare_export/__init__.py index 53422e9..6c36bb8 100644 --- a/commcare_export/__init__.py +++ b/commcare_export/__init__.py @@ -1,9 +1,5 @@ -import os from .version import __version__ __all__ = [ '__version__', - 'repo_root', ] - -repo_root = os.path.abspath(os.path.join(__file__, os.pardir, os.pardir)) diff --git a/tests/test_commcare_export.py b/tests/test_commcare_export.py deleted file mode 100644 index cd367c8..0000000 --- a/tests/test_commcare_export.py +++ /dev/null @@ -1,33 +0,0 @@ -import os -from commcare_export import logger_name_from_filepath, repo_root - - -class TestLoggerNameFromFilePath: - - @staticmethod - def _file_path(rel_path): - return os.path.join(repo_root, rel_path) - - def test_file_in_root(self): - path = self._file_path("file.py") - assert logger_name_from_filepath(path) == 'file' - - def test_file_in_subdirectory(self): - path = self._file_path("subdir/file.py") - assert logger_name_from_filepath(path) == 'subdir.file' - - def test_file_in_deeper_subdirectory(self): - path = self._file_path("subdir/another_sub/file.py") - assert logger_name_from_filepath(path) == 'subdir.another_sub.file' - - def test_file_contains_py(self): - path = self._file_path("subdir/pytest.py") - assert logger_name_from_filepath(path) == 'subdir.pytest' - - def test_file_dir_contains_periods(self): - path = self._file_path("sub.dir/pytest.py") - assert logger_name_from_filepath(path) == 'sub.dir.pytest' - - def test_random_file_name(self): - path = self._file_path("pyppy.excel_query.py") - assert logger_name_from_filepath(path) == 'pyppy.excel_query' From 4d00d3342afa52f284d29d17120330611163d7a9 Mon Sep 17 00:00:00 2001 From: Norman Hooper Date: Thu, 22 Jan 2026 23:13:34 +0000 Subject: [PATCH 6/9] Fix mypy runner --- .github/workflows/test.yml | 5 ++--- pyproject.toml | 10 ++++++++-- 2 files changed, 10 insertions(+), 5 deletions(-) diff --git a/.github/workflows/test.yml b/.github/workflows/test.yml index 8520051..b10e3f4 100644 --- a/.github/workflows/test.yml +++ b/.github/workflows/test.yml @@ -110,13 +110,12 @@ jobs: - name: Build distribution run: uv build - - name: Install package with mypy + - name: Install package with test dependencies run: | uv pip install --system dist/*.whl - uv pip install --system mypy uv pip install --system -e ".[test]" - - run: mypy --install-types --non-interactive commcare_export/ tests/ migrations/ + - run: mypy commcare_export/ tests/ finish: needs: test diff --git a/pyproject.toml b/pyproject.toml index e7811a1..06ac67d 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -49,10 +49,16 @@ dependencies = [ [project.optional-dependencies] test = [ + "mock", + "mypy", + "psycopg2-binary", "pytest", "pytest-unmagic", - "psycopg2-binary", - "mock", + "types-mock", + "types-python-dateutil", + "types-pytz", + "types-requests", + "types-simplejson", ] base_sql = [ "SQLAlchemy", From 138be6bf944762aef4acf660e6b36974771ab835 Mon Sep 17 00:00:00 2001 From: Norman Hooper Date: Thu, 22 Jan 2026 09:40:15 +0000 Subject: [PATCH 7/9] Move migrations into commcare-export package Moves migrations into the package and updates CheckpointManager to resolve the migration path via importlib.resources, plus packaging updates so the migrations ship in wheels/sdists. - commcare_export/checkpoint.py now uses `resources.files("commcare_export") / "migrations"` and `resources.as_file(...)` when configuring Alembic so packaged installs can find migration files. - commcare_export/migrations/ is now the migrations home, and its README references the new path. - Packaging config includes the new subpackages and migration files so PyPI installs contain them. --- MANIFEST.in | 2 +- commcare_export/checkpoint.py | 18 +++++++++--------- commcare_export/migrations/README.md | 16 ++++++++++++++++ .../migrations}/__init__.py | 0 .../migrations}/alembic.ini | 0 .../migrations}/env.py | 0 .../migrations}/script.py.mako | 0 ...7e2bf6_rename_time_of_run_to_since_param.py | 0 .../3b37b3b06104_added_cursor_to_checkpoint.py | 0 .../versions/53f1aad98e33_add_args_columns.py | 0 ...161ab6_add_pagination_mode_to_checkpoint.py | 0 .../9945abb4ec70_add_back_time_of_run.py | 0 .../migrations}/versions/__init__.py | 0 .../a56c82a8d02e_add_detail_to_checkpoint.py | 0 ...c36489c5a628_create_commcare_export_runs.py | 0 .../versions/d3ce9dc9907a_add_final_column.py | 0 .../versions/d82e1d06a82c_add_key_column.py | 0 .../f4fd4c80f40a_add_table_name_column.py | 0 migrations/README.md | 16 ---------------- pyproject.toml | 14 ++++++++++++-- 20 files changed, 38 insertions(+), 28 deletions(-) create mode 100644 commcare_export/migrations/README.md rename {migrations => commcare_export/migrations}/__init__.py (100%) rename {migrations => commcare_export/migrations}/alembic.ini (100%) rename {migrations => commcare_export/migrations}/env.py (100%) rename {migrations => commcare_export/migrations}/script.py.mako (100%) rename {migrations => commcare_export/migrations}/versions/29c27e7e2bf6_rename_time_of_run_to_since_param.py (100%) rename {migrations => commcare_export/migrations}/versions/3b37b3b06104_added_cursor_to_checkpoint.py (100%) rename {migrations => commcare_export/migrations}/versions/53f1aad98e33_add_args_columns.py (100%) rename {migrations => commcare_export/migrations}/versions/6f158d161ab6_add_pagination_mode_to_checkpoint.py (100%) rename {migrations => commcare_export/migrations}/versions/9945abb4ec70_add_back_time_of_run.py (100%) rename {migrations => commcare_export/migrations}/versions/__init__.py (100%) rename {migrations => commcare_export/migrations}/versions/a56c82a8d02e_add_detail_to_checkpoint.py (100%) rename {migrations => commcare_export/migrations}/versions/c36489c5a628_create_commcare_export_runs.py (100%) rename {migrations => commcare_export/migrations}/versions/d3ce9dc9907a_add_final_column.py (100%) rename {migrations => commcare_export/migrations}/versions/d82e1d06a82c_add_key_column.py (100%) rename {migrations => commcare_export/migrations}/versions/f4fd4c80f40a_add_table_name_column.py (100%) delete mode 100644 migrations/README.md diff --git a/MANIFEST.in b/MANIFEST.in index 073a699..20bf2e5 100644 --- a/MANIFEST.in +++ b/MANIFEST.in @@ -1,2 +1,2 @@ -recursive-include migrations *.py *.ini +recursive-include commcare_export/migrations *.py *.ini *.mako include commcare_export/VERSION diff --git a/commcare_export/checkpoint.py b/commcare_export/checkpoint.py index e604ce8..540243a 100644 --- a/commcare_export/checkpoint.py +++ b/commcare_export/checkpoint.py @@ -2,6 +2,7 @@ import logging import os import uuid +from importlib import resources from contextlib import contextmanager from operator import attrgetter @@ -13,7 +14,6 @@ from commcare_export.commcare_minilinq import PaginationMode from commcare_export.exceptions import DataExportException from commcare_export.writers import SqlMixin -from commcare_export import repo_root logger = logging.getLogger(__name__) Base = declarative_base() @@ -86,7 +86,7 @@ def session_scope(Session): class CheckpointManager(SqlMixin): table_name = 'commcare_export_runs' - migrations_repository = os.path.join(repo_root, 'migrations') + migrations_repository = resources.files("commcare_export") / "migrations" def __init__( self, @@ -201,13 +201,13 @@ def _set_checkpoint( def create_checkpoint_table(self, revision='head'): from alembic import command, config - cfg = config.Config( - os.path.join(self.migrations_repository, 'alembic.ini') - ) - cfg.set_main_option('script_location', self.migrations_repository) - with self.engine.begin() as connection: - cfg.attributes['connection'] = connection - command.upgrade(cfg, revision) + + with resources.as_file(self.migrations_repository) as migrations_path: + cfg = config.Config(os.path.join(migrations_path, 'alembic.ini')) + cfg.set_main_option('script_location', str(migrations_path)) + with self.engine.begin() as connection: + cfg.attributes['connection'] = connection + command.upgrade(cfg, revision) def _cleanup(self): self._validate_tables() diff --git a/commcare_export/migrations/README.md b/commcare_export/migrations/README.md new file mode 100644 index 0000000..2654fea --- /dev/null +++ b/commcare_export/migrations/README.md @@ -0,0 +1,16 @@ +## Migrations + +Migrations use [alembic](http://alembic.zzzcomputing.com/en/latest). + +**Create new migration** + +``` +$ alembic -c commcare_export/migrations/alembic.ini revision -m "description" +``` + + +**Run migrations from command line** + +``` +$ alembic -c commcare_export/migrations/alembic.ini -x "url=" upgrade +``` diff --git a/migrations/__init__.py b/commcare_export/migrations/__init__.py similarity index 100% rename from migrations/__init__.py rename to commcare_export/migrations/__init__.py diff --git a/migrations/alembic.ini b/commcare_export/migrations/alembic.ini similarity index 100% rename from migrations/alembic.ini rename to commcare_export/migrations/alembic.ini diff --git a/migrations/env.py b/commcare_export/migrations/env.py similarity index 100% rename from migrations/env.py rename to commcare_export/migrations/env.py diff --git a/migrations/script.py.mako b/commcare_export/migrations/script.py.mako similarity index 100% rename from migrations/script.py.mako rename to commcare_export/migrations/script.py.mako diff --git a/migrations/versions/29c27e7e2bf6_rename_time_of_run_to_since_param.py b/commcare_export/migrations/versions/29c27e7e2bf6_rename_time_of_run_to_since_param.py similarity index 100% rename from migrations/versions/29c27e7e2bf6_rename_time_of_run_to_since_param.py rename to commcare_export/migrations/versions/29c27e7e2bf6_rename_time_of_run_to_since_param.py diff --git a/migrations/versions/3b37b3b06104_added_cursor_to_checkpoint.py b/commcare_export/migrations/versions/3b37b3b06104_added_cursor_to_checkpoint.py similarity index 100% rename from migrations/versions/3b37b3b06104_added_cursor_to_checkpoint.py rename to commcare_export/migrations/versions/3b37b3b06104_added_cursor_to_checkpoint.py diff --git a/migrations/versions/53f1aad98e33_add_args_columns.py b/commcare_export/migrations/versions/53f1aad98e33_add_args_columns.py similarity index 100% rename from migrations/versions/53f1aad98e33_add_args_columns.py rename to commcare_export/migrations/versions/53f1aad98e33_add_args_columns.py diff --git a/migrations/versions/6f158d161ab6_add_pagination_mode_to_checkpoint.py b/commcare_export/migrations/versions/6f158d161ab6_add_pagination_mode_to_checkpoint.py similarity index 100% rename from migrations/versions/6f158d161ab6_add_pagination_mode_to_checkpoint.py rename to commcare_export/migrations/versions/6f158d161ab6_add_pagination_mode_to_checkpoint.py diff --git a/migrations/versions/9945abb4ec70_add_back_time_of_run.py b/commcare_export/migrations/versions/9945abb4ec70_add_back_time_of_run.py similarity index 100% rename from migrations/versions/9945abb4ec70_add_back_time_of_run.py rename to commcare_export/migrations/versions/9945abb4ec70_add_back_time_of_run.py diff --git a/migrations/versions/__init__.py b/commcare_export/migrations/versions/__init__.py similarity index 100% rename from migrations/versions/__init__.py rename to commcare_export/migrations/versions/__init__.py diff --git a/migrations/versions/a56c82a8d02e_add_detail_to_checkpoint.py b/commcare_export/migrations/versions/a56c82a8d02e_add_detail_to_checkpoint.py similarity index 100% rename from migrations/versions/a56c82a8d02e_add_detail_to_checkpoint.py rename to commcare_export/migrations/versions/a56c82a8d02e_add_detail_to_checkpoint.py diff --git a/migrations/versions/c36489c5a628_create_commcare_export_runs.py b/commcare_export/migrations/versions/c36489c5a628_create_commcare_export_runs.py similarity index 100% rename from migrations/versions/c36489c5a628_create_commcare_export_runs.py rename to commcare_export/migrations/versions/c36489c5a628_create_commcare_export_runs.py diff --git a/migrations/versions/d3ce9dc9907a_add_final_column.py b/commcare_export/migrations/versions/d3ce9dc9907a_add_final_column.py similarity index 100% rename from migrations/versions/d3ce9dc9907a_add_final_column.py rename to commcare_export/migrations/versions/d3ce9dc9907a_add_final_column.py diff --git a/migrations/versions/d82e1d06a82c_add_key_column.py b/commcare_export/migrations/versions/d82e1d06a82c_add_key_column.py similarity index 100% rename from migrations/versions/d82e1d06a82c_add_key_column.py rename to commcare_export/migrations/versions/d82e1d06a82c_add_key_column.py diff --git a/migrations/versions/f4fd4c80f40a_add_table_name_column.py b/commcare_export/migrations/versions/f4fd4c80f40a_add_table_name_column.py similarity index 100% rename from migrations/versions/f4fd4c80f40a_add_table_name_column.py rename to commcare_export/migrations/versions/f4fd4c80f40a_add_table_name_column.py diff --git a/migrations/README.md b/migrations/README.md deleted file mode 100644 index 2704817..0000000 --- a/migrations/README.md +++ /dev/null @@ -1,16 +0,0 @@ -## Migrations - -Migrations use [alembic](http://alembic.zzzcomputing.com/en/latest). - -**Create new migration** - -``` -$ alembic -c migrations/alembic.ini revision -m "description" -``` - - -**Run migrations from command line** - -``` -$ alembic -c migrations/alembic.ini -x "url=" upgrade -``` diff --git a/pyproject.toml b/pyproject.toml index 06ac67d..30e6403 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -102,11 +102,21 @@ commcare-export = "commcare_export.cli:entry_point" commcare-export-utils = "commcare_export.utils_cli:entry_point" [tool.setuptools] -packages = ["commcare_export"] +packages = [ + "commcare_export", + "commcare_export.migrations", + "commcare_export.migrations.versions", +] include-package-data = true [tool.setuptools.package-data] -commcare_export = ["VERSION"] +commcare_export = [ + "VERSION", + "migrations/*.ini", + "migrations/*.mako", + "migrations/*.py", + "migrations/versions/*.py", +] [tool.setuptools_scm] version_file = "commcare_export/VERSION" From e029914d139e579e30d3946a1b6e58c85921be28 Mon Sep 17 00:00:00 2001 From: Norman Hooper Date: Fri, 23 Jan 2026 13:04:33 +0000 Subject: [PATCH 8/9] Fix tests for logging changes --- tests/test_cli.py | 13 +++++-------- 1 file changed, 5 insertions(+), 8 deletions(-) diff --git a/tests/test_cli.py b/tests/test_cli.py index 89ddaef..8d3ff64 100644 --- a/tests/test_cli.py +++ b/tests/test_cli.py @@ -427,6 +427,7 @@ def test_log_append_mode( @mock.patch('commcare_export.cli._get_api_client', return_value=mock_hq_client(True)) @mock.patch('commcare_export.cli.set_up_file_logging') @mock.patch('sys.exit') + @restore_root_logger def test_no_logfile( self, mock_exit, @@ -798,29 +799,25 @@ def _pull_mock_data( class TestCLIWithDatabaseErrors: def test_cli_database_error( - self, strict_writer, all_db_checkpoint_manager, capfd + self, strict_writer, all_db_checkpoint_manager, caplog ): _pull_mock_data( strict_writer, all_db_checkpoint_manager, CONFLICTING_TYPES_CLIENT, 'tests/013_ConflictingTypes.xlsx' ) - out, err = capfd.readouterr() - expected_re = re.compile('Stopping because of database error') - assert re.search(expected_re, out) + assert re.search(expected_re, caplog.text) def test_cli_database_error_checkpoint( - self, strict_writer, all_db_checkpoint_manager, capfd + self, strict_writer, all_db_checkpoint_manager, caplog ): _pull_mock_data( strict_writer, all_db_checkpoint_manager, get_conflicting_types_checkpoint_client(), 'tests/013_ConflictingTypes.xlsx' ) - out, err = capfd.readouterr() - expected_re = re.compile('Stopping because of database error') - assert re.search(expected_re, out), out + assert re.search(expected_re, caplog.text), caplog.text # expect checkpoint to have the date from the first batch and # not the 2nd From 36ad4f52c5fe13e165254742658868defbb5d2b2 Mon Sep 17 00:00:00 2001 From: Norman Hooper Date: Tue, 27 Jan 2026 13:31:38 +0000 Subject: [PATCH 9/9] Use `/` instead of `os.path.join()` --- commcare_export/checkpoint.py | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/commcare_export/checkpoint.py b/commcare_export/checkpoint.py index 540243a..bfffbe6 100644 --- a/commcare_export/checkpoint.py +++ b/commcare_export/checkpoint.py @@ -1,6 +1,5 @@ import datetime import logging -import os import uuid from importlib import resources from contextlib import contextmanager @@ -203,7 +202,7 @@ def create_checkpoint_table(self, revision='head'): from alembic import command, config with resources.as_file(self.migrations_repository) as migrations_path: - cfg = config.Config(os.path.join(migrations_path, 'alembic.ini')) + cfg = config.Config(str(migrations_path / 'alembic.ini')) cfg.set_main_option('script_location', str(migrations_path)) with self.engine.begin() as connection: cfg.attributes['connection'] = connection