From e323294cb73ee6a685dc68317b369c8d6aafdf22 Mon Sep 17 00:00:00 2001 From: Keigh Rim Date: Tue, 7 Oct 2025 11:08:01 -0400 Subject: [PATCH 1/2] added install-from-source documentation, linked to main readme --- Makefile | 4 ++-- README.md | 1 + documentation/introduction.rst | 26 ++++++++++++++++++++++++++ 3 files changed, 29 insertions(+), 2 deletions(-) diff --git a/Makefile b/Makefile index 3b6fe22e..424e787d 100644 --- a/Makefile +++ b/Makefile @@ -25,8 +25,8 @@ testcaches = .hypothesis .pytest_cache .pytype coverage.xml htmlcov .coverage all: version test build develop: devversion package test - python3 setup.py develop --uninstall - python3 setup.py develop + python3 -m pip uninstall mmif-python -y + python3 -m pip install -e . publish: distclean version package test test `git branch --show-current` = "master" diff --git a/README.md b/README.md index b3e8f98a..2a111f66 100644 --- a/README.md +++ b/README.md @@ -15,4 +15,5 @@ including ; ## For more ... * [Version history and patch notes](https://github.com/clamsproject/mmif-python/blob/main/CHANGELOG.md) * [MMIF Python API documentation](https://clamsproject.github.io/mmif-python) + * [Getting started ...](https://clams.ai/mmif-python/latest/introduction.html) (includes installation instructions) * [MMIF JSON specification and schema](https://clamsproject.github.io/mmif) diff --git a/documentation/introduction.rst b/documentation/introduction.rst index cfd9eaf7..07b9f2be 100644 --- a/documentation/introduction.rst +++ b/documentation/introduction.rst @@ -27,6 +27,32 @@ Package ``mmif-python`` is distributed via the official PyPI. Users are supposed This will install a package `mmif` to local python. + +Installing from source tree for development +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + +.. note:: + This is not necessary for most users who just want to use the ``mmif-python`` package. It is only for developers who want to modify the source code. + +Following these instructions will install the ``mmif-python`` package in `"editable" mode `_. This means that any changes you make to the source code will be immediately reflected in the installed package. + +1. First, you need a general developer toolchain that includes the ``make`` command. +2. Then, run the following command in the root of the source tree: + + .. code-block:: bash + + make develop + + This will install all dependencies, run all the tests, and then install the package in editable mode. + +3. If you want to skip testing, you can run ``make package`` first and then manually install the package in editable mode: + + .. code-block:: bash + + make package + python3 -m pip install -e . + + The MMIF format and specification is evolving over time, and ``mmif-python`` package will be updated along with the changes in MMIF format. .. note:: MMIF format is not always backward-compatible. To find out more about relations between MMIF specification versions and ``mmif-python`` versions, please take time to read our decision on the subject `here `_. If you need to know which python SDK supports which specification version, see :ref:`target-versions` page. 
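The editable install set up by `make develop` (or `python3 -m pip install -e .`) links the working tree directly into the Python environment, so source edits take effect without reinstalling. A quick way to confirm the install worked is to import the package and print the generated version metadata; this is a minimal sketch, assuming the top-level `mmif` package re-exports `__version__` and `__specver__` from the generated `mmif.ver` subpackage (which the build hooks in the second patch produce):

```bash
# verify the editable install by printing the package version and the targeted MMIF spec version
python3 -c "import mmif; print(mmif.__version__, mmif.__specver__)"
```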
From bc42cd040180f7d26fb83901c42dc53e4e014cd3 Mon Sep 17 00:00:00 2001 From: Keigh Rim Date: Wed, 29 Oct 2025 10:50:32 -0400 Subject: [PATCH 2/2] setup.py to pyproject migration first implementation --- .gitignore | 1 + Makefile | 21 ++++ build_tools/README.md | 47 ++++++++ build_tools/__init__.py | 0 build_tools/hooks.py | 199 +++++++++++++++++++++++++++++++++ build_tools/resources.py | 182 +++++++++++++++++++++++++++++++ build_tools/version.py | 206 +++++++++++++++++++++++++++++++++++ build_tools/vocabulary.py | 224 ++++++++++++++++++++++++++++++++++++++ scripts/build_docs.py | 200 ++++++++++++++++++++++++++++++++++ scripts/clean.py | 184 +++++++++++++++++++++++++++++++ scripts/manage_version.py | 217 ++++++++++++++++++++++++++++++++++++ setup.py | 21 ++++ 12 files changed, 1502 insertions(+) create mode 100644 build_tools/README.md create mode 100644 build_tools/__init__.py create mode 100644 build_tools/hooks.py create mode 100644 build_tools/resources.py create mode 100644 build_tools/version.py create mode 100644 build_tools/vocabulary.py create mode 100644 scripts/build_docs.py create mode 100644 scripts/clean.py create mode 100644 scripts/manage_version.py diff --git a/.gitignore b/.gitignore index 84afd451..b60b3d15 100644 --- a/.gitignore +++ b/.gitignore @@ -82,3 +82,4 @@ mmif/vocabulary documentation/_build/ /VERSION +_issues diff --git a/Makefile b/Makefile index 424e787d..8ad068f1 100644 --- a/Makefile +++ b/Makefile @@ -1,3 +1,24 @@ +################################################################################ +# COMPLETELY DEPRECATED - LEFT FOR HISTORICAL REFERENCE ONLY +# +# This Makefile is no longer needed and will be removed in a future release. +# Use Python scripts and modern build tools instead: +# +# python scripts/manage_version.py # Instead of: make version/devversion +# python -m build # Instead of: make package +# pip install -e . # Instead of: make develop +# pytest # Instead of: make test +# python scripts/build_docs.py # Instead of: make docs/doc +# python scripts/clean.py # Instead of: make clean/distclean +# +# This file is kept temporarily for: +# 1. Legacy CI/CD pipelines that haven't been updated yet +# 2. Historical reference during migration period +# 3. Will be removed once all workflows are migrated (target: 3-6 months) +# +# See README.md "Development" section for modern workflow instructions. +################################################################################ + # check for dependencies SHELL := /bin/bash deps = curl jq git python3 diff --git a/build_tools/README.md b/build_tools/README.md new file mode 100644 index 00000000..bacd3ef9 --- /dev/null +++ b/build_tools/README.md @@ -0,0 +1,47 @@ +# build_tools + +This directory contains library modules for the mmif-python build system. These modules are **not meant to be run directly** by developers. + +## Architecture + +The `build_tools/` package provides core functionality that is used in two ways: + +1. **During package builds** - Automatically invoked by setuptools via the entry point defined in `pyproject.toml` +2. 
**By CLI wrapper scripts** - Called by user-facing scripts in the `scripts/` directory + + +## For Developers + +**DO NOT run these modules directly.** Instead, use the CLI wrapper scripts in the `scripts/` directory: + +```bash +# Version management +python scripts/manage_version.py # Interactive version setting +python scripts/manage_version.py --dev # Generate dev version +python scripts/manage_version.py --set 1.0.0 # Set specific version + +# Documentation +python scripts/build_docs.py # Build single-version docs +python scripts/build_docs.py --multi # Build multi-version docs + +# Cleanup +python scripts/clean.py # Clean all build artifacts +python scripts/clean.py --dist-only # Clean only distribution files +``` + +## For Package Builds + +The `hooks.py` module is automatically invoked by setuptools when building the package: + +```bash +# These commands automatically trigger build_tools.hooks:setup_hooks() +pip install -e . # Development install +python -m build # Build wheel and sdist +``` + +The entry point is configured in `pyproject.toml`: + +```toml +[project.entry-points."setuptools.finalize_distribution_options"] +build_hooks = "build_tools.hooks:setup_hooks" +``` diff --git a/build_tools/__init__.py b/build_tools/__init__.py new file mode 100644 index 00000000..e69de29b diff --git a/build_tools/hooks.py b/build_tools/hooks.py new file mode 100644 index 00000000..5c458286 --- /dev/null +++ b/build_tools/hooks.py @@ -0,0 +1,199 @@ +""" +Setuptools build hooks for mmif-python. + +This module provides the integration between the build tools and setuptools. +It implements custom command classes that run during the build process to: +- Generate the ver/ package with version info +- Generate the res/ package with MMIF spec resources +- Generate the vocabulary/ package with type enums +""" + +import os +from typing import Optional + +import yaml + +from . import version as version_utils +from . import resources as resource_utils +from . import vocabulary as vocab_utils + + +def setup_hooks(dist=None) -> None: + """ + Setuptools entry point hook for build-time code generation. + + This function is called by setuptools via the finalize_distribution_options + entry point defined in pyproject.toml. It runs before building wheels/sdists. + + Args: + dist: setuptools Distribution object (optional, provided by setuptools) + """ + run_build_hooks() + + +def run_build_hooks(package_version: Optional[str] = None, + local_mmif_path: Optional[str] = None) -> None: + """ + Execute all build hooks to generate required packages. + + This is the main entry point called by setuptools command classes or entry point. 
+ + Args: + package_version: Package version (if None, will read from VERSION file) + local_mmif_path: Path to local MMIF repository (if None, will check env var) + """ + # Get version info + if package_version is None: + package_version, spec_version, git_ref = version_utils.get_version_info( + local_mmif_path=local_mmif_path + ) + else: + spec_version = version_utils.get_spec_version_for_build( + package_version, local_mmif_path + ) + git_ref = version_utils.get_git_ref_for_resources( + package_version, spec_version, local_mmif_path + ) + + if local_mmif_path is None: + local_mmif_path = version_utils.get_local_mmif_path() + + if local_mmif_path: + print(f"==== Using local MMIF files at '{local_mmif_path}' ====") + + print(f"Building mmif-python {package_version} targeting MMIF spec {spec_version}") + print(f"Fetching resources from git ref: {git_ref}") + + # Package names + mmif_package = 'mmif' + ver_package = 'ver' + res_package = 'res' + vocab_package = 'vocabulary' + + # Generate ver/ package + generate_ver_package(mmif_package, ver_package, package_version, spec_version) + + # Fetch resources + fetcher = resource_utils.ResourceFetcher(spec_version, git_ref, local_mmif_path) + resources = fetcher.fetch_all_resources() + + # Generate res/ package + generate_res_package(mmif_package, res_package, resources) + + # Generate vocabulary/ package + generate_vocabulary_package( + mmif_package, vocab_package, spec_version, git_ref, + resources, local_mmif_path + ) + + +def generate_ver_package(parent_package: str, + subpackage_name: str, + package_version: str, + spec_version: str) -> None: + """ + Generate the ver/ package with version information. + + Args: + parent_package: Parent package name (e.g., "mmif") + subpackage_name: Subpackage name (e.g., "ver") + package_version: mmif-python package version + spec_version: MMIF specification version + """ + init_contents = f'__version__ = "{package_version}"\n__specver__ = "{spec_version}"\n' + + vocab_utils.create_subpackage(parent_package, subpackage_name, init_contents) + print(f"Generated {parent_package}/{subpackage_name}/ package") + + +def generate_res_package(parent_package: str, + subpackage_name: str, + resources: dict) -> None: + """ + Generate the res/ package with MMIF spec resources. + + Args: + parent_package: Parent package name (e.g., "mmif") + subpackage_name: Subpackage name (e.g., "res") + resources: Dictionary of resources (schema, vocabulary) + """ + res_dir = vocab_utils.create_subpackage(parent_package, subpackage_name) + + # Write schema file + resource_utils.write_resource_file(res_dir, 'mmif.json', resources['schema']) + + # Write vocabulary file + resource_utils.write_resource_file( + res_dir, 'clams.vocabulary.yaml', resources['vocabulary'] + ) + + print(f"Generated {parent_package}/{subpackage_name}/ package") + + +def generate_vocabulary_package(parent_package: str, + subpackage_name: str, + spec_version: str, + git_ref: str, + resources: dict, + local_mmif_path: Optional[str]) -> None: + """ + Generate the vocabulary/ package with type enums. 
+ + Args: + parent_package: Parent package name (e.g., "mmif") + subpackage_name: Subpackage name (e.g., "vocabulary") + spec_version: MMIF specification version + git_ref: Git ref used for fetching resources + resources: Dictionary of resources including attypeversions + local_mmif_path: Path to local MMIF repository (optional) + """ + attypeversions = resources['attypeversions'] + package_version_is_dev = version_utils.is_dev_version( + version_utils.read_version_file() + ) + + if package_version_is_dev: + # For dev versions, we need to handle version increments + latest_vocab_yaml = resource_utils.fetch_clams_vocabulary( + spec_version, local_mmif_path + ) + dev_vocab_yaml = resources['vocabulary'] + + type_versions = vocab_utils.determine_type_versions_for_dev( + latest_vocab_yaml, dev_vocab_yaml, attypeversions + ) + else: + # For release versions, use the versions from the spec + type_versions = attypeversions + + # Generate the vocabulary package + vocab_utils.generate_vocabulary_package( + parent_package, spec_version, type_versions + ) + + print(f"Generated {parent_package}/{subpackage_name}/ package") + + +def create_build_hook_decorator(setuptools_cmd_class): + """ + Create a decorator that adds build hooks to a setuptools command class. + + This decorator wraps the run() method of a setuptools command class + to execute our build hooks before the original run() method. + + Args: + setuptools_cmd_class: A setuptools command class (e.g., sdist, develop) + + Returns: + The decorated command class with build hooks integrated + """ + original_run = setuptools_cmd_class.run + + def new_run(self): + # Run our build hooks + run_build_hooks() + # Then run the original command + original_run(self) + + setuptools_cmd_class.run = new_run + return setuptools_cmd_class diff --git a/build_tools/resources.py b/build_tools/resources.py new file mode 100644 index 00000000..6cc8fba6 --- /dev/null +++ b/build_tools/resources.py @@ -0,0 +1,182 @@ +""" +Resource fetching for MMIF specification files. + +This module handles fetching MMIF specification resources from either: +- Remote GitHub repository (default) +- Local MMIF git repository (via LOCALMMIF environment variable) + +Resources include: +- JSON schema files +- YAML vocabulary files +- Annotation type version mappings +""" + +import json +import os +import subprocess +from typing import Union, Optional +from urllib import request + + +def get_spec_file_at_gitref(git_ref: str, + filepath: str, + local_mmif_path: Optional[str] = None) -> bytes: + """ + Fetch a file from the MMIF specification repository at a specific git ref. 
+ + Args: + git_ref: Git tag or branch name (e.g., "1.0.0" or "develop") + filepath: Path to the file within the repository + May contain {version} placeholder (e.g., "docs/{version}/vocabulary/...") + local_mmif_path: Path to local MMIF repository (optional) + + Returns: + File contents as bytes + + Raises: + RuntimeError: If file cannot be fetched from local or remote repository + """ + # Substitute version placeholder if present + filepath = filepath.format(version=git_ref) + + if local_mmif_path is not None: + # Fetch from local git repository + result = subprocess.run( + ['git', '--git-dir', f'{local_mmif_path}/.git', '--no-pager', + 'show', f'{git_ref}:{filepath}'], + capture_output=True + ) + if result.returncode != 0: + raise RuntimeError( + f"Failed to get {filepath} at {git_ref} from local MMIF repo: " + f"{result.stderr.decode('utf-8')}" + ) + return result.stdout + else: + # Fetch from GitHub + file_url = f"https://raw.githubusercontent.com/clamsproject/mmif/{git_ref}/{filepath}" + try: + return request.urlopen(file_url).read() + except Exception as e: + raise RuntimeError(f"Failed to fetch {file_url}: {e}") + + +def write_resource_file(resource_dir: str, + resource_name: str, + resource_data: Union[bytes, str]) -> None: + """ + Write a resource file to disk. + + Args: + resource_dir: Directory to write the file to + resource_name: Name of the file to write + resource_data: File contents (bytes or string) + """ + os.makedirs(resource_dir, exist_ok=True) + + mode = 'wb' if isinstance(resource_data, bytes) else 'w' + filepath = os.path.join(resource_dir, resource_name) + + with open(filepath, mode) as f: + f.write(resource_data) + + +def fetch_mmif_schema(git_ref: str, + local_mmif_path: Optional[str] = None) -> bytes: + """ + Fetch the MMIF JSON schema file. + + Args: + git_ref: Git tag or branch to fetch from + local_mmif_path: Path to local MMIF repository (optional) + + Returns: + JSON schema as bytes + """ + schema_path = 'schema/mmif.json' + return get_spec_file_at_gitref(git_ref, schema_path, local_mmif_path) + + +def fetch_clams_vocabulary(git_ref: str, + local_mmif_path: Optional[str] = None) -> bytes: + """ + Fetch the CLAMS vocabulary YAML file. + + Args: + git_ref: Git tag or branch to fetch from + local_mmif_path: Path to local MMIF repository (optional) + + Returns: + Vocabulary YAML as bytes + """ + vocab_path = 'vocabulary/clams.vocabulary.yaml' + + if local_mmif_path is not None: + # For local repos, read directly from filesystem + filepath = os.path.join(local_mmif_path, vocab_path) + if git_ref == 'develop' and os.path.exists(filepath): + with open(filepath, 'rb') as f: + return f.read() + + # Otherwise fetch from git + return get_spec_file_at_gitref(git_ref, vocab_path, local_mmif_path) + + +def fetch_annotation_type_versions(spec_version: str, + local_mmif_path: Optional[str] = None) -> dict: + """ + Fetch the annotation type versions mapping. + + Args: + spec_version: MMIF specification version tag (e.g., "1.0.0") + local_mmif_path: Path to local MMIF repository (optional) + + Returns: + Dictionary mapping annotation type names to version strings + """ + attypevers_path = 'docs/{version}/vocabulary/attypeversions.json' + data = get_spec_file_at_gitref(spec_version, attypevers_path, local_mmif_path) + return json.loads(data) + + +class ResourceFetcher: + """ + Helper class for fetching multiple MMIF spec resources. + + This class encapsulates the logic for fetching all necessary resources + for a build, handling both remote and local sources. 
+ """ + + def __init__(self, + spec_version: str, + git_ref: str, + local_mmif_path: Optional[str] = None): + """ + Initialize the resource fetcher. + + Args: + spec_version: MMIF spec version tag (e.g., "1.0.0") + git_ref: Git ref to fetch resources from (tag or branch) + local_mmif_path: Path to local MMIF repository (optional) + """ + self.spec_version = spec_version + self.git_ref = git_ref + self.local_mmif_path = local_mmif_path + + def fetch_all_resources(self) -> dict: + """ + Fetch all required MMIF spec resources. + + Returns: + Dictionary with keys: + - 'schema': MMIF JSON schema (bytes) + - 'vocabulary': CLAMS vocabulary YAML (bytes) + - 'attypeversions': Annotation type versions (dict) + """ + return { + 'schema': fetch_mmif_schema(self.git_ref, self.local_mmif_path), + 'vocabulary': fetch_clams_vocabulary(self.git_ref, self.local_mmif_path), + 'attypeversions': fetch_annotation_type_versions( + self.spec_version, self.local_mmif_path + ) + } diff --git a/build_tools/version.py b/build_tools/version.py new file mode 100644 index 00000000..718a4737 --- /dev/null +++ b/build_tools/version.py @@ -0,0 +1,206 @@ +""" +Version management for mmif-python package. + +This module handles: +- Reading and validating the VERSION file +- Inferring the target MMIF specification version from git tags +- Supporting both remote (GitHub) and local MMIF repositories +""" + +import json +import os +import re +import subprocess +from typing import Tuple, Optional +from urllib import request + + +def read_version_file(version_file: str = "VERSION") -> str: + """ + Read the VERSION file and return the version string. + + Args: + version_file: Path to the VERSION file (default: "VERSION") + + Returns: + Version string (e.g., "1.0.0" or "1.0.0.dev1") + + Raises: + FileNotFoundError: If VERSION file doesn't exist + ValueError: If VERSION file is empty or contains invalid format + """ + if not os.path.exists(version_file): + raise FileNotFoundError( + f"Cannot find {version_file} file. " + f"Use `python scripts/manage_version.py` to generate one." + ) + + with open(version_file, 'r') as f: + version = f.read().strip() + + if not version: + raise ValueError(f"{version_file} is empty") + + # Validate version format: X.Y.Z or X.Y.Z.devN + if not re.match(r'^\d+\.\d+\.\d+(?:\.dev\d+)?$', version): + raise ValueError( + f"Invalid version format in {version_file}: {version}. " + f"Expected format: X.Y.Z or X.Y.Z.devN" + ) + + return version + + +def is_dev_version(version: str) -> bool: + """Check if a version string represents a development version.""" + return '.dev' in version + + +def get_local_mmif_path() -> Optional[str]: + """ + Get the local MMIF repository path from LOCALMMIF environment variable. + + Returns: + Path to local MMIF repository, or None if not set + """ + return os.environ.get('LOCALMMIF') + + +def get_latest_mmif_git_tag(local_mmif_path: Optional[str] = None) -> str: + """ + Get the latest MMIF specification git tag. + + This function retrieves the latest version tag from the MMIF specification + repository. It supports both local git repositories and remote GitHub queries. + + Args: + local_mmif_path: Path to local MMIF git repository (optional). + If not provided, will check LOCALMMIF environment variable. + If neither is available, fetches from GitHub. 
+ + Returns: + Latest MMIF specification version tag (e.g., "1.0.0") + + Raises: + RuntimeError: If no valid version tags are found + """ + if local_mmif_path is None: + local_mmif_path = get_local_mmif_path() + + if local_mmif_path is not None: + # Get tags from local git repository + result = subprocess.run( + ['git', '--git-dir', f'{local_mmif_path}/.git', '--no-pager', 'tag'], + capture_output=True, + text=True + ) + if result.returncode != 0: + raise RuntimeError( + f"Failed to get git tags from local MMIF repo at {local_mmif_path}: " + f"{result.stderr}" + ) + tags = result.stdout.split('\n') + else: + # Fetch tags from GitHub API + tags = [] + page = 1 + while True: + url = f'https://api.github.com/repos/clamsproject/mmif/tags?per_page=100&page={page}' + try: + res = request.urlopen(url) + body = json.loads(res.read()) + except Exception as e: + raise RuntimeError(f"Failed to fetch tags from GitHub: {e}") + + if not body: + break + + tags.extend([tag['name'] for tag in body]) + page += 1 + + # Filter for version tags matching X.Y.Z format + # Note: Some legacy tags had prefixes like "spec-X.Y.Z" or "vocab-X.Y.Z" + version_pattern = re.compile(r'(?:spec-|vocab-)?(\d+\.\d+\.\d+)$') + valid_tags = [] + for tag in tags: + match = version_pattern.match(tag.strip()) + if match: + # Extract just the version part (without prefix) + valid_tags.append(match.group(1)) + + if not valid_tags: + raise RuntimeError("No valid MMIF specification version tags found") + + # Sort by version numbers and return the highest + def version_key(v): + return tuple(map(int, v.split('.'))) + + return sorted(valid_tags, key=version_key)[-1] + + +def get_spec_version_for_build(package_version: str, + local_mmif_path: Optional[str] = None) -> str: + """ + Determine which MMIF specification version to use for a build. + + Args: + package_version: The mmif-python package version being built + local_mmif_path: Path to local MMIF repository (optional) + + Returns: + MMIF specification version string (e.g., "1.0.0") + """ + latest_tag = get_latest_mmif_git_tag(local_mmif_path) + + # For release versions, use the latest stable tag + # For dev versions, we still reference the latest tag for base types + # (the actual vocabulary may come from develop branch) + return latest_tag + + +def get_git_ref_for_resources(package_version: str, + latest_spec_tag: str, + local_mmif_path: Optional[str] = None) -> str: + """ + Determine which git ref to use when fetching MMIF spec resources. + + For release versions: use the latest stable tag + For dev versions: use 'develop' branch + + Args: + package_version: The mmif-python package version being built + latest_spec_tag: The latest MMIF spec tag + local_mmif_path: Path to local MMIF repository (optional) + + Returns: + Git ref string (tag name or branch name) + """ + if is_dev_version(package_version): + return 'develop' + else: + return latest_spec_tag + + +def get_version_info(version_file: str = "VERSION", + local_mmif_path: Optional[str] = None) -> Tuple[str, str, str]: + """ + Get complete version information for a build. + + This is the main entry point for build scripts to get all version info. 
+ + Args: + version_file: Path to the VERSION file + local_mmif_path: Path to local MMIF repository (optional) + + Returns: + Tuple of (package_version, spec_version, git_ref_for_resources) + + Example: + >>> package_ver, spec_ver, git_ref = get_version_info() + >>> print(f"Building mmif-python {package_ver} targeting MMIF spec {spec_ver}") + """ + package_version = read_version_file(version_file) + spec_version = get_spec_version_for_build(package_version, local_mmif_path) + git_ref = get_git_ref_for_resources(package_version, spec_version, local_mmif_path) + + return package_version, spec_version, git_ref diff --git a/build_tools/vocabulary.py b/build_tools/vocabulary.py new file mode 100644 index 00000000..d3a2a7c3 --- /dev/null +++ b/build_tools/vocabulary.py @@ -0,0 +1,224 @@ +""" +Vocabulary enum generation for MMIF Python SDK. + +This module generates Python enum classes from CLAMS vocabulary definitions: +- Reads vocabulary YAML files +- Generates Python class files from templates +- Handles version updates for annotation types +""" + +import io +import os +import shutil +import string +from typing import Dict, List, Tuple + +import yaml + + +def create_subpackage(parent_package: str, + subpackage_name: str, + init_contents: str = "") -> str: + """ + Create a Python subpackage with __init__.py and a warning file. + + Args: + parent_package: Parent package directory (e.g., "mmif") + subpackage_name: Name of subpackage to create (e.g., "vocabulary") + init_contents: Contents for __init__.py file + + Returns: + Path to the created subpackage directory + """ + subpack_dir = os.path.join(parent_package, subpackage_name) + + # Remove existing directory if present + shutil.rmtree(subpack_dir, ignore_errors=True) + + # Create new directory + os.makedirs(subpack_dir, exist_ok=True) + + # Write warning file + warning_file = os.path.join(subpack_dir, 'do-not-edit.txt') + with open(warning_file, 'w') as f: + f.write("Contents of this directory is automatically generated and should not be manually edited.\n") + f.write("Any manual changes will be wiped at next build time.\n") + + # Write __init__.py + init_file = os.path.join(subpack_dir, '__init__.py') + with open(init_file, 'w') as f: + f.write(init_contents) + + return subpack_dir + + +def generate_vocab_enum_module(spec_version: str, + type_versions: List[Tuple[str, str]], + module_name: str, + template_path: str) -> str: + """ + Generate a vocabulary enum module from a template. 
+ + Args: + spec_version: MMIF specification version (e.g., "1.0.0") + type_versions: List of (type_name, version) tuples + module_name: Name of the module (e.g., "annotation_types") + template_path: Path to template directory + + Returns: + Generated module contents as a string + """ + template_file = os.path.join(template_path, f'{module_name}.txt') + + if not os.path.exists(template_file): + raise FileNotFoundError(f"Template not found: {template_file}") + + # Determine base class name + if module_name.startswith('annotation'): + base_class_name = 'AnnotationTypesBase' + elif module_name.startswith('document'): + base_class_name = 'DocumentTypesBase' + else: + base_class_name = 'ClamsTypesBase' + + # Read template and substitute version + with open(template_file, 'r') as f: + template_content = f.read() + + output = io.StringIO() + output.write(string.Template(template_content).safe_substitute(VERSION=spec_version)) + + # Generate enum entries + for type_name, type_ver in type_versions: + vocab_url = f'http://mmif.clams.ai/vocabulary/{type_name}/{type_ver}' + output.write(f" {type_name} = {base_class_name}('{vocab_url}')\n") + + # Add version mapping dictionary + output.write(f" _typevers = {dict(type_versions)}\n") + + result = output.getvalue() + output.close() + return result + + +def determine_type_versions_for_dev( + latest_vocab_yaml: bytes, + dev_vocab_yaml: bytes, + latest_attypeversions: Dict[str, str] +) -> Dict[str, str]: + """ + Determine annotation type versions for a dev build. + + For dev builds, we need to: + 1. Start with versions from the latest release + 2. For new types not in latest: assign 'v1' + 3. For modified types: increment version + + Args: + latest_vocab_yaml: Vocabulary YAML from latest release tag + dev_vocab_yaml: Vocabulary YAML from develop branch + latest_attypeversions: Type versions from latest release + + Returns: + Dictionary mapping type names to version strings + """ + # Parse both vocabularies + latest_types = { + t['name']: t + for t in yaml.safe_load_all(latest_vocab_yaml) + if t # Filter out None values + } + + dev_types = { + t['name']: t + for t in yaml.safe_load_all(dev_vocab_yaml) + if t # Filter out None values + } + + # Start with latest versions + type_versions = latest_attypeversions.copy() + + # Process each type in dev vocabulary + for type_name, type_def in dev_types.items(): + if type_name not in latest_types: + # New type - assign v1 + type_versions[type_name] = 'v1' + elif latest_types[type_name] != type_def: + # Modified type - increment version + current_ver = type_versions.get(type_name, 'v1') + if current_ver.startswith('v'): + ver_num = int(current_ver[1:]) + type_versions[type_name] = f'v{ver_num + 1}' + else: + type_versions[type_name] = 'v2' + + return type_versions + + +def generate_vocabulary_package( + package_dir: str, + spec_version: str, + type_versions: Dict[str, str], + template_path: str = 'templates/python/vocabulary' +) -> str: + """ + Generate the complete vocabulary package. 
+ + Args: + package_dir: Parent package directory (e.g., "mmif") + spec_version: MMIF specification version + type_versions: Dictionary mapping type names to versions + template_path: Path to template directory + + Returns: + Path to the generated vocabulary package + """ + # Categorize types + base_types = [] + document_types = [] + annotation_types = [] + + for type_name, type_ver in type_versions.items(): + if type_name == 'Thing': + base_types.append((type_name, type_ver)) + elif 'Document' in type_name: + document_types.append((type_name, type_ver)) + else: + annotation_types.append((type_name, type_ver)) + + # Define module structure + modules = { + 'base_types': base_types, + 'annotation_types': annotation_types, + 'document_types': document_types + } + + type_classes = { + 'base_types': ['ThingTypesBase', 'ThingType', 'ClamsTypesBase', + 'AnnotationTypesBase', 'DocumentTypesBase'], + 'annotation_types': ['AnnotationTypes'], + 'document_types': ['DocumentTypes'] + } + + # Generate __init__.py imports + init_imports = '\n'.join( + f"from .{mod_name} import {class_name}" + for mod_name, classes in type_classes.items() + for class_name in classes + ) + init_imports += '\n\n' + init_imports += "_typevers = {**ThingType._typevers, **AnnotationTypes._typevers, **DocumentTypes._typevers}\n" + + # Create vocabulary package + vocab_dir = create_subpackage(package_dir, 'vocabulary', init_imports) + + # Generate each module + for module_name, type_list in modules.items(): + module_content = generate_vocab_enum_module( + spec_version, type_list, module_name, template_path + ) + module_file = os.path.join(vocab_dir, f'{module_name}.py') + with open(module_file, 'w') as f: + f.write(module_content) + + return vocab_dir diff --git a/scripts/build_docs.py b/scripts/build_docs.py new file mode 100644 index 00000000..058c789b --- /dev/null +++ b/scripts/build_docs.py @@ -0,0 +1,200 @@ +#!/usr/bin/env python3 +""" +Documentation build script for mmif-python. + +Replaces `make docs` and `make doc` Makefile targets. + +Usage: + python scripts/build_docs.py # Single-version build (for development) + python scripts/build_docs.py --multi # Multi-version build (for publication) +""" + +import argparse +import os +import subprocess +import sys +from pathlib import Path + + +def get_latest_version_tag(): + """Get the latest version tag from git.""" + try: + result = subprocess.run( + ['git', 'tag'], + capture_output=True, + text=True, + check=True + ) + tags = [tag.strip() for tag in result.stdout.split('\n') if tag.strip()] + + # Filter and sort version tags + import re + version_pattern = re.compile(r'^\d+\.\d+\.\d+$') + version_tags = [tag for tag in tags if version_pattern.match(tag)] + + if not version_tags: + return None + + # Sort by version number + return sorted(version_tags, key=lambda v: [int(x) for x in v.split('.')])[-1] + except subprocess.CalledProcessError: + return None + + +def install_dependencies(): + """Install documentation dependencies.""" + print("Installing documentation dependencies...") + subprocess.run( + [sys.executable, '-m', 'pip', 'install', '--upgrade', '-r', 'requirements.txt'], + check=True + ) + subprocess.run( + [sys.executable, '-m', 'pip', 'install', '--upgrade', '-r', 'requirements.old'], + check=False # This file may not exist + ) + + +def build_single_version(output_dir='docs'): + """ + Build single-version documentation for development. + + This is equivalent to `make doc` in the old Makefile. 
+ """ + print("Building single-version documentation...") + + # Remove existing docs + if os.path.exists(output_dir): + import shutil + shutil.rmtree(output_dir) + + # Run sphinx-build + subprocess.run( + ['sphinx-build', 'documentation', output_dir, '-b', 'html', '-a'], + check=True + ) + + print(f"Documentation built successfully in {output_dir}/") + + +def build_multi_version(output_dir='docs'): + """ + Build multi-version documentation for publication. + + This is equivalent to `make docs` in the old Makefile. + """ + print("Building multi-version documentation...") + + # Install dependencies + install_dependencies() + + # Get latest version + latest = get_latest_version_tag() + if not latest: + print("Warning: No version tags found. Using 'main' as latest.", file=sys.stderr) + latest = 'main' + + # Remove existing docs + if os.path.exists(output_dir): + import shutil + shutil.rmtree(output_dir) + + # Run sphinx-multiversion + subprocess.run( + ['sphinx-multiversion', 'documentation', output_dir, '-b', 'html', '-a'], + check=True + ) + + # Create .nojekyll file for GitHub Pages + nojekyll_path = os.path.join(output_dir, '.nojekyll') + Path(nojekyll_path).touch() + + # Create symlink to latest version + latest_link = os.path.join(output_dir, 'latest') + latest_target = latest + + # Remove existing symlink if present + if os.path.islink(latest_link): + os.unlink(latest_link) + + # Create symlink (works on Unix-like systems) + try: + os.symlink(latest_target, latest_link) + print(f"Created symlink: latest -> {latest_target}") + except OSError as e: + print(f"Warning: Could not create symlink: {e}", file=sys.stderr) + print("You may need to create the symlink manually on Windows.", file=sys.stderr) + + # Create redirect index.html + index_html = """ + + + Redirect to latest version + + + + +

Redirecting to latest documentation...

+ + +""" + index_path = os.path.join(output_dir, 'index.html') + with open(index_path, 'w') as f: + f.write(index_html) + + print(f"Multi-version documentation built successfully in {output_dir}/") + print(f"Latest version: {latest}") + + +def main(): + """Main entry point.""" + parser = argparse.ArgumentParser( + description="Build documentation for mmif-python" + ) + parser.add_argument( + '--multi', + action='store_true', + help="Build multi-version documentation (for publication)" + ) + parser.add_argument( + '--output', + '-o', + metavar='DIR', + default='docs', + help="Output directory (default: docs)" + ) + + args = parser.parse_args() + + # Ensure we're in the project root + if not os.path.exists('documentation'): + print("Error: documentation/ directory not found.", file=sys.stderr) + print("Please run this script from the project root directory.", file=sys.stderr) + sys.exit(1) + + # Ensure VERSION file exists + if not os.path.exists('VERSION'): + print("Error: VERSION file not found.", file=sys.stderr) + print("Run 'python scripts/manage_version.py' first.", file=sys.stderr) + sys.exit(1) + + # Ensure generated code exists + if not os.path.exists('mmif/ver'): + print("Error: Generated code not found (mmif/ver/).", file=sys.stderr) + print("Run 'pip install -e .' or 'python -m build' first.", file=sys.stderr) + sys.exit(1) + + try: + if args.multi: + build_multi_version(args.output) + else: + build_single_version(args.output) + except subprocess.CalledProcessError as e: + print(f"Error: Documentation build failed: {e}", file=sys.stderr) + sys.exit(1) + except Exception as e: + print(f"Error: {e}", file=sys.stderr) + sys.exit(1) + + +if __name__ == '__main__': + main() diff --git a/scripts/clean.py b/scripts/clean.py new file mode 100644 index 00000000..5ed36e32 --- /dev/null +++ b/scripts/clean.py @@ -0,0 +1,184 @@ +#!/usr/bin/env python3 +""" +Cleanup script for mmif-python build artifacts. + +Replaces `make clean` and `make distclean` Makefile targets. 
+ +Usage: + python scripts/clean.py # Clean all build artifacts + python scripts/clean.py --dist-only # Clean only distribution artifacts +""" + +import argparse +import os +import shutil +import subprocess +import sys + + +def remove_path(path, description=""): + """Remove a file or directory if it exists.""" + if os.path.exists(path): + try: + if os.path.isdir(path): + shutil.rmtree(path) + else: + os.remove(path) + print(f"Removed: {path}" + (f" ({description})" if description else "")) + return True + except Exception as e: + print(f"Warning: Could not remove {path}: {e}", file=sys.stderr) + return False + return False + + +def clean_generated_code(): + """Remove generated code packages.""" + print("\nCleaning generated code...") + paths = [ + ('mmif/ver', 'version package'), + ('mmif/res', 'resources package'), + ('mmif/vocabulary', 'vocabulary package'), + ] + + for path, desc in paths: + remove_path(path, desc) + + +def clean_build_artifacts(): + """Remove build artifacts.""" + print("\nCleaning build artifacts...") + paths = [ + ('build', 'build directory'), + ('mmif_python.egg-info', 'egg-info directory'), + ('__pycache__', 'Python cache'), + ] + + for path, desc in paths: + remove_path(path, desc) + + # Remove all __pycache__ directories + for root, dirs, files in os.walk('.'): + if '__pycache__' in dirs: + cache_dir = os.path.join(root, '__pycache__') + remove_path(cache_dir, 'Python cache') + + +def clean_dist_artifacts(): + """Remove distribution artifacts.""" + print("\nCleaning distribution artifacts...") + paths = [ + ('dist', 'distribution directory'), + ] + + for path, desc in paths: + remove_path(path, desc) + + +def clean_test_artifacts(): + """Remove test and coverage artifacts.""" + print("\nCleaning test artifacts...") + paths = [ + ('.pytest_cache', 'pytest cache'), + ('.coverage', 'coverage data'), + ('coverage.xml', 'coverage XML report'), + ('htmlcov', 'coverage HTML report'), + ('.hypothesis', 'hypothesis cache'), + ('tests/.hypothesis', 'hypothesis test cache'), + ('.pytype', 'pytype cache'), + ] + + for path, desc in paths: + remove_path(path, desc) + + +def clean_docs(): + """Remove generated documentation.""" + print("\nCleaning documentation...") + remove_path('docs', 'generated documentation') + + +def clean_version_files(): + """Remove VERSION files.""" + print("\nCleaning version files...") + paths = [ + ('VERSION', 'version file'), + ('VERSION.dev', 'dev version file'), + ] + + for path, desc in paths: + remove_path(path, desc) + + +def restore_documentation_csv(): + """Restore documentation/target-versions.csv from git.""" + csv_file = 'documentation/target-versions.csv' + if os.path.exists('.git'): + try: + print(f"\nRestoring {csv_file} from git...") + subprocess.run( + ['git', 'checkout', '--', csv_file], + check=True, + capture_output=True + ) + print(f"Restored: {csv_file}") + except subprocess.CalledProcessError: + print(f"Warning: Could not restore {csv_file} from git", file=sys.stderr) + + +def main(): + """Main entry point.""" + parser = argparse.ArgumentParser( + description="Clean build artifacts for mmif-python" + ) + parser.add_argument( + '--dist-only', + action='store_true', + help="Clean only distribution artifacts (equivalent to make distclean)" + ) + parser.add_argument( + '--keep-version', + action='store_true', + help="Keep VERSION and VERSION.dev files" + ) + parser.add_argument( + '--keep-docs', + action='store_true', + help="Keep generated documentation" + ) + + args = parser.parse_args() + + print("mmif-python cleanup script") + 
print("=" * 50) + + if args.dist_only: + # Only clean distribution artifacts (make distclean) + clean_dist_artifacts() + else: + # Full clean (make clean) + clean_dist_artifacts() + clean_build_artifacts() + clean_test_artifacts() + clean_generated_code() + + if not args.keep_version: + clean_version_files() + + if not args.keep_docs: + clean_docs() + + restore_documentation_csv() + + # Remove hidden cache directories + print("\nCleaning hidden cache directories...") + for item in os.listdir('.'): + if item.startswith('.') and item.endswith('cache'): + remove_path(item, 'cache directory') + + print("\n" + "=" * 50) + print("Cleanup complete!") + + +if __name__ == '__main__': + main() diff --git a/scripts/manage_version.py b/scripts/manage_version.py new file mode 100644 index 00000000..be1635dd --- /dev/null +++ b/scripts/manage_version.py @@ -0,0 +1,217 @@ +#!/usr/bin/env python3 +""" +Version management script for mmif-python. + +Replaces `make version` and `make devversion` Makefile targets. + +This is a CLI wrapper around the build_tools.version module. + +Usage: + python scripts/manage_version.py # Interactive release version + python scripts/manage_version.py --dev # Generate dev version + python scripts/manage_version.py --set 1.0.0 # Set specific version +""" + +import argparse +import os +import re +import subprocess +import sys + +# Import from build_tools +sys.path.insert(0, os.path.dirname(os.path.dirname(os.path.abspath(__file__)))) +from build_tools import version as version_utils + + +def parse_version(version: str) -> tuple: + """ + Parse a version string into components. + + Args: + version: Version string (e.g., "1.2.3" or "1.2.3.dev4") + + Returns: + Tuple of (major, minor, patch, dev_number) + dev_number is 0 for release versions + """ + match = re.match(r'^(\d+)\.(\d+)\.(\d+)(?:\.dev(\d+))?$', version) + if not match: + raise ValueError(f"Invalid version format: {version}") + + major, minor, patch, dev = match.groups() + return (int(major), int(minor), int(patch), int(dev) if dev else 0) + + +def format_version(major: int, minor: int, patch: int, dev: int = 0) -> str: + """Format version components into a version string.""" + if dev: + return f"{major}.{minor}.{patch}.dev{dev}" + else: + return f"{major}.{minor}.{patch}" + + +def increase_patch(version: str) -> str: + """Increase the patch version number.""" + major, minor, patch, _ = parse_version(version) + return format_version(major, minor, patch + 1) + + +def add_dev(version: str) -> str: + """Convert a version to dev1.""" + major, minor, patch, _ = parse_version(version) + return format_version(major, minor, patch, 1) + + +def increase_dev(version: str) -> str: + """Increase the dev version number.""" + major, minor, patch, dev = parse_version(version) + if dev == 0: + raise ValueError(f"Version {version} is not a dev version") + return format_version(major, minor, patch, dev + 1) + + +def get_local_git_tags() -> str: + """Get latest tag from local git repository.""" + try: + result = subprocess.run( + ['git', 'tag'], + capture_output=True, + text=True, + check=True + ) + tags = result.stdout.strip().split('\n') + version_pattern = re.compile(r'^(\d+\.\d+\.\d+(?:\.dev\d+)?)$') + version_tags = [tag for tag in tags if version_pattern.match(tag)] + + if not version_tags: + return '0.0.0' + + # Sort and return latest + return sorted(version_tags, key=lambda v: [int(x) if x.isdigit() else 0 + for x in re.split(r'[.\D]', v)])[-1] + except subprocess.CalledProcessError: + return '0.0.0' + + +def generate_dev_version() 
-> str: + """ + Generate a dev version based on latest mmif-python and mmif spec tags. + + Logic: + - If mmif-python major.minor matches mmif spec major.minor: + - If latest mmif-python is dev: increment dev number + - Otherwise: increase patch and add .dev1 + - Otherwise: use mmif spec version with .dev1 + """ + python_ver = get_local_git_tags() + if python_ver == '0.0.0': + # Fetch from GitHub using build_tools + try: + python_ver = version_utils.get_latest_mmif_git_tag() + except RuntimeError: + python_ver = '0.0.0' + + # Fetch MMIF spec version using build_tools + try: + spec_ver = version_utils.get_latest_mmif_git_tag() + except RuntimeError as e: + print(f"Error: Could not fetch MMIF spec version: {e}", file=sys.stderr) + sys.exit(1) + + # Parse versions + py_major, py_minor, py_patch, py_dev = parse_version(python_ver) + spec_major, spec_minor, spec_patch, _ = parse_version(spec_ver) + + # Check if major.minor match + if py_major == spec_major and py_minor == spec_minor: + if py_dev > 0: + # Increment dev number + return increase_dev(python_ver) + else: + # Increase patch and add .dev1 + return add_dev(increase_patch(python_ver)) + else: + # Use spec version with .dev1 + return add_dev(spec_ver) + + +def write_version_file(version: str, filename: str = "VERSION") -> None: + """Write version to file.""" + with open(filename, 'w') as f: + f.write(version + '\n') + print(f"Version set to: {version}") + print(f"Written to: {filename}") + + +def main(): + """Main entry point.""" + parser = argparse.ArgumentParser( + description="Manage version numbers for mmif-python" + ) + parser.add_argument( + '--dev', + action='store_true', + help="Generate a development version" + ) + parser.add_argument( + '--set', + metavar='VERSION', + help="Set a specific version (e.g., 1.0.0 or 1.0.0.dev1)" + ) + parser.add_argument( + '--output', + metavar='FILE', + default='VERSION', + help="Output file (default: VERSION)" + ) + + args = parser.parse_args() + + # Check if VERSION.dev exists (from old Makefile system) + version_dev_file = 'VERSION.dev' + if os.path.exists(version_dev_file) and not args.set and not args.dev: + # Use existing VERSION.dev + with open(version_dev_file, 'r') as f: + version = f.read().strip() + write_version_file(version, args.output) + return + + if args.set: + # Validate the version format + try: + parse_version(args.set) + write_version_file(args.set, args.output) + except ValueError as e: + print(f"Error: {e}", file=sys.stderr) + sys.exit(1) + elif args.dev: + # Generate dev version + version = generate_dev_version() + write_version_file(version, args.output) + # Also write to VERSION.dev for compatibility + write_version_file(version, version_dev_file) + else: + # Interactive mode + current = get_local_git_tags() + if current == '0.0.0': + # No local version tags; fall back to the latest MMIF spec tag, as generate_dev_version() does + current = version_utils.get_latest_mmif_git_tag() + + print(f"Current version: {current}") + suggested = increase_patch(current) + print(f"Suggested version (increase patch): {suggested}") + + user_input = input(f"Enter new version (or press Enter for {suggested}): ").strip() + + if user_input: + try: + parse_version(user_input) + write_version_file(user_input, args.output) + except ValueError as e: + print(f"Error: {e}", file=sys.stderr) + sys.exit(1) + else: + write_version_file(suggested, args.output) + + +if __name__ == '__main__': + main() diff --git a/setup.py b/setup.py index 48e0056d..dfdb059b 100644 --- a/setup.py +++ b/setup.py @@ -1,4 +1,25 @@ #! 
/usr/bin/env python3 +""" +COMPLETELY DEPRECATED - LEFT FOR HISTORICAL REFERENCE ONLY + +This file is no longer needed and will be removed in a future release. +Build-time code generation now uses setuptools entry points defined in pyproject.toml. + + +DO NOT USE THIS FILE. Use modern build tools instead (more build commands are listed in the top comment of `Makefile`, which is also slated for removal): + pip install . # Standard installation + pip install -e . # Development installation + python -m build # Build distribution packages + +All package metadata and build configuration are in pyproject.toml. +Build hooks are integrated via setuptools entry points (see pyproject.toml line 68). + +This file is kept temporarily for: +1. Legacy CI/CD pipelines that haven't been updated yet +2. Historical reference during the migration period +It will be removed once all workflows are migrated (target: 3-6 months). +""" + import io import json import os
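For orientation, the wiring that replaces setup.py's custom command classes is the setuptools entry point shown in `build_tools/README.md` above. A minimal `pyproject.toml` sketch of that wiring could look like the following; only the entry-point table is confirmed by this patch, while the `[build-system]` table (including the PyYAML build requirement) is an assumption for illustration:

```toml
# Assumed build-backend declaration; the actual pyproject.toml is not included in this patch excerpt.
[build-system]
requires = ["setuptools", "wheel", "PyYAML"]  # PyYAML assumed, since build_tools parses vocabulary YAML
build-backend = "setuptools.build_meta"

# Entry point that runs build_tools.hooks:setup_hooks before wheels/sdists are built,
# as documented in build_tools/README.md.
[project.entry-points."setuptools.finalize_distribution_options"]
build_hooks = "build_tools.hooks:setup_hooks"
```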