diff --git a/README.md b/README.md index 945ada6a..aba5f423 100644 --- a/README.md +++ b/README.md @@ -3,6 +3,7 @@ [![All Contributors](https://img.shields.io/github/all-contributors/datalad/datalad-next?color=ee8449&style=flat-square)](#contributors) [![Build status](https://ci.appveyor.com/api/projects/status/dxomp8wysjb7x2os/branch/main?svg=true)](https://ci.appveyor.com/project/mih/datalad-next/branch/main) [![codecov](https://codecov.io/gh/datalad/datalad-next/branch/main/graph/badge.svg?token=2P8rak7lSX)](https://codecov.io/gh/datalad/datalad-next) +[![Maintainability](https://api.codeclimate.com/v1/badges/6d6091ba6a7fd5ab5baa/maintainability)](https://codeclimate.com/github/datalad/datalad-next/maintainability) [![docs](https://github.com/datalad/datalad-next/workflows/docs/badge.svg)](https://github.com/datalad/datalad-next/actions?query=workflow%3Adocs) [![Documentation Status](https://readthedocs.org/projects/datalad-next/badge/?version=latest)](http://docs.datalad.org/projects/next/en/latest/?badge=latest) [![License: MIT](https://img.shields.io/badge/License-MIT-yellow.svg)](https://opensource.org/licenses/MIT) diff --git a/datalad_next/annexremotes/tests/test_archivist.py b/datalad_next/annexremotes/tests/test_archivist.py index 23a1c5e4..f4157db6 100644 --- a/datalad_next/annexremotes/tests/test_archivist.py +++ b/datalad_next/annexremotes/tests/test_archivist.py @@ -7,7 +7,7 @@ from .. 
import UnsupportedRequest from ..archivist import ArchivistRemote from datalad_next.datasets import Dataset -from datalad_next.runners import CommandError +from datalad_core.runners import CommandError from datalad_next.tests import assert_result_count diff --git a/datalad_next/constraints/git.py b/datalad_next/constraints/git.py index 46657c83..2ba16e6d 100644 --- a/datalad_next/constraints/git.py +++ b/datalad_next/constraints/git.py @@ -1,7 +1,7 @@ """Constraints for Git-related concepts and parameters""" from __future__ import annotations -from datalad_next.runners import ( +from datalad_core.runners import ( CommandError, call_git, call_git_oneline, diff --git a/datalad_next/consts/__init__.py b/datalad_next/consts/__init__.py index 2d2e4876..b847d50e 100644 --- a/datalad_next/consts/__init__.py +++ b/datalad_next/consts/__init__.py @@ -16,6 +16,13 @@ ``True`` if executed on the Windows platform. """ +__all__ = [ + 'COPY_BUFSIZE', + 'PRE_INIT_COMMIT_SHA', + 'on_linux', + 'on_windows', +] + # import from "utils", but these really are constants from datalad.utils import ( on_linux, @@ -29,4 +36,4 @@ # from PY3.10 COPY_BUFSIZE = 1024 * 1024 if on_windows else 64 * 1024 -from datalad.consts import PRE_INIT_COMMIT_SHA +from datalad_core.consts import PRE_INIT_COMMIT_SHA diff --git a/datalad_next/gitpathspec/__init__.py b/datalad_next/gitpathspec/__init__.py index b0fb5e20..4cdf0bca 100644 --- a/datalad_next/gitpathspec/__init__.py +++ b/datalad_next/gitpathspec/__init__.py @@ -1,11 +1,23 @@ """Handling of Git's pathspecs with subdirectory mangling support -This functionality can be used to add support for pathspecs to implementations -that rely on Git commands that do not support submodule recursion directly. - +.. deprecated:: 1.6 + This module is deprecated. It has been migrated to the `datasalad library + `__. Imports should be adjusted to + ``datasalad.gitpathspec``. 
""" __all__ = ['GitPathSpec', 'GitPathSpecs'] -from .pathspec import GitPathSpec -from .pathspecs import GitPathSpecs +import warnings + +from datasalad.gitpathspec import ( + GitPathSpec, + GitPathSpecs, +) + +warnings.warn( + '`datalad_next.gitpathspec` has been migrated to the datasalad library, ' + 'adjust imports to `datasalad.gitpathspec`', + DeprecationWarning, + stacklevel=1, +) diff --git a/datalad_next/gitpathspec/pathspec.py b/datalad_next/gitpathspec/pathspec.py deleted file mode 100644 index 7ed96ec1..00000000 --- a/datalad_next/gitpathspec/pathspec.py +++ /dev/null @@ -1,322 +0,0 @@ -# -# Intentionally written without importing datalad code -# -from __future__ import annotations - -from dataclasses import dataclass -from fnmatch import fnmatch -import posixpath -from typing import Generator - - -@dataclass(frozen=True) -class GitPathSpec: - """Support class for patterns used to limit paths in Git commands - - From the Git documentation: - - Pathspecs are used on the command line of "git ls-files", "git ls-tree", - "git add", "git grep", "git diff", "git checkout", and many other - commands to limit the scope of operations to some subset of the tree - or working tree. - - Apart from providing a dedicated type for a pathspec, the main purpose - of this functionality is to take a pathspec that is valid in the context - of one (top-level) repository, and translate it such that the set of - pathspecs given to the same command running on/in a submodule/subdirectory - gives the same results, as if the initial top-level invocation reported - them (if it even could). See the ``for_subdir()`` method for more. 
- - >>> # simple stripping of leading directory - >>> ps = GitPathSpec.from_pathspec_str('dir/*.jpg') - >>> [str(i) for i in ps.for_subdir('dir')] - ['*.jpg'] - >>> # match against magic pathspecs - >>> ps = GitPathSpec.from_pathspec_str(':(glob)**r/*.jpg') - >>> # longest and shortest match are produced - >>> [str(i) for i in ps.for_subdir('dir')] - [':(glob)**r/*.jpg', ':(glob)*.jpg'] - >>> [str(i) for i in ps.for_subdir('root/some/dir')] - [':(glob)**r/*.jpg', ':(glob)*.jpg'] - >>> # support for special 'no-pathspec' pathspec - >>> ps = GitPathSpec.from_pathspec_str(':') - >>> ps.is_nopathspecs - True - - .. seealso:: - - - Entry in the Git glossary: - https://git-scm.com/docs/gitglossary#Documentation/gitglossary.txt-aiddefpathspecapathspec - - Informative, more elaborate description of pathspecs: - https://css-tricks.com/git-pathspecs-and-how-to-use-them/ - """ - # TODO think about adding support for another magic that represents - # the root of a repository hierarchy (amending 'top', which is - # the root of the working tree -- but presumably for a single repository - spectypes: tuple[str, ...] 
- """Long-form pathspec type identifiers""" - dirprefix: str - """Directory prefix (pathspec up to the last slash) limiting the scope""" - pattern: str | None - """Pattern to match paths against using ``fnmatch``""" - - @property - def is_nopathspecs(self) -> bool: - """Whether this pathspec is the "no pathspecs" pathspec, AKA ``':'``""" - return not self.spectypes and not self.dirprefix and not self.pattern - - def __str__(self) -> str: - """Generate normalized (long-form) pathspec""" - if self.is_nopathspecs: - return ':' - ps = '' - if self.spectypes: - ps += ':(' - ps += ','.join(self.spectypes) - ps += ')' - ps += self._get_joined_pattern() - return ps - - def _get_joined_pattern(self): - return f'{self.dirprefix if self.dirprefix else ""}' \ - f'{"/" if self.dirprefix else ""}' \ - f'{self.pattern if self.pattern else ""}' - - def for_subdir(self, subdir: str) -> list[GitPathSpec]: - """Translate a pathspec into the scope of a subdirectory. - - The processing implemented here is purely lexical. This means that it - works without matching against actual file system (or Git tree) - content. Consequently, to some degree, overly broad results are - produced, but at the same time use cases are supported where there - is nothing (yet) to match against (e.g., a not-yet-cloned submodule). - - A pathspec with a ``top`` magic is produced unmodified, as there are - defined relative to the root of a repository, not relative to a base - directory. As a consequence, such pathspecs will automatically - refer to a submodule root when the target directory is contained in - one. - - Parameters - ---------- - subdir: str - Relative path in POSIX notation - - Returns - ------- - list - When an empty list is returned, this indicates that the pathsspec - cannot be translated to the given ``subdir``, because it does - not match the ``subdir`` itself. If a pathspec translates to - "no pathspecs" (``':'``), a list with a dedicated ':' pathspec is - returned. 
- """ - # special case of a non-translation (pretty much only here to - # make some test implementations simpler - if not subdir: - return [self] - - return list(yield_subdir_match_remainder_pathspecs(subdir, self)) - - @classmethod - def from_pathspec_str( - cls, - pathspec: str, - ) -> GitPathSpec: - """Parse a string-form pathspec into types, prefix, and pattern""" - spectypes = [] - dirprefix = None - pattern = None - - if pathspec == ':': - # shortcut for the special no-path-spec pathspec - return GitPathSpec(tuple(), '', None) - elif pathspec.startswith(':('): - # long-form magic - magic, pattern = pathspec[2:].split(')', maxsplit=1) - spectypes = magic.split(',') - elif pathspec.startswith(':'): - # short-form magic - magic_signatures = { - '/': 'top', - '!': 'exclude', - '^': 'exclude', - ':': None, - } - pattern = pathspec[1:] - spectypes = [] - for i in range(1, len(pathspec)): - sig = magic_signatures.get(pathspec[i]) - if sig is None: - pattern = pathspec[i:] - break - spectypes.append(sig) - else: - pattern = pathspec - - # raise when glob and literal magic markers are present - # simultaneously - if 'glob' in spectypes and 'literal' in spectypes: - raise ValueError( - "'glob' magic is incompatible with 'literal' magic") - - # split off dirprefix - dirprefix, pattern = GitPathSpec._split_prefix_pattern(pattern) - - return cls( - spectypes=tuple(spectypes), - dirprefix=dirprefix, - pattern=pattern, - ) - - @staticmethod - def _split_prefix_pattern(pathspec): - # > the pathspec up to the last slash represents a directory prefix. - # > The scope of that pathspec is limited to that subtree. 
- try: - last_slash_idx = pathspec[::-1].index('/') - except ValueError: - # everything is the pattern - dirprefix = None - pattern = pathspec - else: - dirprefix = pathspec[:-last_slash_idx - 1] - pattern = pathspec[-last_slash_idx:] \ - if last_slash_idx > 0 else None - return dirprefix, pattern - - -def yield_subdir_match_remainder_pathspecs( - subdir: str, - pathspec: GitPathSpec, -) -> Generator[GitPathSpec, None, None]: - """Translate a pathspec into a set of possible subdirectory pathspecs - - The processing implemented here is purely lexical. This means that it - works without matching against actual file system (or Git tree) content. - This means that it yields, to some degree, overly broad results, but also - that it works in cases where there is nothing (yet) to match against. - For example, a not-yet-cloned submodule. - - This function does not perform any validatity checking of pathspecs. Only - valid pathspecs and well-formed paths are supported. - - A pathspec with the ``top`` magic is returned immediately and as-is. These - pathspecs have an absolute reference and do not require a translation into - a subdirectory namespace. - - Parameters - ---------- - subdir: str - POSIX-notation relative path of a subdirectory. The reference directory - match be the same as that of the pathspec to be translated. - pathspec: GitPathSpec - To-be-translated pathspec - - Yields - ------ - GitPathSpec - Any number of pathspecs that an input pathspec decomposed into upon - translation into the namespace of a subdirectory. 
- """ - if 'top' in pathspec.spectypes or pathspec.is_nopathspecs: - # pathspec with an absolute reference, or "no pathspecs" - # no translation needed - yield pathspec - return - - # add a trailing directory separator to prevent undesired - # matches of partial directory names - subdir = subdir \ - if subdir.endswith('/') \ - else f'{subdir}/' - tp = pathspec._get_joined_pattern() - - if 'icase' in pathspec.spectypes: - subdir = subdir.casefold() - tp = tp.casefold() - - # literal pathspecs - if 'literal' in pathspec.spectypes: - # append a trailing slash to allow for full matches - tp_endslash = f'{tp}/' - if not tp_endslash.startswith(subdir): - # no match - # BUT - # we might have a multi-level subdir, and we might match an - # intermediate subdir and could still yield a 'no pathspec' - # result - while subdir := posixpath.split(subdir)[0]: - if tp_endslash.startswith(subdir): - yield GitPathSpec.from_pathspec_str(':') - return - return - - remainder = tp[len(subdir):] - if not remainder: - # full match - yield GitPathSpec.from_pathspec_str(':') - else: - yield GitPathSpec( - pathspec.spectypes, - *GitPathSpec._split_prefix_pattern(remainder) - ) - return - - # tokenize the testpattern using the wildcard that also matches - # directories - token_delim = '**' if 'glob' in pathspec.spectypes else '*' - tp_chunks = tp.split(token_delim) - prefix_match = '' - yielded = set() - for i, chunk in enumerate(tp_chunks): - last_chunk = i + 1 == len(tp_chunks) - if last_chunk: - trymatch = \ - f'{prefix_match}{chunk}{"" if chunk.endswith("/") else "/"}' - else: - trymatch = f'{prefix_match}{chunk}*' - if not fnmatch(subdir, f'{trymatch}'): - # each chunk needs match in order, first non-match ends the - # algorithm - # BUT - # we have an (initial) chunk that points already - # inside the target subdir - submatch = trymatch - while submatch := posixpath.split(submatch)[0]: - if fnmatch(f'{subdir}', f'{submatch}/'): - ps = GitPathSpec( - pathspec.spectypes, - 
*GitPathSpec._split_prefix_pattern( - # +1 for trailing slash - tp[len(submatch) + 1:]) - ) - if ps not in yielded: - yield ps - return - # OR - # we might have a multi-level subdir, and we might match an - # intermediate subdir and could still yield a 'no pathspec' - # result - while subdir := posixpath.split(subdir)[0]: - if fnmatch(f'{subdir}/', trymatch): - yield GitPathSpec.from_pathspec_str(':') - return - return - - remainder = tp_chunks[i + 1:] - if all(not c for c in remainder): - # direct hit, no pathspecs after translation - yield GitPathSpec.from_pathspec_str(':') - return - else: - ps = GitPathSpec( - pathspec.spectypes, - *GitPathSpec._split_prefix_pattern( - f'{token_delim}{token_delim.join(remainder)}', - ) - ) - yield ps - yielded.add(ps) - # extend prefix for the next round - prefix_match = trymatch diff --git a/datalad_next/gitpathspec/pathspecs.py b/datalad_next/gitpathspec/pathspecs.py deleted file mode 100644 index 5f626288..00000000 --- a/datalad_next/gitpathspec/pathspecs.py +++ /dev/null @@ -1,140 +0,0 @@ -from __future__ import annotations - -from collections.abc import Iterable -from itertools import chain -from pathlib import ( - PurePosixPath, -) - -from .pathspec import GitPathSpec - - -class GitPathSpecs: - """Convenience container for any number of pathspecs (or none) - - This class can facilitate implementing support for pathspec-constraints, - including scenarios involving submodule recursion. 
- - >>> # can except a "default" argument for no pathspecs - >>> ps = GitPathSpecs(None) - >>> not ps - True - >>> ps.arglist() - [] - >>> # deal with any number of pathspecs - >>> ps = GitPathSpecs(['*.jpg', 'dir/*.png']) - >>> ps.any_match_subdir(PurePosixPath('dummy')) - True - >>> ps.for_subdir(PurePosixPath('dir')) - GitPathSpecs(['*.jpg', '*.png']) - """ - def __init__( - self, - pathspecs: Iterable[str | GitPathSpec] | GitPathSpecs | None, - ): - """Pathspecs can be given as an iterable (string-form and/or - ``GitPathSpec``), another ``GitPathSpecs`` instance, or ``None``. - ``None``, or empty iterable indicate a 'no constraint' scenario, - equivalent to a single ``':'`` pathspec. - """ - self._pathspecs: list[GitPathSpec] | None = None - if pathspecs is None: - return - elif isinstance(pathspecs, GitPathSpecs): - self._pathspecs = list(pathspecs._pathspecs) \ - if pathspecs._pathspecs else None - else: - self._pathspecs = _normalize_gitpathspec(pathspecs) - - def __repr__(self) -> str: - return f"{self.__class__.__name__}([{', '.join(repr(p) for p in self.arglist())}])" - - def __len__(self) -> int: - return len(self._pathspecs) if self._pathspecs is not None else 0 - - def __eq__(self, obj): - return self._pathspecs == obj._pathspecs - - # TODO lru_cache decorator? - # this would prevent repeated conversion cost for the usage pattern of - # - test if we would have a match for a subdir - # - run code with the matching pathspecs - # without having to implement caching logic in client code - def for_subdir( - self, - path: PurePosixPath, - ) -> GitPathSpecs: - """Translate pathspecs into the scope of a subdirectory - - Raises - ------ - ValueError - Whenever no pathspec can be translated into the scope of the target - directory. 
- """ - if not self._pathspecs: - return GitPathSpecs(None) - translated = list(chain.from_iterable( - ps.for_subdir(str(path)) - for ps in self._pathspecs - )) - if not translated: - # not a single pathspec could be translated into the subdirectory - # scope. This means none was applicable, and not that the whole - # subdirectory is matched. We raise in order to allow client code - # to distinguish a no-match from an all-match scenario. Returning - # the equivalent of an empty list would code "no constraint", - # rather than "no match" - raise ValueError(f"No pathspecs translate to {path=}") - return GitPathSpecs(translated) - - def any_match_subdir( - self, - path: PurePosixPath, - ) -> bool: - """Returns whether any pathspec could match subdirectory content - - In other words, ``False`` is returned whenever ``.for_subdir()`` - would raise ``ValueError``. - - Parameters - ---------- - path: PurePosixPath - Relative path of the subdirectory to run the test for. - """ - if self._pathspecs is None: - return False - path_s = str(path) - for ps in self._pathspecs: - if ps.for_subdir(path_s): - # any match is sufficient for a decision - return True - # nothing matches - return False - - def arglist(self) -> list[str]: - """Convert pathspecs to a CLI argument list - - This list is suitable for use with any Git command that supports - pathspecs, after a ``--`` (that disables the interpretation of further - arguments as options). - - When no pathspecs are present an empty list is returned. 
- """ - if self._pathspecs is None: - return [] - return list(str(ps) for ps in self._pathspecs) - - -def _normalize_gitpathspec( - specs: Iterable[str | GitPathSpec] | None, -) -> list[GitPathSpec] | None: - """Normalize path specs to a plain list of GitPathSpec instances""" - if not specs: - return None - else: - return [ - ps if isinstance(ps, GitPathSpec) - else GitPathSpec.from_pathspec_str(ps) - for ps in specs - ] diff --git a/datalad_next/gitpathspec/tests/__init__.py b/datalad_next/gitpathspec/tests/__init__.py deleted file mode 100644 index e69de29b..00000000 diff --git a/datalad_next/gitpathspec/tests/test_gitpathspec.py b/datalad_next/gitpathspec/tests/test_gitpathspec.py deleted file mode 100644 index efddc430..00000000 --- a/datalad_next/gitpathspec/tests/test_gitpathspec.py +++ /dev/null @@ -1,384 +0,0 @@ -from pathlib import Path -import pytest -import subprocess -import sys - -from .. import ( - GitPathSpec, - GitPathSpecs, -) -from ..pathspec import yield_subdir_match_remainder_pathspecs - - -def _list_files(path, pathspecs): - return [ - i for i in subprocess.run( - ['git', 'ls-files', '-z', '--other', '--', *pathspecs], - capture_output=True, - cwd=path, - ).stdout.decode('utf-8').split('\0') - if i - ] - - -@pytest.fixture(scope="function") -def pathspec_match_testground(tmp_path_factory): - """Create a Git repo with no commit and many untracked files - - In this playground, `git ls-files --other` can be used to testrun - pathspecs. 
- - See the top item in `testcases` for a summary of the content - """ - p = tmp_path_factory.mktemp('pathspec_match') - probe = p / 'pr?be' - # check for case insensitive file systems - crippled_fs = Path(str(p).upper()).exists() - try: - probe.touch() - probe.unlink() - except OSError: - crippled_fs = True - - subprocess.run(['git', 'init'], cwd=p, check=True) - p_sub = p / 'sub' - p_sub.mkdir() - for d in (p, p_sub): - p_a = d / 'aba' - p_b = d / 'a?a' - for sp in (p_a,) if crippled_fs else (p_a, p_b): - sp.mkdir() - for fname in ('a.txt', 'A.txt', 'a.JPG'): - (sp / fname).touch() - # add something that is unique to sub/ - (p_sub / 'b.dat').touch() - - testcases = [ - # valid - dict( - ps=':', - fordir={ - None: {'specs': [':'], - 'match': [ - 'aba/a.JPG', 'aba/a.txt', - 'sub/aba/a.JPG', 'sub/aba/a.txt', - 'sub/b.dat'] if crippled_fs else [ - 'a?a/A.txt', 'a?a/a.JPG', 'a?a/a.txt', - 'aba/A.txt', 'aba/a.JPG', 'aba/a.txt', - 'sub/a?a/A.txt', 'sub/a?a/a.JPG', 'sub/a?a/a.txt', - 'sub/aba/A.txt', 'sub/aba/a.JPG', 'sub/aba/a.txt', - 'sub/b.dat'], - }, - 'sub': {'specs': [':'], - 'match': [ - 'aba/a.JPG', 'aba/a.txt', - 'b.dat'] if crippled_fs else [ - 'a?a/A.txt', 'a?a/a.JPG', 'a?a/a.txt', - 'aba/A.txt', 'aba/a.JPG', 'aba/a.txt', - 'b.dat'], - }, - }, - ), - dict( - ps='aba', - fordir={ - None: {'match': [ - 'aba/a.JPG', 'aba/a.txt', - ] if crippled_fs else [ - 'aba/A.txt', 'aba/a.JPG', 'aba/a.txt'], - }, - 'aba': {'specs': [':'], - 'match': [ - 'a.JPG', 'a.txt'] if crippled_fs else [ - 'A.txt', 'a.JPG', 'a.txt'], - }, - }, - ), - # same as above, but with a trailing slash - dict( - ps='aba/', - fordir={ - None: {'match': [ - 'aba/a.JPG', 'aba/a.txt', - ] if crippled_fs else [ - 'aba/A.txt', 'aba/a.JPG', 'aba/a.txt'], - }, - 'aba': {'specs': [':'], - 'match': [ - 'a.JPG', 'a.txt'] if crippled_fs else [ - 'A.txt', 'a.JPG', 'a.txt'], - }, - }, - ), - # TODO same as above, but as a literal - - dict( - ps=':(glob)aba/*.txt', - fordir={ - None: {'match': [ - 'aba/a.txt', - 
] if crippled_fs else ['aba/A.txt', 'aba/a.txt']}, - 'sub': {'specs': []}, - }, - ), - dict( - ps=':/aba/*.txt', - norm=':(top)aba/*.txt', - fordir={ - None: {'match': [ - 'aba/a.txt', - ] if crippled_fs else ['aba/A.txt', 'aba/a.txt']}, - # for a subdir a keeps matching the exact same items - # not only be name, but by location - 'sub': {'specs': [':(top)aba/*.txt'], - 'match': ['../aba/a.txt'] if crippled_fs else [ - '../aba/A.txt', '../aba/a.txt']}, - }, - ), - dict( - ps='aba/*.txt', - fordir={ - None: {'match': ['aba/a.txt'] if crippled_fs else [ - 'aba/A.txt', 'aba/a.txt'], - }, - # not applicable - 'sub': {'specs': []}, - # but this is - 'aba': {'specs': ['*.txt']}, - }, - ), - dict( - ps='sub/aba/*.txt', - fordir={ - None: {'match': ['sub/aba/a.txt'] if crippled_fs else [ - 'sub/aba/A.txt', 'sub/aba/a.txt']}, - 'sub': {'specs': ['aba/*.txt'], - 'match': ['aba/a.txt'] if crippled_fs else [ - 'aba/A.txt', 'aba/a.txt']}, - }, - ), - dict( - ps='*.JPG', - fordir={ - None: {'match': [ - 'aba/a.JPG', 'sub/aba/a.JPG'] if crippled_fs else [ - 'a?a/a.JPG', 'aba/a.JPG', 'sub/a?a/a.JPG', - 'sub/aba/a.JPG']}, - # unchanged - 'sub': {'specs': ['*.JPG']}, - }, - ), - dict( - ps='*ba*.JPG', - fordir={ - None: {'match': ['aba/a.JPG', 'sub/aba/a.JPG']}, - 'aba': {'specs': ['*ba*.JPG', '*.JPG'], - 'match': ['a.JPG']}, - }, - ), - # invalid - # - # conceptual conflict and thereby unsupported by Git - # makes sense and is easy to catch that - dict(ps=':(glob,literal)broken', raises=ValueError), - ] - if not crippled_fs: - testcases.extend([ - # literal magic is only needed for non-crippled FS - dict( - ps=':(literal)a?a/a.JPG', - fordir={ - None: dict( - match=['a?a/a.JPG'], - ), - "a?a": dict( - specs=[':(literal)a.JPG'], - match=['a.JPG'], - ), - }, - ), - dict( - ps=':(literal,icase)SuB/A?A/a.jpg', - fordir={ - None: {'match': ['sub/a?a/a.JPG']}, - "sub/a?a": { - 'specs': [':(literal,icase)a.jpg'], - # given the spec transformation matches - # MIH would really expect to 
following, - # but it is not coming from Git :( - #'match': ['a.JPG'], - 'match': [], - }, - }, - ), - dict( - ps=':(icase)A?A/a.jpg', - fordir={ - None: {'match': ['a?a/a.JPG', 'aba/a.JPG']}, - "aba": { - 'specs': [':(icase)a.jpg'], - 'match': ['a.JPG'], - }, - }, - ), - dict( - ps=':(literal,icase)A?A/a.jpg', - fordir={ - None: {'match': ['a?a/a.JPG']}, - "a?a": { - 'specs': [':(literal,icase)a.jpg'], - 'match': ['a.JPG'], - }, - # the target subdir does not match the pathspec - "aba": {'specs': set()}, - }, - ), - ]) - - yield p, testcases - - -def test_pathspecs(pathspec_match_testground): - tg, testcases = pathspec_match_testground - - for testcase in testcases: - if testcase.get('raises'): - # test case states how `GitPathSpec` will blow up - # on this case. Verify and skip any further testing - # on this case - with pytest.raises(testcase['raises']): - GitPathSpec.from_pathspec_str(testcase['ps']) - continue - # create the instance - ps = GitPathSpec.from_pathspec_str(testcase['ps']) - # if no deviating normalized representation is given - # it must match the original one - assert str(ps) == testcase.get('norm', testcase['ps']) - # test translations onto subdirs now - # `None` is a special subdir that means "self", i.e. - # not translation other than normalization, we can use it - # to test matching behavior of the full pathspec - for subdir, target in testcase.get('fordir', {}).items(): - # translate -- a single input pathspec can turn into - # multiple translated ones. 
This is due to - subdir_specs = [str(s) for s in ps.for_subdir(subdir)] - if 'specs' in target: - assert set(subdir_specs) == set(target['specs']), \ - f'Mismatch for {testcase["ps"]!r} -> subdir {subdir!r} {target}' - if subdir and not target.get('specs') and 'match' in target: - raise ValueError( - 'invalid test specification: no subdir specs expected, ' - f'but match declared: {testcase!r}') - if subdir_specs and 'match' in target: - tg_subdir = tg / subdir if subdir else tg - assert _list_files(tg_subdir, subdir_specs) == target['match'] - - -def test_yield_subdir_match_remainder_pathspecs(): - testcases = [ - # FORMAT: target path, pathspec, subdir pathspecs - ('abc', ':', [':']), - # top-magic is returned as-is - ('murks', ':(top)crazy*^#', [':(top)crazy*^#']), - # no match - ('abc', 'not', []), - ('abc', 'ABC', [':'] if sys.platform.startswith('win') else []), - # direct hits, resolve to "no pathspecs" - ('abc', 'a?c', [':']), - ('abc', 'abc', [':']), - ('abc', 'abc/', [':']), - # icase-magic - ('abc', ':(icase)ABC', [':']), - ('ABC', ':(icase)abc', [':']), - # some fairly common fnmatch-style pathspec - ('abc', 'abc/*.jpg', ['*.jpg']), - ('abc', '*.jpg', ['*.jpg']), - ('abc', '*/*.jpg', ['*/*.jpg', '*.jpg']), - ('abc', '*/*.jpg', ['*/*.jpg', '*.jpg']), - ('abc', '*bc*.jpg', ['*bc*.jpg', '*.jpg']), - # adding an glob-unrelated magic does not impact the result - ('abc', ':(exclude)*/*.jpg', [':(exclude)*/*.jpg', ':(exclude)*.jpg']), - ('abc', ':(attr:export-subst)*/*.jpg', - [':(attr:export-subst)*/*.jpg', ':(attr:export-subst)*.jpg']), - ('abc', ':(icase,exclude)*/*.jpg', - [':(icase,exclude)*/*.jpg', ':(icase,exclude)*.jpg']), - # glob-magic - ('abc', ':(glob)*bc*.jpg', []), - ('abc', ':(glob)*bc**.jpg', [':(glob)**.jpg']), - # 2nd-level subdir - ('abc/123', 'some.jpg', []), - ('abc/123', '*.jpg', ['*.jpg']), - ('abc/123', 'abc/*', [':']), - ('abc/123', 'abc', [':']), - ('abc/123', ':(glob)abc', [':']), - ('abc/123', '*123', ['*123', ':']), - ('abc/123', 
'*/123', ['*/123', ':']), - ('abc/123', ':(glob)*/123', [':']), - # literal-magic - ('abc', ':(literal)a?c', []), - ('a?c', ':(literal)a?c', [':']), - ('a?c', ':(literal)a?c/*?ab*', [':(literal)*?ab*']), - ('a?c/123', ':(literal)a?c', [':']), - # more complex cases - ('abc/123/ABC', 'a*/1?3/*.jpg', - ['*/1?3/*.jpg', '*.jpg', '1?3/*.jpg']), - # exclude-magic - ('abc', ':(exclude)abc', [':']), - ('abc/123', ':(exclude)abc', [':']), - ('a?c', ':(exclude,literal)a?c', [':']), - # stuff that was problematic at some point - # initial, non-wildcard part already points inside the - # target directory - ('sub', 'sub/aba/*.txt', ['aba/*.txt']), - # no directory-greedy wildcard whatsoever - ('abc', ':(icase)A?C/a.jpg', [':(icase)a.jpg']), - # no directory-greedy wildcard in later chunk - ('nope/abc', 'no*/a?c/a.jpg', ['*/a?c/a.jpg', 'a.jpg']), - ] - for ts in testcases: - # always test against the given subdir, and also against the subdir - # given with a trailing slash - for target_path in (ts[0], f'{ts[0]}/'): - tsps = GitPathSpec.from_pathspec_str(ts[1]) - remainders = list( - yield_subdir_match_remainder_pathspecs( - target_path, - tsps, - ) - ) - assert [str(ps) for ps in remainders] == ts[2], \ - f'Mismatch for {ts}' - # arglist processing of the GitPathSpecs container comes to the - # same result - assert GitPathSpecs(remainders).arglist() == ts[2] - # now we produce the same result with the GitPathSpecs handler - try: - assert \ - GitPathSpecs([ts[1]]).for_subdir(target_path).arglist() \ - == [str(ps) for ps in remainders] - except ValueError: - # translation must raise when there would not be a remainder - assert not remainders - # if we are supposed to get any remainder out, the test for a - # subdir match also gives an analog result - if ts[2]: - assert GitPathSpecs([tsps]).any_match_subdir(target_path) - else: - assert not GitPathSpecs([tsps]).any_match_subdir(target_path) - - -def test_GitPathSpecs(): - ps = GitPathSpecs(['mike/*', '*.jpg']) - # we can create a 
GitPathSpecs object from another - assert GitPathSpecs(ps).arglist() == ps.arglist() - - # going over the properties - assert repr(ps) == "GitPathSpecs(['mike/*', '*.jpg'])" - assert len(ps) == 2 - - # we can have "no pathspecs" - # TODO shouldn't this be ':'? - # TODO how about the semantic distinction between None and []? - nops = GitPathSpecs(None) - assert GitPathSpecs(None).for_subdir('doesntmatter') == nops - assert GitPathSpecs(None).any_match_subdir('doesntmatter') is False diff --git a/datalad_next/gitremotes/datalad_annex.py b/datalad_next/gitremotes/datalad_annex.py index 5c267c18..42ce084f 100755 --- a/datalad_next/gitremotes/datalad_annex.py +++ b/datalad_next/gitremotes/datalad_annex.py @@ -212,7 +212,7 @@ LegacyGitRepo as GitRepo, ) from datalad_next.exceptions import CapturedException -from datalad_next.runners import ( +from datalad_core.runners import ( CommandError, call_git, call_git_oneline, diff --git a/datalad_next/iter_collections/annexworktree.py b/datalad_next/iter_collections/annexworktree.py index 4e950bb9..e534156a 100644 --- a/datalad_next/iter_collections/annexworktree.py +++ b/datalad_next/iter_collections/annexworktree.py @@ -29,7 +29,7 @@ from datalad_next.consts import on_windows from datalad_next.repo_utils import has_initialized_annex -from datalad_next.runners import iter_git_subproc +from datalad_core.runners import iter_git_subproc from .gitworktree import ( GitWorktreeItem, diff --git a/datalad_next/iter_collections/gitdiff.py b/datalad_next/iter_collections/gitdiff.py index e27d9d52..f5be0069 100644 --- a/datalad_next/iter_collections/gitdiff.py +++ b/datalad_next/iter_collections/gitdiff.py @@ -24,14 +24,12 @@ ) from datalad_next.consts import PRE_INIT_COMMIT_SHA -from datalad_next.gitpathspec import GitPathSpecs -from datalad_next.runners import ( +from datasalad.gitpathspec import GitPathSpecs +from datalad_core.runners import ( CommandError, - iter_git_subproc, -) -from datalad_next.runners import ( call_git, 
call_git_oneline, + iter_git_subproc, ) from .gittree import ( diff --git a/datalad_next/iter_collections/gitstatus.py b/datalad_next/iter_collections/gitstatus.py index 4abafb37..ea10733f 100644 --- a/datalad_next/iter_collections/gitstatus.py +++ b/datalad_next/iter_collections/gitstatus.py @@ -12,7 +12,7 @@ ) from datalad_next.consts import PRE_INIT_COMMIT_SHA -from datalad_next.runners import ( +from datalad_core.runners import ( call_git_lines, ) from datalad_next.repo_utils import ( diff --git a/datalad_next/iter_collections/gittree.py b/datalad_next/iter_collections/gittree.py index 83a85582..85eb98eb 100644 --- a/datalad_next/iter_collections/gittree.py +++ b/datalad_next/iter_collections/gittree.py @@ -21,7 +21,7 @@ itemize, ) -from datalad_next.runners import iter_git_subproc +from datalad_core.runners import iter_git_subproc from .utils import PathBasedItem diff --git a/datalad_next/iter_collections/gitworktree.py b/datalad_next/iter_collections/gitworktree.py index 9de92adc..893b7681 100644 --- a/datalad_next/iter_collections/gitworktree.py +++ b/datalad_next/iter_collections/gitworktree.py @@ -23,8 +23,8 @@ itemize, ) -from datalad_next.runners import iter_git_subproc -from datalad_next.gitpathspec import GitPathSpecs +from datalad_core.runners import iter_git_subproc +from datasalad.gitpathspec import GitPathSpecs from .utils import ( FileSystemItem, FileSystemItemType, diff --git a/datalad_next/iter_collections/tests/test_itergitstatus.py b/datalad_next/iter_collections/tests/test_itergitstatus.py index fca05642..fc165c55 100644 --- a/datalad_next/iter_collections/tests/test_itergitstatus.py +++ b/datalad_next/iter_collections/tests/test_itergitstatus.py @@ -2,7 +2,7 @@ import pytest from datalad_next.datasets import Dataset -from datalad_next.runners import ( +from datalad_core.runners import ( call_git_success, ) diff --git a/datalad_next/iterable_subprocess/__init__.py b/datalad_next/iterable_subprocess/__init__.py index 48ea6080..d68881a4 100644 
--- a/datalad_next/iterable_subprocess/__init__.py +++ b/datalad_next/iterable_subprocess/__init__.py @@ -1,9 +1,9 @@ """Context manager to communicate with a subprocess using iterables .. deprecated:: 1.6 - - This code has been moved to the datasalad library. - Use it from ``datasalad.iterable_subprocess`` instead. + This module is deprecated. It has been migrated to the `datasalad library + <https://github.com/datalad/datasalad>`__. Imports should be adjusted to + ``datasalad.iterable_subprocess``. This offers a higher level interface to subprocesses than Python's built-in subprocess module, and is particularly helpful when data won't fit in memory @@ -22,4 +22,13 @@ __all__ = ['iterable_subprocess'] +import warnings + from datasalad.iterable_subprocess import iterable_subprocess + +warnings.warn( + '`datalad_next.iterable_subprocess` has been migrated to the datasalad ' + 'library, adjust imports to `datasalad.iterable_subprocess`', + DeprecationWarning, + stacklevel=2, +) diff --git a/datalad_next/itertools/__init__.py b/datalad_next/itertools/__init__.py index f2ddf002..cef78546 100644 --- a/datalad_next/itertools/__init__.py +++ b/datalad_next/itertools/__init__.py @@ -1,9 +1,9 @@ """Various iterators, e.g., for subprocess pipelining and output processing .. deprecated:: 1.6 - - This code has been moved to the datasalad library. - Use it from ``datasalad.itertools`` instead. + This module is deprecated. It has been migrated to the `datasalad library + <https://github.com/datalad/datasalad>`__. Imports should be adjusted to + ``datasalad.itertools``.
""" __all__ = [ @@ -17,7 +17,10 @@ 'route_out', ] +import warnings + from datasalad.itertools import ( + StoreOnly, align_pattern, decode_bytes, itemize, @@ -25,5 +28,11 @@ load_json_with_flag, route_in, route_out, - StoreOnly, +) + +warnings.warn( + '`datalad_next.itertools` has been migrated to the datasalad library, ' + 'adjust imports to `datasalad.itertools`', + DeprecationWarning, + stacklevel=2, ) diff --git a/datalad_next/patches/replace_sshremoteio.py b/datalad_next/patches/replace_sshremoteio.py index 6e47b388..1a73ca83 100644 --- a/datalad_next/patches/replace_sshremoteio.py +++ b/datalad_next/patches/replace_sshremoteio.py @@ -50,7 +50,7 @@ from datalad_next.exceptions import CapturedException from datalad_next.patches import apply_patch -from datalad_next.runners import CommandError +from datalad_core.runners import CommandError from datalad_next.shell import ( FixedLengthResponseGeneratorPosix, shell, diff --git a/datalad_next/repo_utils/annex.py b/datalad_next/repo_utils/annex.py index f08f969a..07616d11 100644 --- a/datalad_next/repo_utils/annex.py +++ b/datalad_next/repo_utils/annex.py @@ -1,6 +1,6 @@ from pathlib import Path -from datalad_next.runners import call_git_success +from datalad_core.runners import call_git_success def has_initialized_annex( diff --git a/datalad_next/repo_utils/tests/test_head.py b/datalad_next/repo_utils/tests/test_head.py index 5530f447..9163ce1c 100644 --- a/datalad_next/repo_utils/tests/test_head.py +++ b/datalad_next/repo_utils/tests/test_head.py @@ -1,6 +1,6 @@ import pytest -from datalad_next.runners import call_git +from datalad_core.runners import call_git from .. 
import get_worktree_head diff --git a/datalad_next/repo_utils/worktree.py b/datalad_next/repo_utils/worktree.py index e5fb7426..2222fdf3 100644 --- a/datalad_next/repo_utils/worktree.py +++ b/datalad_next/repo_utils/worktree.py @@ -3,7 +3,7 @@ from pathlib import Path from datalad_next.exceptions import CapturedException -from datalad_next.runners import ( +from datalad_core.runners import ( CommandError, call_git_lines, ) diff --git a/datalad_next/runners/__init__.py b/datalad_next/runners/__init__.py index 6e6ab831..c2658b29 100644 --- a/datalad_next/runners/__init__.py +++ b/datalad_next/runners/__init__.py @@ -1,5 +1,10 @@ """Execution of subprocesses +.. deprecated:: 1.6 + This module is deprecated. It has been partially migrated to the + `datalad-core library <https://github.com/datalad/datalad-core>`__. Imports + should be adjusted to ``datalad_core.runners``. + This module provides all relevant components for subprocess execution. The main work horse is :func:`~datalad_next.runners.iter_subproc`, a context manager that enables interaction with a subprocess in the form of an iterable @@ -63,59 +68,95 @@ StdOutErrCapture """ -from .iter_subproc import ( - iter_subproc, -) -from .git import ( - call_git, - call_git_lines, - call_git_oneline, - call_git_success, - iter_git_subproc, +__all__ = [ + 'call_git', + 'call_git_lines', + 'call_git_oneline', + 'call_git_success', + 'iter_git_subproc', + 'iter_subproc', + 'CommandError', + 'GitRunner', + 'KillOutput', + 'NoCapture', + 'Protocol', + 'Runner', + 'StdErrCapture', + 'StdOutCapture', + 'StdOutErrCapture', + 'STDERR_FILENO', + 'STDOUT_FILENO', + 'ThreadedRunner', + 'LineSplitter', + 'GeneratorMixIn', + 'NoCaptureGeneratorProtocol', + 'StdOutCaptureGeneratorProtocol', + 'DEVNULL', +] + +import warnings + +# TODO: REMOVE FOR V2.0 +from subprocess import ( + DEVNULL, ) # runners -# TODO REMOVE FOR V2.0 -from datalad.runner import ( - GitRunner, - Runner, -) -# TODO REMOVE FOR V2.0 -from datalad.runner.nonasyncrunner import ThreadedRunner +# TODO: REMOVE 
FOR V2.0 # protocols -# TODO REMOVE FOR V2.0 +# TODO: REMOVE FOR V2.0 from datalad.runner import ( + GitRunner, KillOutput, NoCapture, Protocol, - StdOutCapture, + Runner, StdErrCapture, + StdOutCapture, StdOutErrCapture, ) -# TODO REMOVE FOR V2.0 -from datalad.runner.protocol import GeneratorMixIn -# TODO REMOVE FOR V2.0 -from .protocols import ( - NoCaptureGeneratorProtocol, - StdOutCaptureGeneratorProtocol, -) -# exceptions -# The following import supports legacy code that uses `CommandError` from this -# module. If you are writing new code, please use `CommandError` from -# `datalad.support.exceptions`. We intend to remove this import in the future. -from datalad_next.exceptions import CommandError +# TODO: REMOVE FOR V2.0 # utilities -# TODO REMOVE FOR V2.0 +# TODO: REMOVE FOR V2.0 from datalad.runner.nonasyncrunner import ( - STDOUT_FILENO, STDERR_FILENO, + STDOUT_FILENO, + ThreadedRunner, ) -# TODO REMOVE FOR V2.0 + +# TODO: REMOVE FOR V2.0 +from datalad.runner.protocol import GeneratorMixIn + +# TODO: REMOVE FOR V2.0 from datalad.runner.utils import ( LineSplitter, ) -# TODO REMOVE FOR V2.0 -from subprocess import ( - DEVNULL, +from datalad_core.runners import ( + call_git, + call_git_lines, + call_git_oneline, + call_git_success, + iter_git_subproc, + iter_subproc, +) + +# exceptions +# The following import supports legacy code that uses `CommandError` from this +# module. If you are writing new code, please use `CommandError` from +# `datalad_core.runners`. We intend to remove this import in the future. 
+from datalad_next.exceptions import CommandError + +# TODO: REMOVE FOR V2.0 +from .protocols import ( + NoCaptureGeneratorProtocol, + StdOutCaptureGeneratorProtocol, +) + +warnings.warn( + '`datalad_next.runners` has been partially migrated to the ' + 'datalad-core library, ' + 'check docs, and adjust imports to `datalad_core.runners`', + DeprecationWarning, + stacklevel=2, ) diff --git a/datalad_next/runners/git.py b/datalad_next/runners/git.py deleted file mode 100644 index 9dcdf2a7..00000000 --- a/datalad_next/runners/git.py +++ /dev/null @@ -1,212 +0,0 @@ -from __future__ import annotations - -import os -from pathlib import Path -import subprocess - -from datalad_next.exceptions import CapturedException - -from .iter_subproc import ( - CommandError, - iter_subproc, -) - - -def _call_git( - args: list[str], - *, - capture_output: bool = False, - cwd: Path | None = None, - check: bool = False, - text: bool | None = None, - input: str | bytes | None = None, - force_c_locale: bool = False, -) -> subprocess.CompletedProcess: - """Wrapper around ``subprocess.run`` for calling Git command - - ``args`` is a list of argument for the Git command. This list must not - contain the Git executable itself. It will be prepended (unconditionally) - to the arguments before passing them on. - - If ``force_c_locale`` is ``True`` the environment of the Git process - is altered to ensure output according to the C locale. This is useful - when output has to be processed in a locale invariant fashion. - - All other argument are pass on to ``subprocess.run()`` verbatim. 
- """ - env = None - if force_c_locale: - env = dict(os.environ, LC_ALL='C') - - # make configurable - git_executable = 'git' - cmd = [git_executable, *args] - try: - return subprocess.run( - cmd, - capture_output=capture_output, - cwd=cwd, - check=check, - text=text, - input=input, - env=env, - ) - except subprocess.CalledProcessError as e: - # TODO we could support post-error forensics, but some client - # might call this knowing that it could fail, and may not - # appreciate the slow-down. Add option `expect_fail=False`? - # - # normalize exception to datalad-wide standard - raise CommandError( - cmd=cmd, - code=e.returncode, - stdout=e.stdout, - stderr=e.stderr, - cwd=cwd, - ) from e - - -def call_git( - args: list[str], - *, - cwd: Path | None = None, - force_c_locale: bool = False, -) -> None: - """Call Git with no output capture, raises on non-zero exit. - - If ``cwd`` is not None, the function changes the working directory to - ``cwd`` before executing the command. - - If ``force_c_locale`` is ``True`` the environment of the Git process - is altered to ensure output according to the C locale. This is useful - when output has to be processed in a locale invariant fashion. - """ - _call_git( - args, - capture_output=False, - cwd=cwd, - check=True, - force_c_locale=force_c_locale, - ) - - -def call_git_success( - args: list[str], - *, - cwd: Path | None = None, - capture_output: bool = False, -) -> bool: - """Call Git and report success or failure of the command - - ``args`` is a list of arguments for the Git command. This list must not - contain the Git executable itself. It will be prepended (unconditionally) - to the arguments before passing them on. - - If ``cwd`` is not None, the function changes the working directory to - ``cwd`` before executing the command. - - If ``capture_output`` is ``True``, process output is captured, but not - returned. By default process output is not captured. 
- """ - try: - _call_git( - args, - capture_output=capture_output, - cwd=cwd, - check=True, - ) - except CommandError as e: - CapturedException(e) - return False - return True - - -def call_git_lines( - args: list[str], - *, - cwd: Path | None = None, - input: str | None = None, - force_c_locale: bool = False, -) -> list[str]: - """Call Git for any (small) number of lines of output - - ``args`` is a list of arguments for the Git command. This list must not - contain the Git executable itself. It will be prepended (unconditionally) - to the arguments before passing them on. - - If ``cwd`` is not None, the function changes the working directory to - ``cwd`` before executing the command. - - If ``input`` is not None, the argument becomes the subprocess’s stdin. - This is intended for small-scale inputs. For call that require processing - large inputs, ``iter_git_subproc()`` is to be preferred. - - If ``force_c_locale`` is ``True`` the environment of the Git process - is altered to ensure output according to the C locale. This is useful - when output has to be processed in a locale invariant fashion. - - Raises - ------ - CommandError if the call exits with a non-zero status. - """ - res = _call_git( - args, - capture_output=True, - cwd=cwd, - check=True, - text=True, - input=input, - force_c_locale=force_c_locale, - ) - return res.stdout.splitlines() - - -def call_git_oneline( - args: list[str], - *, - cwd: Path | None = None, - input: str | None = None, - force_c_locale: bool = False, -) -> str: - """Call Git for a single line of output - - If ``cwd`` is not None, the function changes the working directory to - ``cwd`` before executing the command. - - If ``input`` is not None, the argument becomes the subprocess’s stdin. - This is intended for small-scale inputs. For call that require processing - large inputs, ``iter_git_subproc()`` is to be preferred. 
- - If ``force_c_locale`` is ``True`` the environment of the Git process - is altered to ensure output according to the C locale. This is useful - when output has to be processed in a locale invariant fashion. - - Raises - ------ - CommandError if the call exits with a non-zero status. - AssertionError if there is more than one line of output. - """ - lines = call_git_lines(args, cwd=cwd, input=input, - force_c_locale=force_c_locale) - if len(lines) > 1: - raise AssertionError( - f"Expected Git {args} to return a single line, but got {lines}" - ) - return lines[0] - - -def iter_git_subproc( - args: list[str], - **kwargs -): - """``iter_subproc()`` wrapper for calling Git commands - - All argument semantics are identical to those of ``iter_subproc()``, - except that ``args`` must not contain the Git binary, but need to be - exclusively arguments to it. The respective `git` command/binary is - automatically added internally. - """ - cmd = ['git'] - cmd.extend(args) - - return iter_subproc(cmd, **kwargs) diff --git a/datalad_next/runners/iter_subproc.py b/datalad_next/runners/iter_subproc.py deleted file mode 100644 index 24154773..00000000 --- a/datalad_next/runners/iter_subproc.py +++ /dev/null @@ -1,122 +0,0 @@ -from __future__ import annotations - -from pathlib import Path -from typing import ( - Iterable, - List, -) -from datasalad.runners import CommandError as SaladCommandError -from datasalad.iterable_subprocess import iterable_subprocess - -from datalad_next.exceptions import CommandError -from datalad_next.consts import COPY_BUFSIZE - -__all__ = ['iter_subproc'] - - -def iter_subproc( - args: List[str], - *, - input: Iterable[bytes] | None = None, - chunk_size: int = COPY_BUFSIZE, - cwd: Path | None = None, - bufsize: int = -1, -): - """Context manager to communicate with a subprocess using iterables - - .. deprecated:: 1.6 - - Use ``datasalad.runners.iter_proc`` instead. Renamed ``input`` argument - to ``inputs``, and raises datalad's ``CommandError``. 
- - This offers a higher level interface to subprocesses than Python's - built-in ``subprocess`` module. It allows a subprocess to be naturally - placed in a chain of iterables as part of a data processing pipeline. - It is also helpful when data won't fit in memory and has to be streamed. - - This is a convenience wrapper around ``datalad_next.iterable_subprocess``, - which itself is a slightly modified (for use on Windows) fork of - https://github.com/uktrade/iterable-subprocess, written by - Michal Charemza. - - This function provides a context manager. - On entering the context, the subprocess is started, the thread to read - from standard error is started, the thread to populate subprocess - input is started. - When running, the standard input thread iterates over the input, - passing chunks to the process, while the standard error thread - fetches the error output, and while the main thread iterates over - the process's output from client code in the context. - - On context exit, the main thread closes the process's standard output, - waits for the standard input thread to exit, waits for the standard error - thread to exit, and wait for the process to exit. If the process exited - with a non-zero return code, a ``CommandError`` is raised, - containing the process's return code. - - If the context is exited due to an exception that was raised in the - context, the main thread terminates the process via ``Popen.terminate()``, - closes the process's standard output, waits for the standard input - thread to exit, waits for the standard error thread to exit, waits - for the process to exit, and re-raises the exception. - - Note, if an exception is raised in the context, this exception will bubble - up to the main thread. That means no ``CommandError`` will - be raised if the subprocess exited with a non-zero return code. - To access the return code in case of an exception inside the context, - use the ``code``-attribute of the ``as``-variable. 
- This object will always contain the return code of the subprocess. - For example, the following code will raise a ``StopIteration``-exception - in the context (by repeatedly using :func:`next`). The subprocess - will exit with ``2`` due to the illegal option ``-@``, and no - ``CommandError`` is raised. The return code is read from - the variable ``ls_stdout`` - - .. code-block:: python - - >>> from datalad_next.runners import iter_subproc - >>> try: - ... with iter_subproc(['ls', '-@']) as ls_stdout: - ... while True: - ... next(ls_stdout) - ... except Exception as e: - ... print(repr(e), ls_stdout.returncode) - StopIteration() 2 - - - Parameters - ---------- - args: list - Sequence of program arguments to be passed to ``subprocess.Popen``. - input: iterable, optional - If given, chunks of ``bytes`` to be written, iteratively, to the - subprocess's ``stdin``. - chunk_size: int, optional - Size of chunks to read from the subprocess's stdout/stderr in bytes. - cwd: Path - Working directory for the subprocess, passed to ``subprocess.Popen``. - bufsize: int, optional - Buffer size to use for the subprocess's ``stdin``, ``stdout``, and - ``stderr``. See ``subprocess.Popen`` for details. 
- - Returns - ------- - contextmanager - """ - try: - return iterable_subprocess( - args, - tuple() if input is None else input, - chunk_size=chunk_size, - cwd=cwd, - bufsize=bufsize, - ) - except SaladCommandError as e: - raise CommandError( - cmd=e.cmd, - msg=e.msg, - code=e.returncode, - stdout=e.stdout, - stderr=e.stderr, - cwd=e.cwd, - ) from e diff --git a/datalad_next/runners/tests/__init__.py b/datalad_next/runners/tests/__init__.py deleted file mode 100644 index e69de29b..00000000 diff --git a/datalad_next/runners/tests/test_git.py b/datalad_next/runners/tests/test_git.py deleted file mode 100644 index 33bbed67..00000000 --- a/datalad_next/runners/tests/test_git.py +++ /dev/null @@ -1,47 +0,0 @@ -import pytest - -from ..git import ( - CommandError, - call_git, - call_git_lines, - call_git_oneline, - call_git_success, - iter_git_subproc, -) - - -def test_call_git(): - # smoke test - call_git(['--version']) - # raises properly - with pytest.raises(CommandError): - call_git(['notacommand']) - - -def test_call_git_success(): - assert call_git_success(['--version']) - assert not call_git_success(['notacommand']) - - -def test_call_git_lines(): - lines = call_git_lines(['--version']) - assert len(lines) == 1 - assert lines[0].startswith('git version') - # check that we can force Git into LC_ALL mode. 
- # this test is only meaningful on systems that - # run with some other locale - call_git_lines(['-h'])[0].casefold().startswith('usage') - - -def test_call_git_oneline(): - line = call_git_oneline(['--version']) - assert line.startswith('git version') - with pytest.raises(AssertionError): - # TODO may not yield multiple lines on all systems - call_git_oneline(['config', '-l']) - - -def test_iter_git_subproc(): - # just a smoke test that 'git' gets prepended - with iter_git_subproc(['--version']) as g: - assert list(g) diff --git a/datalad_next/tests/fixtures.py b/datalad_next/tests/fixtures.py index 8edc5f22..50bee141 100644 --- a/datalad_next/tests/fixtures.py +++ b/datalad_next/tests/fixtures.py @@ -11,7 +11,7 @@ from urllib.request import urlopen from datalad_next.datasets import Dataset -from datalad_next.runners import ( +from datalad_core.runners import ( call_git_lines, call_git_success, ) diff --git a/datalad_next/url_operations/ssh.py b/datalad_next/url_operations/ssh.py index 47410747..8a2d33c7 100644 --- a/datalad_next/url_operations/ssh.py +++ b/datalad_next/url_operations/ssh.py @@ -31,7 +31,7 @@ from datalad_next.consts import COPY_BUFSIZE from datalad_next.config import ConfigManager -from datalad_next.runners import CommandError +from datalad_core.runners import CommandError from datalad_next.shell import ( FixedLengthResponseGeneratorPosix, ShellCommandExecutor, diff --git a/docs/source/index.rst b/docs/source/index.rst index 435f2bd6..ad9f1944 100644 --- a/docs/source/index.rst +++ b/docs/source/index.rst @@ -72,11 +72,11 @@ as stable as possible. This includes signatures and return value behavior. As an example:: - from datalad_next.runners import iter_git_subproc + from datalad_next.repo_utils import get_worktree_head imports a part of the public API, but:: - from datalad_next.runners.git import iter_git_subproc + from datalad_next.repo_utils.worktree import get_worktree_head does not. 
diff --git a/pyproject.toml b/pyproject.toml index efd14367..5548c277 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -36,11 +36,16 @@ classifiers = [ dependencies = [ "annexremote", "datalad >= 0.18.4", - "datasalad >= 0.2.1", + "datalad_core @ git+https://github.com/datalad/datalad-core", + #"datasalad >= 0.2.1", + "datasalad @ git+https://github.com/datalad/datasalad", "humanize", "more-itertools", ] +[tool.hatch.metadata] +allow-direct-references = true + [project.urls] Homepage = "https://github.com/datalad/datalad-next" Documentation = "https://docs.datalad.org/projects/next/en/latest/"