Skip to content

Commit 557fbd2

Browse files
authored
bug/medcat: CU-869bbj5u4 Fix core dependencies (#251)
* CU-869bbj5u4: Add explicit checks for optional deps for conversion modules * CU-869bbj5u4: Add missing core dependencies (packaging, pyyaml, requests) * CU-869bbj5u4: Add script to be able to test that base install can import all its parts (or has explicit checks for optional extras) * CU-869bbj5u4: Rename base install check module * CU-869bbj5u4: Include __main__ in import checks * CU-869bbj5u4: Update workflow to add check of base installs * CU-869bbj5u4: Fix python version
1 parent 11e7ed6 commit 557fbd2

File tree

5 files changed

+168
-5
lines changed

5 files changed

+168
-5
lines changed

.github/workflows/medcat-v2_main.yml

Lines changed: 19 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -11,7 +11,25 @@ defaults:
1111
run:
1212
working-directory: ./medcat-v2
1313
jobs:
14-
build:
14+
base-install-imports:
15+
runs-on: ubuntu-latest
16+
steps:
17+
- uses: actions/checkout@v5
18+
# NOTE: using oldest supported python version
19+
- name: Install uv for Python 3.10
20+
uses: astral-sh/setup-uv@v7
21+
with:
22+
python-version: "3.10"
23+
enable-cache: true
24+
cache-dependency-glob: "medcat-v2/uv.lock"
25+
- name: Install the project
26+
run: |
27+
uv sync # NO extras
28+
- name: Check that all packages / modules can be imported with default / no-extras install
29+
run: |
30+
uv run python tests/other/check_base_install_can_import_all.py medcat
31+
32+
types-lints-tests-regression:
1533
runs-on: ubuntu-latest
1634
strategy:
1735
matrix:

medcat-v2/medcat/utils/legacy/convert_meta_cat.py

Lines changed: 7 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -3,8 +3,6 @@
33
import json
44
import logging
55

6-
import torch
7-
86
from medcat.components.addons.meta_cat import MetaCAT, MetaCATAddon
97
from medcat.components.addons.meta_cat.mctokenizers.tokenizers import (
108
TokenizerWrapperBase, load_tokenizer)
@@ -13,6 +11,13 @@
1311

1412
from medcat.utils.legacy.helpers import fix_old_style_cnf
1513

14+
# NOTE: needs to be before torch since default doesn't include torch
15+
from medcat.utils.import_utils import ensure_optional_extras_installed
16+
_EXTRA_NAME = "meta-cat"
17+
ensure_optional_extras_installed("medcat", _EXTRA_NAME)
18+
19+
import torch # noqa
20+
1621

1722
logger = logging.getLogger(__name__)
1823

medcat-v2/medcat/utils/legacy/convert_rel_cat.py

Lines changed: 7 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -2,8 +2,6 @@
22
import json
33
import logging
44

5-
import torch
6-
75
from medcat.cdb import CDB
86
from medcat.components.addons.relation_extraction.rel_cat import (
97
RelCAT, RelCATAddon)
@@ -13,6 +11,13 @@
1311
from medcat.tokenizing.tokenizers import BaseTokenizer, create_tokenizer
1412
from medcat.utils.legacy.helpers import fix_old_style_cnf
1513

14+
# NOTE: needs to be before torch since default doesn't include torch
15+
from medcat.utils.import_utils import ensure_optional_extras_installed
16+
_EXTRA_NAME = "rel-cat"
17+
ensure_optional_extras_installed("medcat", _EXTRA_NAME)
18+
19+
import torch # noqa
20+
1621

1722
logger = logging.getLogger(__name__)
1823

medcat-v2/pyproject.toml

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -65,6 +65,9 @@ dependencies = [ # Optional
6565
"xxhash>=3.5.0,<4.0",
6666
"pydantic>2.0",
6767
"typing-extensions",
68+
"packaging",
69+
"pyyaml",
70+
"requests",
6871
# TODO - others
6972
]
7073

Lines changed: 132 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,132 @@
1+
from typing import Iterable
2+
import os
3+
import subprocess
4+
import sys
5+
import re
6+
from collections import Counter
7+
8+
# Error text that names a missing optional dependency set; group 1 captures
# the name of that set (word characters, dashes and underscores).
_MISSING_DEP_REGEX = r"The optional dependency set '([\w\-_]*)' is missing"
MISSING_DEP_PATTERN = re.compile(_MISSING_DEP_REGEX)
10+
11+
12+
def walk_packages(path: str,
                  base_pkg_name: str,
                  base_path: str = '') -> Iterable[str]:
    """Yield the dotted name of every module found under ``path``.

    The directory tree is walked recursively. Each ``__init__.py`` yields
    the dotted name of the package containing it; every other ``.py`` file
    (``__main__.py`` included) yields ``<package>.<file stem>``.
    Directories whose name starts or ends with a double underscore
    (e.g. ``__pycache__``) are not descended into.

    Entries are visited in sorted order so the result does not depend on
    the order in which the file system happens to list them.

    Args:
        path: Directory to scan.
        base_pkg_name: Dotted name of the package rooted at ``base_path``.
        base_path: Root directory of the package. Defaults to ``path``;
            recursive calls pass it on so that names stay relative to
            the root.

    Yields:
        Dotted module names.
    """
    if not base_path:
        base_path = path
    # Turn the directory part below the root into a dotted package suffix.
    pkg_path = path.removeprefix(base_path).replace(
        os.path.sep, '.').strip(".")
    pkg_to_here = f"{base_pkg_name}.{pkg_path}" if pkg_path else base_pkg_name
    # os.listdir returns entries in arbitrary order; sort for stable output.
    for fn in sorted(os.listdir(path)):
        cur_path = os.path.join(path, fn)
        if os.path.isdir(cur_path) and (
                not fn.startswith("__") and not fn.endswith("__")):
            yield from walk_packages(cur_path, base_pkg_name=base_pkg_name,
                                     base_path=base_path)
            continue
        if not fn.endswith(".py"):
            continue
        if fn == "__init__.py":
            # The package itself is importable under its own dotted name.
            yield pkg_to_here
            continue
        yield f"{pkg_to_here}.{fn.removesuffix('.py')}"
33+
34+
35+
def find_all_modules(package_name, package_path=None):
    """Find all importable modules in a package.

    Args:
        package_name: Dotted name of the package to inspect.
        package_path: Optional sequence of directories shaped like a
            package's ``__path__``; only its first entry is walked. When
            omitted, the package is imported to discover its location.

    Returns:
        A list of dotted module names. The list is empty when the package
        cannot be imported, and contains only ``package_name`` when the
        name refers to a plain module rather than a package.
    """
    if package_path is None:
        import importlib

        try:
            # Unlike __import__, import_module returns the named
            # (sub)module itself for dotted names such as "a.b" rather
            # than the top-level package "a".
            pkg = importlib.import_module(package_name)
        except ImportError:
            print(f"Could not import {package_name}")
            return []
        package_path = getattr(pkg, "__path__", None)
        if package_path is None:
            # A plain module has no directory to walk; it is the only
            # importable unit.
            return [package_name]

    return list(walk_packages(package_path[0], base_pkg_name=package_name))
52+
53+
54+
def test_import(module_name, timeout=30):
    """Test if a module can be imported in isolation.

    The import runs in a fresh interpreter (``sys.executable -c``) so that
    modules already imported by this process cannot mask a missing
    dependency.

    Args:
        module_name: Dotted name of the module to import.
        timeout: Seconds to wait for the child interpreter to finish.

    Returns:
        A ``(success, stderr)`` tuple. ``success`` is True when the child
        exited with status 0. When the import does not finish within
        ``timeout`` seconds, ``success`` is False and the second item
        describes the timeout instead of carrying the child's stderr.
    """
    code = f"import {module_name}"
    try:
        result = subprocess.run(
            [sys.executable, "-c", code],
            capture_output=True,
            text=True,
            timeout=timeout,
        )
    except subprocess.TimeoutExpired:
        # Report a hanging import as an ordinary failure rather than
        # letting the exception abort the whole run.
        return False, (
            f"TimeoutError: importing {module_name} did not finish "
            f"within {timeout} seconds")
    return result.returncode == 0, result.stderr
64+
65+
66+
def get_missing_dep_set(error: str) -> str | None:
67+
err1 = error.strip().split('\n')[-1]
68+
if "MissingDependenciesError" not in err1:
69+
return None
70+
matches = MISSING_DEP_PATTERN.findall(err1)
71+
if len(matches) != 1:
72+
raise ValueError(f"Unknown error:\n'{error}'\nLookin at:\n{err1}"
73+
f"\ngot: {matches}")
74+
return matches[0]
75+
76+
77+
def main():
    """Import every module of the given package and report the outcome.

    The package name is read from the first command line argument. Each
    module is imported via ``test_import`` and classified as successful,
    missing an optional dependency set (as reported by
    ``get_missing_dep_set``), or failed.

    Exits with status 1 when no package name is given, when no modules
    are found, or when at least one module failed to import.
    """
    if len(sys.argv) < 2:
        # Show the name the script was actually invoked with.
        script_name = os.path.basename(sys.argv[0])
        print(f"Usage: python {script_name} <package_name>")
        sys.exit(1)

    package_name = sys.argv[1]

    print(f"Finding all modules in {package_name}...")
    modules = find_all_modules(package_name)

    if not modules:
        print(f"No modules found in {package_name}")
        sys.exit(1)

    print(f"Found {len(modules)} modules. Testing imports...\n")

    successful = []
    missing_opt_dep_expl = []
    failed = []

    for module in modules:
        success, error = test_import(module)
        if success:
            successful.append(module)
            print(f"✓ {module}")
        elif (missing_dep := get_missing_dep_set(error)):
            missing_opt_dep_expl.append((module, missing_dep))
            print(f"M {module}: missing {missing_dep}")
        else:
            failed.append((module, error))
            print(f"✗ {module}")
            # Print the last line of the error for quick diagnosis
            last_error_line = (
                error.strip().split('\n')[-1] if error else "Unknown error")
            print(f" → {last_error_line}")

    # Summary
    print("\n" + "="*60)
    # Number of modules affected by each missing optional dependency set.
    per_opt_dep_missing = Counter(
        missing_dep for _, missing_dep in missing_opt_dep_expl)
    print(f"Results: {len(successful)} successful, "
          f"{len(missing_opt_dep_expl)} missing optional deps "
          f"({per_opt_dep_missing}), {len(failed)} failed")
    print("="*60)

    if failed:
        print("\nFailed imports:")
        for module, error in failed:
            print(f"\n{module}:")
            print(error)
        sys.exit(1)


if __name__ == "__main__":
    main()

0 commit comments

Comments
 (0)