l6a · nayera16 · Nov 12, 2025 · Nov 19, 2025 · Jan 10, 2026 · Jan 11, 2026
diff --git a/demos/ParaFrame.ipynb b/demos/ParaFrame.ipynb
diff --git a/demos/data/.hallmark.yaml b/demos/data/.hallmark.yaml
@@ -0,0 +1,13 @@
+data:
+- fmt: /{mag:d}a{aspin}_w{win:d}.h5
+  # path_to_fmt: m5/data
+  encoding:
+    aspin: m([0-9]+(\.[0-9]+)?|\.[0-9]+)
+
+- fmt: /a_{a:d}/b_{b:d}.txt
+  # path_to_fmt: data
+
+- fmt: /a{aspin}/b_{b:d}.txt
+  # path_to_fmt: data
+  encoding:
+    aspin: ''
diff --git a/mod/hallmark/__init__.py b/mod/hallmark/__init__.py
@@ -14,3 +14,4 @@
 # limitations under the License.
 
 from .core import ParaFrame as ParaFrame
+from .helper_functions import set_rel_yaml_path as set_rel_yaml_path
diff --git a/mod/hallmark/core.py b/mod/hallmark/core.py
@@ -13,13 +13,18 @@
 # See the License for the specific language governing permissions and
 # limitations under the License.
 
-
 from glob import glob
 
 import re
 import parse
 import pandas as pd
 import numpy as np
+from pathlib import Path
+
+from .helper_functions import (get_rel_yaml_path, 
+                                load_encodings_yaml, 
+                                find_spec_by_fmt, 
+                                regex_sub)
 
 class ParaFrame(pd.DataFrame):
     """
@@ -32,21 +37,22 @@ class ParaFrame(pd.DataFrame):
       parameters from a format pattern (using ``glob`` + ``parse``).
     * ``__call__``/``filter``: convenience filtering by column values.
     """
+
     @property
     def _constructor(self):
         return ParaFrame
 
     def __call__(self, **kwds):
         return self.filter(**kwds)
-    
+
     def filter(self, **kwargs):
         """
         Filter a pandas ``DataFrame`` by matching column values.
 
         This function utlizes provided **kwargs to filter an existing
-        ``ParaFrame`` by masking based on column values. Filtering supports 
-        single- and multi-conditioned queries, returning rows that satisfy 
-        any of the provided conditions. 
+        ``ParaFrame`` by masking based on column values. Filtering supports
+        single- and multi-conditioned queries, returning rows that satisfy
+        any of the provided conditions.
 
         Args:
          **kwargs: Arbitrary keyword arguments specifying column names
@@ -63,13 +69,102 @@ def filter(self, **kwargs):
         mask = [False] * len(self)
         for k, v in kwargs.items():
             if isinstance(v, (tuple, list)):
-                mask |= np.isin(np.array(self[k]),np.array(v))
+                mask |= np.isin(np.array(self[k]), np.array(v))
             else:
                 mask |= np.array(self[k]) == v
         return self[mask]
 
     @classmethod
-    def parse(cls, fmt, *args, debug=False, **kwargs):
+    def glob_search(cls, fmt, *args, debug=False, return_pattern=False,
+                    encoding=False, **kwargs):
+        """
+        Glob for files matching ``fmt`` next to the active ``.hallmark.yaml``.
+
+        The first YAML entry whose ``fmt`` is a substring of ``fmt`` selects
+        the spec; when no entry matches, ``fmt`` itself is looked up.  Format
+        fields that are not supplied through ``**kwargs`` are turned into
+        ``*`` wildcards in the glob pattern.
+
+        Args:
+            fmt: Format string; it is appended verbatim to the directory
+                that contains the YAML file.
+            *args: Positional values substituted into the pattern.
+            debug: When true, print the pattern at every substitution step
+                and a summary of the matches.
+            return_pattern: Selects the shape of the return value.
+            encoding: Must be truthy exactly when the selected spec carries
+                a non-empty ``encoding`` regex.
+            **kwargs: Named values substituted into the pattern.
+
+        Returns:
+            ``(files, pattern)`` when ``return_pattern`` is true, otherwise
+            ``(spec, fmt_g, files)`` where ``fmt_g`` is the spec's format
+            without leading slashes and with every wildcarded field
+            rewritten to ``{name:g}``.
+
+        Raises:
+            ValueError: If the selected format is missing from the YAML
+                file, or if ``encoding`` disagrees with the spec.
+        """
+        # A parameter needs at least three characters ('{p}'), so `fmt`
+        # cannot hold more than `len(fmt) // 3` of them.
+        pmax = len(fmt) // 3
+
+        # Falling back to `fmt` keeps the name bound even when the YAML
+        # file lists no specs at all.
+        fmt_enc = next(
+            (spec["fmt"] for spec in load_encodings_yaml()
+             if spec["fmt"] in fmt),
+            fmt,
+        )
+
+        yaml_encodings = find_spec_by_fmt(fmt_enc)
+        if yaml_encodings is None:
+            raise ValueError(
+                f"Error: The format '{fmt_enc}' is missing from .hallmark.yaml."
+            )
+
+        # Only the selected spec decides whether encoding is required;
+        # `or {}` also covers a spec without (or with a null) `encoding` key.
+        enc_dict = yaml_encodings.get("encoding") or {}
+        needs_encoding = any(regex != "" for regex in enc_dict.values())
+
+        if needs_encoding and not encoding:
+            raise ValueError(
+                f'''Error: '{fmt_enc}' has a regex spec, 
+                so you must use encoding=True'''
+            )
+
+        if not needs_encoding and encoding:
+            raise ValueError(
+                f'''Error: '{fmt_enc}' does not have a 
+                regex spec, so you must use encoding=False'''
+            )
+
+        # Construct the glob pattern for search files
+        base = str(get_rel_yaml_path().parent)
+        pattern = base + fmt
+        fmt_g = fmt_enc.lstrip("/")
+
+        for i in range(pmax):
+            if debug:
+                print(i, pattern, args, kwargs)
+            try:
+                pattern = pattern.format(*args, **kwargs)
+                break
+            except KeyError as e:
+                # Each missing field becomes a '*' wildcard: the glob pattern
+                # takes it as a plain string, the parse format as `:g`.
+                k = e.args[0]
+                field = r"\{" + re.escape(k) + r":?.*?\}"
+                pattern = re.sub(field, "{" + k + ":s}", pattern)
+                fmt_g = re.sub(field, "{" + k + ":g}", fmt_g)
+                kwargs[k] = "*"
+
+        # Obtain list of files based on the glob pattern
+        globbed_files = sorted(glob(pattern))
+
+        # Print the glob pattern and a summary of matches
+        if debug:
+            print(f'Pattern: "{pattern}"')
+            n = len(globbed_files)
+            if n > 1:
+                print(f'{n} matches, e.g., "{globbed_files[0]}"')
+            elif n > 0:
+                print(f'{n} match, i.e., "{globbed_files[0]}"')
+            else:
+                print("No match; please check format string")
+
+        if return_pattern:
+            return (globbed_files, pattern)
+        return (yaml_encodings, fmt_g, globbed_files)
+
+    @classmethod
+    def parse(cls, fmt, *args, debug=False, encoding=False, **kwargs):
         """
         Construct a ``ParaFrame`` by parsing file paths that match a pattern.
 
@@ -107,45 +202,25 @@ def parse(cls, fmt, *args, debug=False, **kwargs):
         0  data/run1_p10.csv  1   10
         1  data/run2_p20.csv  2   20
         """
-        pmax = len(fmt) // 3  # to specify a parameter, we need at least
-                              # three characters '{p}'; the maximum number
-                              # of possible parameters is `len(fmt) // 3`.
-
-        # Construct the glob pattern for search files
-        pattern = fmt
-        for i in range(pmax):
-            if debug:
-                print(i, pattern, args, kwargs)
-            try:
-                pattern = pattern.format(*args, **kwargs)
-                break
-            except KeyError as e:
-                k = e.args[0]
-                pattern = re.sub(r'\{'+k+r':?.*?\}', '{'+k+':s}', pattern)
-                kwargs[e.args[0]] = '*'
-
-        # Obtain list of files based on the glob pattern
-        files = sorted(glob(pattern))
+        # Parse list of file names back to parameters
+        yaml_encodings, fmt_g, globbed_files = cls.glob_search(fmt, *args, 
+                                                               debug=debug, 
+                                                               encoding=encoding,
+                                                                **kwargs)
+        parser = parse.compile(fmt_g)
+
+        frame = []
 
-        # Print the glob pattern and a summary of matches
-        if debug:
-            print(f'Pattern: "{pattern}"')
-            n = len(files)
-            if n > 1:
-                print(f'{n} matches, e.g., "{files[0]}"')
-            elif n > 0:
-                print(f'{n} match, i.e., "{files[0]}"')
+        for f in globbed_files:
+            f_short = str(Path(f).relative_to(Path(get_rel_yaml_path().parent)))
+            if encoding:
+                f_new = regex_sub(f_short, yaml_encodings)
             else:
-                print('No match; please check format string')
+                f_new = f_short
 
-        # Parse list of file names back to parameters
-        parser = parse.compile(fmt)
-
-        frame = []
-        for f in files:
-            r = parser.parse(f)
+            r = parser.parse(f_new)
             if r is None:
                 print(f'Failed to parse "{f}"')
             else:
-                frame.append({'path':f, **r.named})
-        return cls(frame)
+                frame.append({'path': f_short, **r.named})
+        return cls(frame)
diff --git a/mod/hallmark/helper_functions.py b/mod/hallmark/helper_functions.py
@@ -0,0 +1,63 @@
+from pathlib import Path
+import yaml
+import re
+
+_user_yaml_path = None
+
+def set_rel_yaml_path(path):
+    """Record ``path``, resolved to an absolute path, as the YAML file to use."""
+    global _user_yaml_path
+    resolved = Path(path).resolve()
+    _user_yaml_path = resolved
+
+def get_rel_yaml_path():
+    """Return the user-set YAML path, or ``.hallmark.yaml`` beside this module."""
+    if _user_yaml_path is None:
+        return Path(__file__).parent / ".hallmark.yaml"
+    return _user_yaml_path
+
+def load_encodings_yaml():
+    """
+    Load the ``data`` list from the active ``.hallmark.yaml`` file.
+
+    Every ``path_to_fmt`` value found in an entry is replaced by its
+    absolute form, resolved relative to the directory of the YAML file.
+
+    Returns:
+        The list stored under the top-level ``data`` key.
+    """
+    path = get_rel_yaml_path()
+    yaml_dir = Path(path).resolve().parent
+    # The context manager closes the handle even if parsing fails.
+    with path.open("r", encoding="utf-8") as f:
+        yaml_file = yaml.safe_load(f)
+    encodings = yaml_file["data"]
+    # Resolve path_to_fmt relative to the yaml file's directory
+    for entry in encodings:
+        if "path_to_fmt" in entry:
+            entry["path_to_fmt"] = str(
+                (yaml_dir / entry["path_to_fmt"]).resolve()
+            )
+
+    return encodings
+
+def find_spec_by_fmt(fmt):
+    """
+    Return the YAML entry whose ``fmt`` equals ``fmt`` exactly.
+
+    Args:
+        fmt: Format string to look up in the ``data`` list.
+
+    Returns:
+        The first matching entry, or ``None`` when there is none.
+    """
+    path = get_rel_yaml_path()
+    # The context manager closes the handle even if parsing fails.
+    with path.open("r", encoding="utf-8") as f:
+        yaml_file = yaml.safe_load(f)
+    for spec in yaml_file["data"]:
+        if spec.get("fmt") == fmt:
+            return spec
+    return None
+
+def regex_sub(f, yaml_encodings):
+    """
+    Rewrite every ``aspin`` regex match in ``f`` as ``-`` plus its group 1.
+
+    Only the ``aspin`` entry of the spec's ``encoding`` mapping is applied.
+
+    Args:
+        f: String to rewrite.
+        yaml_encodings: Spec mapping, or ``None``.  The regex is read from
+            ``yaml_encodings["encoding"]["aspin"]`` and must define at
+            least one capture group.
+
+    Returns:
+        The rewritten string, or ``f`` unchanged when the spec is ``None``
+        or has no non-empty ``aspin`` regex.
+    """
+    if yaml_encodings is None:
+        return f
+
+    enc = yaml_encodings.get("encoding", None)
+    if not enc:
+        return f
+
+    regex = enc.get("aspin", "")
+    if not regex:
+        return f
+
+    # Substitute in a single pass over the real matches.  Re-using the
+    # matched text as a pattern would treat characters such as '.' as regex
+    # syntax and could rewrite text the encoding regex never matched.
+    return re.sub(regex, lambda match: "-" + str(match.group(1)), f)
diff --git a/pyproject.toml b/pyproject.toml
@@ -26,6 +26,7 @@ dependencies =[
     "numpy",
     "pandas",
     "parse",
+    "PyYAML",
 ]
 
 [tool.setuptools.packages.find]

diff --git a/tests/conftest.py b/tests/conftest.py
@@ -1,11 +1,73 @@
 import pytest
+import shutil
+import yaml
+from pathlib import Path
+import hallmark
+
+ORIGINAL_YAML = Path("demos/data/.hallmark.yaml")
+
+@pytest.fixture(scope="function")
+def encodings_yaml(tmp_path):
+    """Copy the demo YAML into ``tmp_path``, activate it, and return its path."""
+    target = tmp_path / ".hallmark.yaml"
+    shutil.copy2(ORIGINAL_YAML, target)
+    hallmark.set_rel_yaml_path(target)
+    return target
+
+@pytest.fixture(scope="function", autouse=True)
+def _append_tmp_path_entries_to_encodings_yaml(tmp_path, encodings_yaml):
+    """Replace the temporary YAML's specs with the three formats the tests use.
+
+    Every format is registered with the same ``aspin`` encoding regex.
+    """
+    aspin_regex = r"m([0-9]+(\.[0-9]+)?|\.[0-9]+)"
+    test_formats = (
+        "/a_{a:d}/b_{b:d}.txt",
+        "/a{aspin}/b_{b:d}.txt",
+        "/{mag:d}_mag{aspin}_w{win:d}.h5",
+    )
+
+    # Start from an empty spec list, discarding the copied demo entries.
+    encodings_yaml.write_text("data: []\n", encoding="utf-8")
+    config = yaml.safe_load(encodings_yaml.read_text(encoding="utf-8")) or {}
+    config.setdefault("data", []).extend(
+        {"fmt": fmt, "encoding": {"aspin": aspin_regex}}
+        for fmt in test_formats
+    )
+    encodings_yaml.write_text(
+        yaml.safe_dump(config, sort_keys=False), encoding="utf-8"
+    )
+    yield
+
+def spin_format(val):
+    """Format a spin value: ``"0"`` for zero, otherwise signed ``g`` notation."""
+    return "0" if val == 0 else f"{val:+g}"
 
 @pytest.fixture(scope = "function")
 def create_temp_data(tmp_path):
+    """Create ``a_0`` .. ``a_9`` under ``tmp_path``, each holding
+    ``b_10.txt`` .. ``b_19.txt``, and return ``tmp_path``."""
-    data_dir = tmp_path / "data"
+    data_dir = tmp_path
     for a in range(10):
         subdir = data_dir / f"a_{a}"
         subdir.mkdir(parents=True)
         for b in range(10, 20):
             (subdir / f"b_{b}.txt").touch()
+    return data_dir
+
+@pytest.fixture(scope = "function")
+def create_temp_data_spin(tmp_path):
+    """Create one ``a<spin>`` directory per spin (-0.5, 0.0, 0.5) under
+    ``tmp_path``, each holding ``b_10.txt`` .. ``b_19.txt``."""
+    for spin in (-0.5, 0.0, 0.5):
+        spin_dir = tmp_path / f"a{spin_format(spin)}"
+        spin_dir.mkdir(parents=True)
+        for b in range(10, 20):
+            spin_dir.joinpath(f"b_{b}.txt").touch()
+    return tmp_path
+
+@pytest.fixture(scope = "function")
+def create_temp_data_spin_with_m(tmp_path):
+    """Create empty ``{mag}_mag{aspin}_w{win}.h5`` files directly in
+    ``tmp_path`` for mag 0-1, aspin ``m0.5``/``0``/``0.5`` and win 10-19."""
+    for mag in range(2):
+        for spin_tag in ("m0.5", "0", "0.5"):
+            for win in range(10, 20):
+                tmp_path.joinpath(f"{mag}_mag{spin_tag}_w{win}.h5").touch()
+    return tmp_path
Original file line number	Diff line number	Diff line change
Expand Up		@@ -14,3 +14,4 @@
		# limitations under the License.

		from .core import ParaFrame as ParaFrame
		from .helper_functions import set_rel_yaml_path as set_rel_yaml_path