Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
35 commits
Select commit Hold shift + click to select a range
3361216
separating out a glob function from the parse function, doesn't work …
nayera16 Nov 12, 2025
fe4fcb3
Updated core.py file with the debug issue fixed along with the latest…
rm1771 Nov 19, 2025
46fee32
Merge 'origin/main' into formatting_issue after package publishing ch…
nayera16 Jan 10, 2026
ba70f39
finalizing glob/parse method split, setting glob up for testing
nayera16 Jan 11, 2026
c978132
added first glob method unit test
nayera16 Jan 11, 2026
9bfaf29
additional glob/parse method separation unit testing
nayera16 Jan 11, 2026
940f836
added pytest fixtures and three xfail tests for formatter testing wit…
nayera16 Jan 12, 2026
9a1063a
skeleton build of a string.Formatter subclass with logic not yet impl…
nayera16 Jan 30, 2026
4863ea0
Updated core file with new_fmt to allow for strings in the input fmt.…
HaydenMarchinek Feb 4, 2026
1c80b76
added the first helper function
HaydenMarchinek Feb 4, 2026
3b0d597
added unit test for yaml loader helper function. fixed yaml file spel…
nayera16 Feb 4, 2026
cc34749
Updated core file with encoding map and removed pre-processing idea
HaydenMarchinek Feb 5, 2026
552a21c
added new version of yaml file that allows multiple fmt's
HaydenMarchinek Feb 10, 2026
1810fd4
updated the core file, encodings yaml file, and helper functions to a…
HaydenMarchinek Feb 11, 2026
7b36db0
Updated core file to allow integer and floats in parse. Added PyTOML …
HaydenMarchinek Feb 11, 2026
1d0a0f2
adapting tests and yaml file for updates to core
nayera16 Feb 11, 2026
8d8df5c
Started updating tests to match with the current Paraframe indexing m…
rm1771 Feb 11, 2026
21bed1b
Added documentation to the helper_functions.py file and made test_par…
Feb 13, 2026
704ba84
updated current spin format unit tests with correct formatter solutio…
nayera16 Feb 13, 2026
8a26a08
Merge branch 'formatting_issue_prototype' of github.com:l6a/hallmark …
nayera16 Feb 13, 2026
80564e0
added unit tests for "m" formatting of spin types. fixed encodings re…
nayera16 Feb 13, 2026
d0d82f7
revert paraframe creation back to using just fmt and no index
nayera16 Feb 13, 2026
01a8569
Attempted to revert back to fmt as an argument with test modifications
rm1771 Feb 13, 2026
5ab3089
Attempted to make a cleaner version for the encoding map
rm1771 Feb 16, 2026
3245925
Added code to the core file that carries out regex sub to only the cu…
Feb 17, 2026
7c97b75
Updated hallmark to allow an absolute fmt string to be input for fmt …
HaydenMarchinek Feb 18, 2026
2802ed6
Updated conftest with a method to back up whats currently stored in y…
HaydenMarchinek Feb 18, 2026
b75cd6e
Changed the way that the yaml file gets overwritten to allow new temp…
HaydenMarchinek Feb 20, 2026
e827434
Changed the encodings.yaml to consider case when no encoding specifie…
Feb 20, 2026
93d0422
cleaning up yaml and core files to get ready for PR
nayera16 Feb 24, 2026
ec709bd
made changes to entire parsing process so that yaml file can be in th…
nayera16 Feb 25, 2026
d166582
Updated conftest such that a new temporary yaml file gets created in …
HaydenMarchinek Feb 26, 2026
925ccf4
fixing linter errors for PR
nayera16 Mar 2, 2026
8bdf0fa
fixing linter errors for PR 2
nayera16 Mar 2, 2026
6d7bd57
fixing linter errors for PR 3
nayera16 Mar 2, 2026
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1,156 changes: 51 additions & 1,105 deletions demos/ParaFrame.ipynb

Large diffs are not rendered by default.

13 changes: 13 additions & 0 deletions demos/data/.hallmark.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,13 @@
data:
- fmt: /{mag:d}a{aspin}_w{win:d}.h5
# path_to_fmt: m5/data
encoding:
aspin: m([0-9]+(\.[0-9]+)?|\.[0-9]+)

- fmt: /a_{a:d}/b_{b:d}.txt
# path_to_fmt: data

- fmt: /a{aspin}/b_{b:d}.txt
# path_to_fmt: data
encoding:
aspin: ''
1 change: 1 addition & 0 deletions mod/hallmark/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -14,3 +14,4 @@
# limitations under the License.

from .core import ParaFrame as ParaFrame
from .helper_functions import set_rel_yaml_path as set_rel_yaml_path
161 changes: 118 additions & 43 deletions mod/hallmark/core.py
Original file line number Diff line number Diff line change
Expand Up @@ -13,13 +13,18 @@
# See the License for the specific language governing permissions and
# limitations under the License.


from glob import glob

import re
import parse
import pandas as pd
import numpy as np
from pathlib import Path

from .helper_functions import (get_rel_yaml_path,
load_encodings_yaml,
find_spec_by_fmt,
regex_sub)

class ParaFrame(pd.DataFrame):
"""
Expand All @@ -32,21 +37,22 @@ class ParaFrame(pd.DataFrame):
parameters from a format pattern (using ``glob`` + ``parse``).
* ``__call__``/``filter``: convenience filtering by column values.
"""

@property
def _constructor(self):
return ParaFrame

def __call__(self, **kwds):
return self.filter(**kwds)

def filter(self, **kwargs):
"""
Filter a pandas ``DataFrame`` by matching column values.

This function utilizes provided **kwargs to filter an existing
``ParaFrame`` by masking based on column values. Filtering supports
single- and multi-conditioned queries, returning rows that satisfy
any of the provided conditions.
``ParaFrame`` by masking based on column values. Filtering supports
single- and multi-conditioned queries, returning rows that satisfy
any of the provided conditions.

Args:
**kwargs: Arbitrary keyword arguments specifying column names
Expand All @@ -63,13 +69,102 @@ def filter(self, **kwargs):
mask = [False] * len(self)
for k, v in kwargs.items():
if isinstance(v, (tuple, list)):
mask |= np.isin(np.array(self[k]),np.array(v))
mask |= np.isin(np.array(self[k]), np.array(v))
else:
mask |= np.array(self[k]) == v
return self[mask]

@classmethod
def parse(cls, fmt, *args, debug=False, **kwargs):
def glob_search(cls, fmt, *args, debug=False, return_pattern=False,
                encoding=False, **kwargs):
    """
    Glob for files matching ``fmt`` and look up its ``.hallmark.yaml`` spec.

    Parameters that are not supplied through ``*args``/``**kwargs`` are
    turned into ``*`` wildcards in the glob pattern.

    Args:
        fmt: Format string, e.g. ``"/a_{a:d}/b_{b:d}.txt"``.  It is
            appended to the directory that contains ``.hallmark.yaml``.
        *args: Positional values forwarded to ``str.format``.
        debug: If true, print the intermediate patterns and a summary of
            the matches.
        return_pattern: If true, return ``(globbed_files, pattern)``
            instead of the default three-tuple.
        encoding: Must be true exactly when the matching yaml spec has at
            least one non-empty regex under ``encoding``.
        **kwargs: Named values forwarded to ``str.format``.

    Returns:
        ``(globbed_files, pattern)`` when ``return_pattern`` is true,
        otherwise ``(yaml_encodings, fmt_g, globbed_files)`` where
        ``yaml_encodings`` is the matching yaml spec, ``fmt_g`` is the
        registered format without leading ``/`` and with every wildcarded
        field rewritten to ``{name:g}``, and ``globbed_files`` is the
        sorted list of matching paths.

    Raises:
        ValueError: If the format is not registered in ``.hallmark.yaml``
            or if ``encoding`` disagrees with the spec.
    """
    pmax = len(fmt) // 3  # to specify a parameter, we need at least
                          # three characters '{p}'; the maximum number
                          # of possible parameters is `len(fmt) // 3`.

    # Use the first registered format that occurs inside ``fmt``; fall
    # back to ``fmt`` itself so the lookup below reports it as missing.
    encodings = load_encodings_yaml()
    fmt_enc = next(
        (entry['fmt'] for entry in encodings if entry['fmt'] in fmt), fmt)

    yaml_encodings = find_spec_by_fmt(fmt_enc)
    if yaml_encodings is None:
        raise ValueError(
            f"Error: The format '{fmt_enc}' is missing from .hallmark.yaml."
        )

    # Only the spec that matched decides whether a regex is required.
    # ``or {}`` also covers a spec whose ``encoding`` key is present but
    # empty in the yaml file.
    enc_dict = yaml_encodings.get("encoding") or {}
    needs_encoding = any(regex != "" for regex in enc_dict.values())

    if needs_encoding and not encoding:
        raise ValueError(
            f"Error: '{fmt_enc}' has a regex spec, "
            "so you must use encoding=True"
        )

    if not needs_encoding and encoding:
        raise ValueError(
            f"Error: '{fmt_enc}' does not have a regex spec, "
            "so you must use encoding=False"
        )

    # Construct the glob pattern for search files
    base = str(get_rel_yaml_path().parent)
    pattern = base + fmt
    if debug:
        print(pattern)
    fmt_g = fmt_enc.lstrip("/")

    for i in range(pmax):
        if debug:
            print(i, pattern, args, kwargs)
        try:
            pattern = pattern.format(*args, **kwargs)
            break
        except KeyError as e:
            k = e.args[0]
            # The lookahead requires the field name to end right after
            # ``k`` so that a missing ``a`` does not also rewrite
            # ``{aspin}``.
            field = r"\{" + re.escape(k) + r"(?=[:!.\[}]).*?\}"
            pattern = re.sub(field, "{" + k + ":s}", pattern)
            fmt_g = re.sub(field, "{" + k + ":g}", fmt_g)
            kwargs[k] = "*"

    # Obtain list of files based on the glob pattern
    globbed_files = sorted(glob(pattern))

    # Print the glob pattern and a summary of matches
    if debug:
        print(f'Pattern: "{pattern}"')
        n = len(globbed_files)
        if n > 1:
            print(f'{n} matches, e.g., "{globbed_files[0]}"')
        elif n > 0:
            print(f'{n} match, i.e., "{globbed_files[0]}"')
        else:
            print("No match; please check format string")

    if return_pattern:
        return (globbed_files, pattern)
    return (yaml_encodings, fmt_g, globbed_files)

@classmethod
def parse(cls, fmt, *args, debug=False, encoding=False, **kwargs):
"""
Construct a ``ParaFrame`` by parsing file paths that match a pattern.

Expand Down Expand Up @@ -107,45 +202,25 @@ def parse(cls, fmt, *args, debug=False, **kwargs):
0 data/run1_p10.csv 1 10
1 data/run2_p20.csv 2 20
"""
pmax = len(fmt) // 3 # to specify a parameter, we need at least
# three characters '{p}'; the maximum number
# of possible parameters is `len(fmt) // 3`.

# Construct the glob pattern for search files
pattern = fmt
for i in range(pmax):
if debug:
print(i, pattern, args, kwargs)
try:
pattern = pattern.format(*args, **kwargs)
break
except KeyError as e:
k = e.args[0]
pattern = re.sub(r'\{'+k+r':?.*?\}', '{'+k+':s}', pattern)
kwargs[e.args[0]] = '*'

# Obtain list of files based on the glob pattern
files = sorted(glob(pattern))
# Parse list of file names back to parameters
yaml_encodings, fmt_g, globbed_files = cls.glob_search(fmt, *args,
debug=debug,
encoding=encoding,
**kwargs)
parser = parse.compile(fmt_g)

frame = []

# Print the glob pattern and a summary of matches
if debug:
print(f'Pattern: "{pattern}"')
n = len(files)
if n > 1:
print(f'{n} matches, e.g., "{files[0]}"')
elif n > 0:
print(f'{n} match, i.e., "{files[0]}"')
for f in globbed_files:
f_short = str(Path(f).relative_to(Path(get_rel_yaml_path().parent)))
if encoding:
f_new = regex_sub(f_short, yaml_encodings)
else:
print('No match; please check format string')
f_new = f_short

# Parse list of file names back to parameters
parser = parse.compile(fmt)

frame = []
for f in files:
r = parser.parse(f)
r = parser.parse(f_new)
if r is None:
print(f'Failed to parse "{f}"')
else:
frame.append({'path':f, **r.named})
return cls(frame)
frame.append({'path': f_short, **r.named})
return cls(frame)
63 changes: 63 additions & 0 deletions mod/hallmark/helper_functions.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,63 @@
from pathlib import Path
import yaml
import re

_user_yaml_path = None

def set_rel_yaml_path(path):
global _user_yaml_path
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Add an attribute of ParaFrame that stores _user_yaml_path when a ParaFrame is instantiated. That way, the user can set the path with this function between creating different ParaFrames.

_user_yaml_path = Path(path).resolve()

def get_rel_yaml_path():
    """Return the path of the ``.hallmark.yaml`` file currently in effect.

    A path stored in the module-level ``_user_yaml_path`` takes
    precedence; otherwise the ``.hallmark.yaml`` located next to this
    module is returned.
    """
    if _user_yaml_path is None:
        return Path(__file__).parent / ".hallmark.yaml"
    return _user_yaml_path

def load_encodings_yaml():
    """Load the list of format specs from the active ``.hallmark.yaml``.

    Returns:
        The list stored under the top-level ``data`` key.  For every
        entry that has a ``path_to_fmt`` value, that value is replaced by
        an absolute path string resolved relative to the directory that
        contains the yaml file.
    """
    path = get_rel_yaml_path()
    yaml_dir = Path(path).resolve().parent

    # Close the file deterministically instead of leaking the handle.
    with path.open("r", encoding="utf-8") as f:
        yaml_file = yaml.safe_load(f)

    encodings = yaml_file["data"]
    # Resolve path_to_fmt relative to the yaml file's directory
    for entry in encodings:
        if "path_to_fmt" in entry:
            entry["path_to_fmt"] = str(
                (yaml_dir / entry["path_to_fmt"]).resolve()
            )

    return encodings

def find_spec_by_fmt(fmt):
    """Return the yaml spec whose ``fmt`` equals ``fmt`` exactly.

    Args:
        fmt: Format string to look up in the active ``.hallmark.yaml``.

    Returns:
        The first entry of the yaml ``data`` list whose ``fmt`` value is
        equal to ``fmt``, or ``None`` when no entry matches.
    """
    path = get_rel_yaml_path()

    # Close the file deterministically instead of leaking the handle.
    with path.open("r", encoding="utf-8") as f:
        yaml_file = yaml.safe_load(f)

    for spec in yaml_file["data"]:
        if spec.get("fmt") == fmt:
            return spec
    return None

def regex_sub(f, yaml_encodings):
    """Rewrite encoded ``aspin`` values in a path so they parse as numbers.

    Every match of the spec's ``aspin`` regex is replaced by ``"-"``
    followed by the text of the regex's first capture group (for example
    ``m0.5`` becomes ``-0.5`` with the regex ``m([0-9]+...)``).

    Args:
        f: Path string to rewrite.
        yaml_encodings: Yaml spec (a mapping) for the format, or ``None``.

    Returns:
        The rewritten string, or ``f`` unchanged when the spec is
        ``None``, has no ``encoding`` section, or has no non-empty
        ``aspin`` regex.
    """
    if yaml_encodings is None:
        return f

    enc = yaml_encodings.get("encoding", None)
    if not enc:
        return f

    regex = enc.get("aspin", "")
    if not regex:
        return f

    # Substitute each regex match in place.  Re-using the matched text as
    # a second regex would treat characters such as "." as wildcards and
    # could rewrite unrelated parts of the path.
    return re.sub(regex, lambda match: "-" + str(match.group(1)), f)
1 change: 1 addition & 0 deletions pyproject.toml
Original file line number Diff line number Diff line change
Expand Up @@ -26,6 +26,7 @@ dependencies =[
"numpy",
"pandas",
"parse",
"PyYAML",
]

[tool.setuptools.packages.find]
Expand Down
64 changes: 63 additions & 1 deletion tests/conftest.py
Original file line number Diff line number Diff line change
@@ -1,11 +1,73 @@
import pytest
import shutil
import yaml
from pathlib import Path
import hallmark

ORIGINAL_YAML = Path("demos/data/.hallmark.yaml")

@pytest.fixture(scope="function")
def encodings_yaml(tmp_path):
    """Copy the demo ``.hallmark.yaml`` into ``tmp_path`` and activate it.

    Returns the path of the temporary copy after registering it with
    ``hallmark.set_rel_yaml_path``.
    """
    target = tmp_path / ".hallmark.yaml"
    shutil.copy2(ORIGINAL_YAML, target)
    hallmark.set_rel_yaml_path(target)
    return target

@pytest.fixture(scope="function", autouse=True)
def _append_tmp_path_entries_to_encodings_yaml(tmp_path, encodings_yaml):
    """Rewrite the temporary yaml so it lists exactly the test formats.

    The file is first reset to an empty ``data`` list, then one entry per
    test format is appended, each carrying the same ``aspin`` regex.
    """
    aspin_regex = r"m([0-9]+(\.[0-9]+)?|\.[0-9]+)"
    test_formats = (
        "/a_{a:d}/b_{b:d}.txt",
        "/a{aspin}/b_{b:d}.txt",
        "/{mag:d}_mag{aspin}_w{win:d}.h5",
    )

    encodings_yaml.write_text("data: []\n", encoding="utf-8")
    doc = yaml.safe_load(encodings_yaml.read_text(encoding="utf-8")) or {}
    # A fresh dict per entry keeps the dumped yaml free of anchors/aliases.
    doc.setdefault("data", []).extend(
        {"fmt": fmt, "encoding": {"aspin": aspin_regex}}
        for fmt in test_formats
    )
    encodings_yaml.write_text(
        yaml.safe_dump(doc, sort_keys=False), encoding="utf-8"
    )
    yield

def spin_format(val):
    """Format a spin value: ``"0"`` for zero, otherwise signed ``g`` style."""
    return "0" if val == 0 else f"{val:+g}"

@pytest.fixture(scope = "function")
def create_temp_data(tmp_path):
    """Create ``a_0`` .. ``a_9`` under ``tmp_path``, each with ten files.

    Every ``a_<a>`` directory receives empty files ``b_10.txt`` through
    ``b_19.txt``.  Returns ``tmp_path``.
    """
    # Files live directly under tmp_path; the earlier ``tmp_path / "data"``
    # assignment was dead code and the debug print has been dropped.
    data_dir = tmp_path
    for a in range(10):
        subdir = data_dir / f"a_{a}"
        subdir.mkdir(parents=True)
        for b in range(10, 20):
            (subdir / f"b_{b}.txt").touch()
    return data_dir

@pytest.fixture(scope = "function")
def create_temp_data_spin(tmp_path):
    """Create one directory per spin value under ``tmp_path``.

    Directories are named ``a<spin>`` using ``spin_format`` for spins
    -0.5, 0.0 and 0.5; each receives empty files ``b_10.txt`` through
    ``b_19.txt``.  Returns ``tmp_path``.
    """
    for spin in (-0.5, 0.0, 0.5):
        spin_dir = tmp_path / f"a{spin_format(spin)}"
        spin_dir.mkdir(parents=True)
        for b in range(10, 20):
            spin_dir.joinpath(f"b_{b}.txt").touch()
    return tmp_path

@pytest.fixture(scope = "function")
def create_temp_data_spin_with_m(tmp_path):
    """Create empty ``<mag>_mag<aspin>_w<win>.h5`` files in ``tmp_path``.

    One file is created for every combination of ``mag`` in 0..1,
    ``aspin`` in ``m0.5``/``0``/``0.5`` and ``win`` in 10..19.  Returns
    ``tmp_path``.
    """
    file_names = (
        f"{mag}_mag{aspin}_w{win}.h5"
        for mag in range(0, 2)
        for aspin in ("m0.5", "0", "0.5")
        for win in range(10, 20)
    )
    for file_name in file_names:
        (tmp_path / file_name).touch()
    return tmp_path
Loading