Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
6 changes: 4 additions & 2 deletions exaflow/algorithms/federated/statistics/histogram.py
Original file line number Diff line number Diff line change
Expand Up @@ -85,7 +85,9 @@ def hist(
)

def _value_counts(self, series: pd.Series, categories) -> List[int]:
counts = series.value_counts()
# Use a plain dict to avoid pandas Series.get positional fallback on
# integer-like keys (e.g. key=0 on index ["1"] returning first element).
counts = series.value_counts().to_dict()
resolved = []
for cat in categories:
count = counts.get(cat, 0)
Expand All @@ -94,7 +96,7 @@ def _value_counts(self, series: pd.Series, categories) -> List[int]:
resolved.append(int(count))
return resolved

def _value_counts_numeric_fallback(self, counts, cat: str) -> int:
def _value_counts_numeric_fallback(self, counts: Dict, cat: str) -> int:
for caster in (float, int):
try:
coerced = caster(cat)
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -296,3 +296,31 @@ def test_federated_algorithm_with_multiple_workers(self, case):
y_levels=y_levels,
min_row_count=1,
)

@pytest.mark.parametrize("n_workers", [1, 3])
def test_numeric_like_enum_codes_are_counted_by_label(self, n_workers):
    """Run the comparison on a categorical ``y`` coded with digit strings.

    Every one of the six rows carries the code ``"1"``, while the declared
    levels are ``"0"``, ``"1"`` and ``"9"``; two levels therefore never
    occur in the data. The rows are spread evenly over three ``group``
    codes, and the case is parametrized over one and three workers.

    NOTE(review): presumably this guards against level lookups being
    resolved by position instead of by label -- the actual assertions live
    in ``run_comparison``, which is not visible here; confirm there.
    """
    group_codes = ("A", "B", "C")
    level_labels = {"0": "No", "1": "Yes", "9": "Unknown"}

    # Two consecutive rows per group code: A, A, B, B, C, C.
    frame = pd.DataFrame(
        {
            "y": ["1"] * 6,
            "group": [code for code in group_codes for _ in range(2)],
        }
    )

    variable_metadata = {
        "y": {
            "is_categorical": True,
            "enumerations": level_labels,
        },
        "group": {
            "is_categorical": True,
            "enumerations": {code: code for code in group_codes},
        },
    }

    # Float zeros, one entry per row of the frame.
    zero_target = np.zeros((len(frame),), dtype=float)

    self.run_comparison(
        X=frame,
        y=zero_target,
        n_workers=n_workers,
        metadata=variable_metadata,
        y_levels=list(level_labels),
        min_row_count=1,
    )
Loading