Skip to content

Commit faa8c9e

Browse files
fix: remove pandas.concat future warning (#92)
1 parent d908636 commit faa8c9e

File tree

1 file changed

+11
-10
lines changed

1 file changed

+11
-10
lines changed

mostlyai/qa/_filesystem.py

Lines changed: 11 additions & 10 deletions
Original file line number | Diff line number | Diff line change
@@ -124,14 +124,13 @@ def load_meta(self) -> dict:
124124
def store_bins(self, bins: dict[str, list]) -> None:
125125
df = pd.Series(bins).to_frame("bins").reset_index().rename(columns={"index": "column"})
126126
self.bins_dir.mkdir(exist_ok=True, parents=True)
127-
empty_df = pd.DataFrame(columns=["column", "bins"])
128-
empty_df.to_parquet(self.bins_dir / "empty.parquet")
129127
for i, row in df.iterrows():
130128
row_df = pd.DataFrame([row]).explode("bins")
131129
row_df.to_parquet(self.bins_dir / f"{i:05}.parquet")
132130

133131
def load_bins(self) -> dict[str, list]:
134-
df = pd.concat([pd.read_parquet(p) for p in sorted(self.bins_dir.glob("*.parquet"))])
132+
files = sorted(self.bins_dir.glob("*.parquet"))
133+
df = pd.concat([pd.read_parquet(p) for p in files]) if files else pd.DataFrame(columns=["column", "bins"])
135134
df = df.groupby("column", sort=False).agg(list).reset_index()
136135
# harmonise older prefix formats to <prefix>:: for compatibility with older versions
137136
df["column"] = df["column"].str.replace(_OLD_COL_PREFIX, _NEW_COL_PREFIX, regex=True)
@@ -172,14 +171,15 @@ def store_numeric_uni_kdes(self, trn_kdes: dict[str, pd.Series]) -> None:
172171
columns=["column", "x", "y"],
173172
)
174173
self.numeric_kdes_uni_dir.mkdir(exist_ok=True, parents=True)
175-
empty_df = pd.DataFrame(columns=["column", "x", "y"])
176-
empty_df.to_parquet(self.numeric_kdes_uni_dir / "empty.parquet")
177174
for i, row in trn_kdes.iterrows():
178175
row_df = pd.DataFrame([row]).explode(["x", "y"])
179176
row_df.to_parquet(self.numeric_kdes_uni_dir / f"{i:05}.parquet")
180177

181178
def load_numeric_uni_kdes(self) -> dict[str, pd.Series]:
182-
trn_kdes = pd.concat([pd.read_parquet(p) for p in sorted(self.numeric_kdes_uni_dir.glob("*.parquet"))])
179+
files = sorted(self.numeric_kdes_uni_dir.glob("*.parquet"))
180+
trn_kdes = (
181+
pd.concat([pd.read_parquet(p) for p in files]) if files else pd.DataFrame(columns=["column", "x", "y"])
182+
)
183183
trn_kdes = trn_kdes.groupby("column", sort=False).agg(list).reset_index()
184184
# harmonise older prefix formats to <prefix>:: for compatibility with older versions
185185
trn_kdes["column"] = trn_kdes["column"].str.replace(_OLD_COL_PREFIX, _NEW_COL_PREFIX, regex=True)
@@ -199,15 +199,16 @@ def store_categorical_uni_counts(self, trn_cnts_uni: dict[str, pd.Series]) -> No
199199
columns=["column", "cat", "count"],
200200
)
201201
self.categorical_counts_uni_dir.mkdir(exist_ok=True, parents=True)
202-
empty_df = pd.DataFrame(columns=["column", "cat", "count"])
203-
empty_df.to_parquet(self.categorical_counts_uni_dir / "empty.parquet")
204202
for i, row in trn_cnts_uni.iterrows():
205203
row_df = pd.DataFrame([row]).explode(["cat", "count"])
206204
row_df.to_parquet(self.categorical_counts_uni_dir / f"{i:05}.parquet")
207205

208206
def load_categorical_uni_counts(self) -> dict[str, pd.Series]:
209-
trn_cnts_uni = pd.concat(
210-
[pd.read_parquet(p) for p in sorted(self.categorical_counts_uni_dir.glob("*.parquet"))]
207+
files = sorted(self.categorical_counts_uni_dir.glob("*.parquet"))
208+
trn_cnts_uni = (
209+
pd.concat([pd.read_parquet(p) for p in files])
210+
if files
211+
else pd.DataFrame(columns=["column", "cat", "count"])
211212
)
212213
trn_cnts_uni = trn_cnts_uni.groupby("column", sort=False).agg(list).reset_index()
213214
# harmonise older prefix formats to <prefix>:: for compatibility with older versions

0 commit comments

Comments
 (0)