@@ -124,14 +124,13 @@ def load_meta(self) -> dict:
def store_bins(self, bins: dict[str, list]) -> None:
    """Write the given bins to ``self.bins_dir``, one parquet file per entry.

    Each key of ``bins`` becomes a value in the ``column`` field and its list
    is exploded so that every element occupies its own row in the ``bins``
    field. Files are named after the zero-padded row position
    (``00000.parquet``, ``00001.parquet``, ...). The directory is created,
    including parents, if it does not exist yet.

    Args:
        bins: Mapping from a column name to its list of bins.
    """
    # One row per mapping entry: the key lands in "column", the list in "bins".
    bins_frame = pd.Series(bins).to_frame("bins").reset_index()
    bins_frame = bins_frame.rename(columns={"index": "column"})
    self.bins_dir.mkdir(exist_ok=True, parents=True)
    # NOTE(review): parquet files already present in the directory are left
    # untouched here; only files whose names collide are overwritten.
    for position, record in bins_frame.iterrows():
        exploded = pd.DataFrame([record]).explode("bins")
        target_path = self.bins_dir / f"{position:05}.parquet"
        exploded.to_parquet(target_path)
132130
133131 def load_bins (self ) -> dict [str , list ]:
134- df = pd .concat ([pd .read_parquet (p ) for p in sorted (self .bins_dir .glob ("*.parquet" ))])
132+ files = sorted (self .bins_dir .glob ("*.parquet" ))
133+ df = pd .concat ([pd .read_parquet (p ) for p in files ]) if files else pd .DataFrame (columns = ["column" , "bins" ])
135134 df = df .groupby ("column" , sort = False ).agg (list ).reset_index ()
136135 # harmonise older prefix formats to <prefix>:: for compatibility with older versions
137136 df ["column" ] = df ["column" ].str .replace (_OLD_COL_PREFIX , _NEW_COL_PREFIX , regex = True )
@@ -172,14 +171,15 @@ def store_numeric_uni_kdes(self, trn_kdes: dict[str, pd.Series]) -> None:
172171 columns = ["column" , "x" , "y" ],
173172 )
174173 self .numeric_kdes_uni_dir .mkdir (exist_ok = True , parents = True )
175- empty_df = pd .DataFrame (columns = ["column" , "x" , "y" ])
176- empty_df .to_parquet (self .numeric_kdes_uni_dir / "empty.parquet" )
177174 for i , row in trn_kdes .iterrows ():
178175 row_df = pd .DataFrame ([row ]).explode (["x" , "y" ])
179176 row_df .to_parquet (self .numeric_kdes_uni_dir / f"{ i :05} .parquet" )
180177
181178 def load_numeric_uni_kdes (self ) -> dict [str , pd .Series ]:
182- trn_kdes = pd .concat ([pd .read_parquet (p ) for p in sorted (self .numeric_kdes_uni_dir .glob ("*.parquet" ))])
179+ files = sorted (self .numeric_kdes_uni_dir .glob ("*.parquet" ))
180+ trn_kdes = (
181+ pd .concat ([pd .read_parquet (p ) for p in files ]) if files else pd .DataFrame (columns = ["column" , "x" , "y" ])
182+ )
183183 trn_kdes = trn_kdes .groupby ("column" , sort = False ).agg (list ).reset_index ()
184184 # harmonise older prefix formats to <prefix>:: for compatibility with older versions
185185 trn_kdes ["column" ] = trn_kdes ["column" ].str .replace (_OLD_COL_PREFIX , _NEW_COL_PREFIX , regex = True )
@@ -199,15 +199,16 @@ def store_categorical_uni_counts(self, trn_cnts_uni: dict[str, pd.Series]) -> No
199199 columns = ["column" , "cat" , "count" ],
200200 )
201201 self .categorical_counts_uni_dir .mkdir (exist_ok = True , parents = True )
202- empty_df = pd .DataFrame (columns = ["column" , "cat" , "count" ])
203- empty_df .to_parquet (self .categorical_counts_uni_dir / "empty.parquet" )
204202 for i , row in trn_cnts_uni .iterrows ():
205203 row_df = pd .DataFrame ([row ]).explode (["cat" , "count" ])
206204 row_df .to_parquet (self .categorical_counts_uni_dir / f"{ i :05} .parquet" )
207205
208206 def load_categorical_uni_counts (self ) -> dict [str , pd .Series ]:
209- trn_cnts_uni = pd .concat (
210- [pd .read_parquet (p ) for p in sorted (self .categorical_counts_uni_dir .glob ("*.parquet" ))]
207+ files = sorted (self .categorical_counts_uni_dir .glob ("*.parquet" ))
208+ trn_cnts_uni = (
209+ pd .concat ([pd .read_parquet (p ) for p in files ])
210+ if files
211+ else pd .DataFrame (columns = ["column" , "cat" , "count" ])
211212 )
212213 trn_cnts_uni = trn_cnts_uni .groupby ("column" , sort = False ).agg (list ).reset_index ()
213214 # harmonise older prefix formats to <prefix>:: for compatibility with older versions
0 commit comments