-
Notifications
You must be signed in to change notification settings - Fork 13
Open
Description
Our current dataframe structure is highly heterogeneous, which complicates data management, reduces accessibility, and makes the implementation of new features more difficult. Since data in pyMBE is conceptually grouped by pmb_type, it seems natural to organize it through a hierarchical, type-based dataframe structure managed through a unified controller class.
Below is a minimal working draft of how such a system could be structured within the codebase.
Proposed API Design
High-level layout
pyMBE/
└── storage/
    ├── df_management.py   → _DFManagement
    ├── base_type.py       → PMBBaseModel
    ├── types/
    │   ├── particle.py        → class Particle
    │   ├── particle_state.py  → class ParticleState
    │   ├── lj.py              → class LennardJones
    │   ├── bond.py            → ...
    └── io.py              → Save/load utilities
Base Object: PMBBaseModel
from typing import Any, Dict, Optional

from pydantic import BaseModel, ConfigDict, Field, field_validator
class PMBBaseModel(BaseModel):
    """Common base class for the typed pyMBE storage models.

    Every model carries a ``name`` and a ``pmb_type`` string. Assignments to
    attributes are re-validated, and fields that are not declared on the model
    are rejected.
    """

    # pydantic v2 configuration; replaces the deprecated nested ``class Config``.
    model_config = ConfigDict(
        validate_assignment=True,
        extra="forbid",  # disallow ghost fields
    )

    name: str
    pmb_type: str


# Example Type Models
class ParticleState(PMBBaseModel):
    """Description of a single state that a particle can be in.

    NOTE(review): this model inherits the required ``name`` and ``pmb_type``
    fields from ``PMBBaseModel``; the usage example in this document builds
    states from ``label``, ``es_type`` and ``charge`` only, and passes integers
    for ``es_type`` -- confirm the intended schema.
    """

    # Identifier of the state; ``Particle.add_state`` uses it as dictionary key.
    label: str
    # Type tag of the state, stored as a string.
    es_type: str
    # Charge carried by the particle in this state.
    charge: float
class Particle(PMBBaseModel):
    """Particle template holding its parameters and its named states.

    ``states`` maps a state label to its ``ParticleState``; ``default_state``
    optionally names the entry of ``states`` returned by ``get_state`` when no
    label is given.
    """

    pmb_type: str = Field(default="particle", frozen=True)
    name: str
    # ``QuantityModel`` is declared outside this section.
    sigma: QuantityModel
    epsilon: QuantityModel
    states: Dict[str, ParticleState] = Field(default_factory=dict)
    default_state: Optional[str] = None

    @field_validator("default_state")
    @classmethod
    def validate_default(cls, v, info):
        """Check that ``default_state`` refers to an entry of ``states``.

        Args:
            v: Candidate value for ``default_state``.
            info: Validation context supplied by pydantic; ``info.data`` holds
                the fields that were validated before this one.

        Returns:
            ``v`` unchanged.

        Raises:
            ValueError: If ``v`` is not ``None`` and is not a key of ``states``.
        """
        # ``states`` is declared before ``default_state`` so it is normally in
        # ``info.data``; it is absent only when its own validation failed.
        known_states = info.data.get("states", {})
        if v is not None and v not in known_states:
            raise ValueError(f"default_state '{v}' not in states: {list(known_states)}")
        return v

    def add_state(self, state: ParticleState):
        """Register ``state`` under its label.

        Args:
            state: State to add; ``state.label`` becomes its key in ``states``.

        Raises:
            ValueError: If a state with the same label is already stored.
        """
        if state.label in self.states:
            raise ValueError(f"Duplicate state label '{state.label}'")
        self.states[state.label] = state

    def get_state(self, label=None) -> ParticleState:
        """Return the state stored under ``label``.

        Args:
            label: Label to look up. A falsy value selects ``default_state``.

        Returns:
            The matching ``ParticleState``.

        Raises:
            KeyError: If no label is available or the label is unknown.
        """
        label = label or self.default_state
        if label is None:
            # Neither an explicit label nor a default: say so explicitly
            # instead of reporting that state 'None' is missing.
            raise KeyError("No state label given and no default_state is set")
        if label not in self.states:
            raise KeyError(f"State '{label}' not found")
        return self.states[label]
class LennardJones(PMBBaseModel):
    """Storage model for a Lennard-Jones entry.

    ``pmb_type`` defaults to ``"LennardJones"``; the potential parameters are
    kept in an optional free-form dictionary.
    """

    pmb_type: str = "LennardJones"
    # Left as ``None`` until parameters are supplied.
    parameters_of_the_potential: Optional[dict] = None
DatabaseManager API
import pandas as pd
from typing import Dict, Type, List, Callable
class _DFManagement:
    """Registry that keeps one pandas DataFrame per ``pmb_type``.

    Each registered model class acts as the schema of its table: rows are
    validated by instantiating the model and stored as the model's dumped
    dictionary. Tables are always kept with a default 0..n-1 index.
    """

    def __init__(self):
        # pmb_type -> table with one row per stored object
        self.tables: Dict[str, pd.DataFrame] = {}
        # pmb_type -> model class used to validate rows of that table
        self.schemas: Dict[str, Type["PMBBaseModel"]] = {}

    def register_type(self, model: Type["PMBBaseModel"]):
        """Register ``model`` under the default value of its ``pmb_type`` field.

        An existing table for that type is kept, so registering a type again
        does not discard stored rows.

        Args:
            model: Model class whose ``pmb_type`` field declares a string default.

        Raises:
            ValueError: If ``pmb_type`` has no string default, since there
                would be no key to register the model under.
        """
        pmb_type = model.model_fields["pmb_type"].default
        if not isinstance(pmb_type, str):
            raise ValueError(
                f"{model.__name__} must declare a string default for 'pmb_type' "
                "to be registered"
            )
        self.schemas[pmb_type] = model
        if pmb_type not in self.tables:
            self.tables[pmb_type] = pd.DataFrame()

    def set(self, pmb_type: str, **kwargs) -> "PMBBaseModel":
        """Validate ``kwargs`` against the schema and append them as a new row.

        Args:
            pmb_type: Key of a registered type.
            **kwargs: Field values passed to the model constructor.

        Returns:
            The validated model instance.

        Raises:
            KeyError: If ``pmb_type`` has not been registered.
        """
        model = self.schemas[pmb_type](**kwargs)
        new_row = pd.DataFrame([model.model_dump()])
        current = self.tables[pmb_type]
        # An empty table contributes nothing; the new row alone is the result.
        if current.empty:
            self.tables[pmb_type] = new_row
        else:
            self.tables[pmb_type] = pd.concat([current, new_row], ignore_index=True)
        return model

    def get(self, pmb_type: str) -> pd.DataFrame:
        """Return a copy of the table for ``pmb_type``.

        Raises:
            KeyError: If ``pmb_type`` has not been registered.
        """
        return self.tables[pmb_type].copy()

    def query(self, pmb_type: str, fn: Callable[[pd.DataFrame], pd.DataFrame]):
        """Apply ``fn`` to a copy of the table for ``pmb_type`` and return its result.

        Raises:
            KeyError: If ``pmb_type`` has not been registered.
        """
        return fn(self.tables[pmb_type].copy())

    def update(self, pmb_type: str, index: int, **kwargs):
        """Overwrite fields of the row at position ``index`` after re-validation.

        The stored row is merged with ``kwargs`` and validated through the
        schema before being written back.

        Args:
            pmb_type: Key of a registered type.
            index: Row position; negative values count from the end.
            **kwargs: Field values that replace the stored ones.

        Raises:
            KeyError: If ``pmb_type`` has not been registered.
            IndexError: If ``index`` is out of range.
        """
        table = self.tables[pmb_type]
        merged = {**table.iloc[index].to_dict(), **kwargs}
        model = self.schemas[pmb_type](**merged)
        # Write to the label found at that position: ``.loc`` with a raw
        # negative number would append a new row instead of updating one.
        table.loc[table.index[index]] = model.model_dump()

    def delete(self, pmb_type: str, index: int):
        """Remove the row labelled ``index`` and renumber the remaining rows.

        Raises:
            KeyError: If ``pmb_type`` is not registered or ``index`` is not a row label.
        """
        remaining = self.tables[pmb_type].drop(index)
        self.tables[pmb_type] = remaining.reset_index(drop=True)

    def export(self, folder="pmb_df"):
        """Write every table to ``<folder>/<pmb_type>.csv``.

        Args:
            folder: Output directory; created (with parents) if it is missing.
        """
        from pathlib import Path

        out_dir = Path(folder)
        out_dir.mkdir(parents=True, exist_ok=True)
        # one file per type
        for p_type, df in self.tables.items():
            df.to_csv(out_dir / f"{p_type}.csv", index=False)


# Example usage in pyMBE.py
from pyMBE.storage.df_management import _DFManagement as _DFm
from storage.types.particle import Particle
from storage.types.lennard_jones import LennardJones
class pymbe_library():
from pyMBE.storage.df_management import _DFManagement as _DFm
def __init__(self):
# Create and configure the database
self._DFm = _DFManagement()
# Register available pmb_types here
self._DFm.register_type(Particle)
self._DFm.register_type(LennardJones)
def particle(...):
_DFm.set( "particle", sigma=sigma, epsilon=epsilon,
states={ "protonated": ParticleState(label="protonated", es_type=0, charge=0),
"deprotonated": ParticleState(label="deprotonated", es_type=1, charge=-1)
},
default_state="protonated",