Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion pyproject.toml
Original file line number Diff line number Diff line change
Expand Up @@ -42,7 +42,6 @@ dependencies = [
"scikit-learn >= 1.5",
"xgboost-cpu >= 2.1.4", # regular xgboost includes nvidia libraries which bloat the package size on linux. For PyProphet, we likely would not need GPU support.
"matplotlib",
"pyarrow",
"pypdf",
"psutil",
"pyopenms",
Expand All @@ -55,6 +54,7 @@ dependencies = [
testing = ["pytest", "pytest-regtest", "pytest-xdist"]
docs = ["sphinx", "sphinx-copybutton", "sphinx_rtd_theme", "pydata_sphinx_theme", "sphinx-click"]
dev = ["pyprophet[testing]", "pyprophet[docs]", "black", "ruff", "mypy"]
parquet = ["pyarrow"]

# Define console entry points
[project.scripts]
Expand Down
2 changes: 1 addition & 1 deletion pyprophet/io/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -15,7 +15,7 @@
Dependencies:
-------------
- `pandas`
- `pyarrow`
- `pyarrow` (optional, for Parquet support)
- `duckdb`
- `sqlite3`
- `loguru`
Expand Down
80 changes: 12 additions & 68 deletions pyprophet/io/dispatcher.py
Original file line number Diff line number Diff line change
Expand Up @@ -16,52 +16,28 @@

from loguru import logger

from .util import (
_get_parquet_reader_class_for_config,
_get_parquet_writer_class_for_config,
)
from .._config import ExportIOConfig, IPFIOConfig, LevelContextIOConfig, RunnerIOConfig

# Export I/O
from .export.osw import OSWReader as ExportOSWReader
from .export.osw import OSWWriter as ExportOSWWriter
from .export.sqmass import SqMassWriter as ExportSqMassWriter
from .export.parquet import (
ParquetReader as ExportParquetReader,
)
from .export.parquet import (
ParquetWriter as ExportParquetWriter,
)
from .export.split_parquet import (
SplitParquetReader as ExportSplitParquetReader,
)
from .export.split_parquet import (
SplitParquetWriter as ExportSplitParquetWriter,
)

# IPF I/O
from .ipf.osw import OSWReader as IPFOSWReader
from .ipf.osw import OSWWriter as IPFOSWWriter
from .ipf.parquet import ParquetReader as IPFParquetReader
from .ipf.parquet import ParquetWriter as IPFParquetWriter
from .ipf.split_parquet import SplitParquetReader as IPFSplitParquetReader
from .ipf.split_parquet import SplitParquetWriter as IPFSplitParquetWriter

# Levels Context I/O
from .levels_context.osw import OSWReader as LevelContextOSWReader
from .levels_context.osw import OSWWriter as LevelContextOSWWriter
from .levels_context.parquet import ParquetReader as LevelContextParquetReader
from .levels_context.parquet import ParquetWriter as LevelContextParquetWriter
from .levels_context.split_parquet import (
SplitParquetReader as LevelContextSplitParquetReader,
)
from .levels_context.split_parquet import (
SplitParquetWriter as LevelContextSplitParquetWriter,
)

# Scoring I/O
from .scoring.osw import OSWReader as ScoringOSWReader
from .scoring.osw import OSWWriter as ScoringOSWWriter
from .scoring.parquet import ParquetReader as ParquetScoringReader
from .scoring.parquet import ParquetWriter as ParquetScoringWriter
from .scoring.split_parquet import SplitParquetReader as SplitParquetScoringReader
from .scoring.split_parquet import SplitParquetWriter as SplitParquetScoringWriter
from .scoring.tsv import TSVReader as ScoringTSVReader
from .scoring.tsv import TSVWriter as ScoringTSVWriter

Expand Down Expand Up @@ -123,29 +99,13 @@ def _get_osw_reader(config):

@staticmethod
def _get_parquet_reader(config):
# Build a single-file Parquet reader for the given I/O config.
# NOTE(review): this looks like a diff hunk whose +/- markers and indentation
# were lost -- the isinstance chain is presumably the removed implementation
# and the final two lines its replacement; confirm against the real file.
#
# Explicit dispatch on the config type:
#   RunnerIOConfig       -> ParquetScoringReader
#   IPFIOConfig          -> IPFParquetReader
#   LevelContextIOConfig -> LevelContextParquetReader
#   ExportIOConfig       -> ExportParquetReader
# Any other config type raises ValueError naming the offending type.
if isinstance(config, RunnerIOConfig):
return ParquetScoringReader(config)
elif isinstance(config, IPFIOConfig):
return IPFParquetReader(config)
elif isinstance(config, LevelContextIOConfig):
return LevelContextParquetReader(config)
elif isinstance(config, ExportIOConfig):
return ExportParquetReader(config)
else:
raise ValueError(f"Unsupported config context: {type(config).__name__}")
# Helper-based form: ask the .util helper for the reader class
# (split=False selects the non-split variant) and instantiate it with config.
cls = _get_parquet_reader_class_for_config(config, split=False)
return cls(config)

@staticmethod
def _get_split_parquet_reader(config):
# Build a split-Parquet reader for the given I/O config.
# NOTE(review): this looks like a diff hunk whose +/- markers and indentation
# were lost -- the isinstance chain is presumably the removed implementation
# and the final two lines its replacement; confirm against the real file.
#
# Explicit dispatch on the config type:
#   RunnerIOConfig       -> SplitParquetScoringReader
#   IPFIOConfig          -> IPFSplitParquetReader
#   LevelContextIOConfig -> LevelContextSplitParquetReader
#   ExportIOConfig       -> ExportSplitParquetReader
# Any other config type raises ValueError naming the offending type.
if isinstance(config, RunnerIOConfig):
return SplitParquetScoringReader(config)
elif isinstance(config, IPFIOConfig):
return IPFSplitParquetReader(config)
elif isinstance(config, LevelContextIOConfig):
return LevelContextSplitParquetReader(config)
elif isinstance(config, ExportIOConfig):
return ExportSplitParquetReader(config)
else:
raise ValueError(f"Unsupported config context: {type(config).__name__}")
# Helper-based form: ask the .util helper for the reader class
# (split=True selects the split variant) and instantiate it with config.
cls = _get_parquet_reader_class_for_config(config, split=True)
return cls(config)

@staticmethod
def _get_tsv_reader(config):
Expand Down Expand Up @@ -223,29 +183,13 @@ def _get_sqmass_writer(config):

@staticmethod
def _get_parquet_writer(config):
# Build a single-file Parquet writer for the given I/O config.
# NOTE(review): this looks like a diff hunk whose +/- markers and indentation
# were lost -- the isinstance chain is presumably the removed implementation
# and the final two lines its replacement; confirm against the real file.
#
# Explicit dispatch on the config type:
#   RunnerIOConfig       -> ParquetScoringWriter
#   IPFIOConfig          -> IPFParquetWriter
#   LevelContextIOConfig -> LevelContextParquetWriter
#   ExportIOConfig       -> ExportParquetWriter
# Any other config type raises ValueError naming the offending type.
if isinstance(config, RunnerIOConfig):
return ParquetScoringWriter(config)
elif isinstance(config, IPFIOConfig):
return IPFParquetWriter(config)
elif isinstance(config, LevelContextIOConfig):
return LevelContextParquetWriter(config)
elif isinstance(config, ExportIOConfig):
return ExportParquetWriter(config)
else:
raise ValueError(f"Unsupported config context: {type(config).__name__}")
# Helper-based form: ask the .util helper for the writer class
# (split=False selects the non-split variant) and instantiate it with config.
cls = _get_parquet_writer_class_for_config(config, split=False)
return cls(config)

@staticmethod
def _get_split_parquet_writer(config):
# Build a split-Parquet writer for the given I/O config.
# NOTE(review): this looks like a diff hunk whose +/- markers and indentation
# were lost -- the isinstance chain is presumably the removed implementation
# and the final two lines its replacement; confirm against the real file.
#
# Explicit dispatch on the config type:
#   RunnerIOConfig       -> SplitParquetScoringWriter
#   IPFIOConfig          -> IPFSplitParquetWriter
#   LevelContextIOConfig -> LevelContextSplitParquetWriter
#   ExportIOConfig       -> ExportSplitParquetWriter
# Any other config type raises ValueError naming the offending type.
if isinstance(config, RunnerIOConfig):
return SplitParquetScoringWriter(config)
elif isinstance(config, IPFIOConfig):
return IPFSplitParquetWriter(config)
elif isinstance(config, LevelContextIOConfig):
return LevelContextSplitParquetWriter(config)
elif isinstance(config, ExportIOConfig):
return ExportSplitParquetWriter(config)
else:
raise ValueError(f"Unsupported config context: {type(config).__name__}")
# Helper-based form: ask the .util helper for the writer class
# (split=True selects the split variant) and instantiate it with config.
cls = _get_parquet_writer_class_for_config(config, split=True)
return cls(config)

@staticmethod
def _get_tsv_writer(config):
Expand Down
15 changes: 9 additions & 6 deletions pyprophet/io/ipf/parquet.py
Original file line number Diff line number Diff line change
@@ -1,14 +1,17 @@
import os
from typing import Literal
from shutil import copyfile
import pandas as pd
import pyarrow as pa
import duckdb
from typing import Literal

import click
import duckdb
import pandas as pd
from loguru import logger
from ..util import get_parquet_column_names
from .._base import BaseParquetReader, BaseParquetWriter

from ..._config import IPFIOConfig
from .._base import BaseParquetReader, BaseParquetWriter
from ..util import _ensure_pyarrow, get_parquet_column_names

pa, _, _ = _ensure_pyarrow()


class ParquetReader(BaseParquetReader):
Expand Down
15 changes: 8 additions & 7 deletions pyprophet/io/ipf/split_parquet.py
Original file line number Diff line number Diff line change
@@ -1,16 +1,17 @@
import os
import glob
from shutil import copyfile
import os
from typing import Literal
import pandas as pd
import pyarrow as pa
import duckdb

import click
import duckdb
import pandas as pd
from loguru import logger

from ..util import get_parquet_column_names
from .._base import BaseSplitParquetReader, BaseSplitParquetWriter
from ..._config import IPFIOConfig
from .._base import BaseSplitParquetReader, BaseSplitParquetWriter
from ..util import _ensure_pyarrow, get_parquet_column_names

pa, _, _ = _ensure_pyarrow()


class SplitParquetReader(BaseSplitParquetReader):
Expand Down
15 changes: 8 additions & 7 deletions pyprophet/io/levels_context/parquet.py
Original file line number Diff line number Diff line change
@@ -1,14 +1,15 @@
import os
from typing import Literal
from shutil import copyfile
import pandas as pd
import pyarrow as pa
import duckdb

import click
import duckdb
import pandas as pd
from loguru import logger
from ..util import get_parquet_column_names
from .._base import BaseParquetReader, BaseParquetWriter

from ..._config import LevelContextIOConfig
from .._base import BaseParquetReader, BaseParquetWriter
from ..util import _ensure_pyarrow, get_parquet_column_names

pa, _, _ = _ensure_pyarrow()


class ParquetReader(BaseParquetReader):
Expand Down
18 changes: 8 additions & 10 deletions pyprophet/io/levels_context/split_parquet.py
Original file line number Diff line number Diff line change
@@ -1,18 +1,16 @@
import os
import glob
from shutil import copyfile
from typing import Literal
import pandas as pd
import pyarrow as pa
import duckdb
import os

import click
import duckdb
import pandas as pd
from loguru import logger

from .._base import BaseSplitParquetReader, BaseSplitParquetWriter
from ..._config import LevelContextIOConfig
from ..util import (
get_parquet_column_names,
)
from .._base import BaseSplitParquetReader, BaseSplitParquetWriter
from ..util import _ensure_pyarrow, get_parquet_column_names

pa, _, _ = _ensure_pyarrow()


class SplitParquetReader(BaseSplitParquetReader):
Expand Down
13 changes: 8 additions & 5 deletions pyprophet/io/scoring/parquet.py
Original file line number Diff line number Diff line change
@@ -1,14 +1,17 @@
import sys
from shutil import copyfile

import click
import duckdb
import pandas as pd
import polars as pl
import pyarrow as pa
import duckdb
import click
from loguru import logger
from ..util import get_parquet_column_names
from .._base import BaseParquetReader, BaseParquetWriter, RowCountMismatchError

from ..._config import RunnerIOConfig
from .._base import BaseParquetReader, BaseParquetWriter, RowCountMismatchError
from ..util import _ensure_pyarrow, get_parquet_column_names

pa, _, _ = _ensure_pyarrow()


class ParquetReader(BaseParquetReader):
Expand Down
15 changes: 7 additions & 8 deletions pyprophet/io/scoring/split_parquet.py
Original file line number Diff line number Diff line change
@@ -1,16 +1,15 @@
import os
import sys
import glob
from shutil import copyfile
import pandas as pd
import pyarrow as pa
import duckdb

import click
import duckdb
import pandas as pd
from loguru import logger

from ..util import get_parquet_column_names
from .._base import BaseSplitParquetReader, BaseSplitParquetWriter
from ..._config import RunnerIOConfig
from .._base import BaseSplitParquetReader, BaseSplitParquetWriter
from ..util import _ensure_pyarrow, get_parquet_column_names

pa, _, _ = _ensure_pyarrow()


class SplitParquetReader(BaseSplitParquetReader):
Expand Down
Loading