Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
20 changes: 11 additions & 9 deletions .github/workflows/ci.yml
Original file line number Diff line number Diff line change
Expand Up @@ -21,11 +21,13 @@ jobs:
with:
python-version: ${{ matrix.python-version }}

- name: Install uv
uses: astral-sh/setup-uv@v1
with:
version: "latest"

- name: Install dependencies
run: |
python -m pip install --upgrade pip
pip install -r dev-requirements.txt
pip install -e .
run: uv sync --dev

- name: Set PYTHONPATH
run: echo "PYTHONPATH=$(pwd):$PYTHONPATH" >> $GITHUB_ENV
Expand All @@ -39,11 +41,11 @@ jobs:
fi

- name: Run tests with coverage
run: pytest --cov ffx
run: make test-cov

- name: Upload coverage to Coveralls
if: matrix.python-version == '3.11'
uses: coverallsapp/github-action@v2
with:
github-token: ${{ secrets.GITHUB_TOKEN }}
coveralls-endpoint: https://coveralls.io
run: uv run coveralls
env:
COVERALLS_REPO_TOKEN: ${{ secrets.COVERALLS_REPO_TOKEN }}
GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }}
3 changes: 3 additions & 0 deletions .gitignore
Original file line number Diff line number Diff line change
Expand Up @@ -27,6 +27,9 @@ share/python-wheels/
*.egg
MANIFEST

# uv
.uv/

# PyInstaller
# Usually these files are written by a python script from a template
# before PyInstaller builds the exe, so as to inject date/other infos into it.
Expand Down
10 changes: 0 additions & 10 deletions .isort.cfg

This file was deleted.

20 changes: 0 additions & 20 deletions .pylintrc

This file was deleted.

4 changes: 4 additions & 0 deletions CHANGES.md
Original file line number Diff line number Diff line change
@@ -1,5 +1,9 @@
# Changes

## 2.1.0

- Migrated FFX to uv / ruff and replaced Travis CI with GitHub Actions

## 2.0.1 / 2.0.2

- Fix ImportError introduced in 2.0.0
Expand Down
45 changes: 32 additions & 13 deletions Makefile
Original file line number Diff line number Diff line change
@@ -1,19 +1,38 @@
pylint:
pylint -j 0 `git ls-files '*.py'` --rcfile=.pylintrc
# Install dependencies
install:
uv sync

black:
black ffx --line-length 100 --target-version py27 --target-version py35 --target-version py36 --target-version py37 --target-version py38 -S --fast --exclude "build/|buck-out/|dist/|_build/|\.eggs/|\.git/|\.hg/|\.mypy_cache/|\.nox/|\.tox/|\.venv/"
install-dev:
uv sync --dev

isort:
isort -rc -y
lint:
uv run ruff check ffx

validate: pylint isort black
format:
uv run ruff format ffx

pypi:
rm -rf dist/*
python setup.py sdist bdist_egg bdist_wheel
twine upload dist/*
#twine upload --repository-url https://test.pypi.org/legacy/ dist/* # testpypi
typecheck:
uv run ty check ffx

validate: lint format typecheck

# Testing
test:
pytest ffx_tests/
uv run pytest ffx_tests/

test-cov:
uv run pytest ffx_tests/ --cov=ffx --cov-report=term-missing --cov-report=xml

# Build and publish
build:
uv build

pypi:
uv build
uv publish

# Clean
clean:
rm -rf dist/
rm -rf build/
rm -rf *.egg-info/
7 changes: 0 additions & 7 deletions dev-requirements.txt

This file was deleted.

2 changes: 1 addition & 1 deletion ffx/__init__.py
Original file line number Diff line number Diff line change
@@ -1,3 +1,3 @@
from .api import FFXRegressor, run

__all__ = ['run', 'FFXRegressor']
__all__ = ["run", "FFXRegressor"]
19 changes: 13 additions & 6 deletions ffx/api.py
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
'''api.py defines user interfaces to FFX. run() runs the complete method.
"""api.py defines user interfaces to FFX. run() runs the complete method.
FFXRegressor is a Scikit-learn style regressor.
'''
"""

from sklearn.base import BaseEstimator, RegressorMixin
from sklearn.utils import check_array, check_X_y
Expand All @@ -10,17 +10,20 @@
def run(train_X, train_y, test_X, test_y, varnames=None, verbose=False):
from .core import MultiFFXModelFactory

return MultiFFXModelFactory().build(train_X, train_y, test_X, test_y, varnames, verbose)
return MultiFFXModelFactory().build(
train_X, train_y, test_X, test_y, varnames, verbose
)


class FFXRegressor(BaseEstimator, RegressorMixin):
'''This class provides a Scikit-learn style estimator.'''
class FFXRegressor(RegressorMixin, BaseEstimator):
"""This class provides a Scikit-learn style estimator."""

def fit(self, X, y):
X, y = check_X_y(X, y, y_numeric=True, multi_output=False)
self.n_features_in_ = X.shape[1] # pylint: disable=attribute-defined-outside-init
# if X is a Pandas DataFrame, we don't have to pass in varnames.
# otherwise we make up placeholders.
if hasattr(X, 'columns'):
if hasattr(X, "columns"):
varnames = None
else:
varnames = ["X%d" % i for i in range(len(X))]
Expand All @@ -33,6 +36,10 @@ def fit(self, X, y):
def predict(self, X):
check_is_fitted(self, "model_")
X = check_array(X, accept_sparse=False)
if X.shape[1] != self.n_features_in_:
raise ValueError(
f"X has {X.shape[1]} features, but {self.__class__.__name__} is expecting {self.n_features_in_} features as input."
)
return self.model_.predict(X)

def complexity(self):
Expand Down
108 changes: 61 additions & 47 deletions ffx/cli.py
Original file line number Diff line number Diff line change
Expand Up @@ -10,15 +10,14 @@

@click.group()
def main():
'''Fast Function Extraction (FFX) toolkit.
'''
"""Fast Function Extraction (FFX) toolkit."""


@main.command()
@click.argument('x_file', type=click.Path(exists=True))
@click.argument('y_file', type=click.Path(exists=True))
@click.argument("x_file", type=click.Path(exists=True))
@click.argument("y_file", type=click.Path(exists=True))
def splitdata(x_file, y_file):
'''Usage: ffx splitdata INPUTS_FILE[.csv/.txt] OUTPUTS_FILE[.csv/.txt]
"""Usage: ffx splitdata INPUTS_FILE[.csv/.txt] OUTPUTS_FILE[.csv/.txt]

Given csv-formatted inputs and outputs files, splits them into training and testing data files
of the form INPUTS_FILE_train.csv, OUTPUTS_FILE_train.csv, INPUTS_FILE_test.csv,
Expand All @@ -29,40 +28,47 @@ def splitdata(x_file, y_file):
In the csv files, there is one column for each sample point. The inputs files have one row for
each input variable. The outputs files have just one row total, because the output is scalar.
Values in a given row are separated by spaces.
'''
if not (x_file.endswith('.csv') or x_file.endswith('.txt')):
print('INPUTS_FILE file \'%s\' needs to end with .csv or .txt.' % x_file)
"""
if not (x_file.endswith(".csv") or x_file.endswith(".txt")):
print("INPUTS_FILE file '%s' needs to end with .csv or .txt." % x_file)
return

if not (y_file.endswith('.csv') or y_file.endswith('.txt')):
print('OUTPUTS_FILE file \'%s\' needs to end with .csv or .txt.' % y_file)
if not (y_file.endswith(".csv") or y_file.endswith(".txt")):
print("OUTPUTS_FILE file '%s' needs to end with .csv or .txt." % y_file)
return

# create the target output filenames, and ensure they don't exist
join = lambda n, prefix: os.path.join(os.path.dirname(n), prefix + os.path.basename(n))
train_X_file = join(x_file, 'train_')
train_y_file = join(y_file, 'train_')
test_X_file = join(x_file, 'test_')
test_y_file = join(y_file, 'test_')
def join(n, prefix):
return os.path.join(os.path.dirname(n), prefix + os.path.basename(n))

train_X_file = join(x_file, "train_")
train_y_file = join(y_file, "train_")
test_X_file = join(x_file, "test_")
test_y_file = join(y_file, "test_")

for newfile in [train_X_file, train_y_file, test_X_file, test_y_file]:
if os.path.exists(newfile):
print('New file \'%s\' exists, and should not. Early exit.' % newfile)
print("New file '%s' exists, and should not. Early exit." % newfile)
return

print('Begin ffx splitdata. INPUTS_FILE.csv=%s, OUTPUTS_FILE.csv=%s' % (x_file, y_file))
print(
"Begin ffx splitdata. INPUTS_FILE.csv=%s, OUTPUTS_FILE.csv=%s"
% (x_file, y_file)
)

X = pd.read_csv(x_file) # [sample_i][var_i] : float
y = pd.read_csv(y_file) # [sample_i] : float

if X.shape[0] != y.shape[0]:
X = X.T
assert X.shape[0] == y.shape[0], 'Error: X shape and y shape do not match. Early exit.'
assert X.shape[0] == y.shape[0], (
"Error: X shape and y shape do not match. Early exit."
)

# create train/test data from X,y
I = np.argsort(y)
test_I, train_I = [], []
for (loc, i) in enumerate(I):
for loc, i in enumerate(I):
if loc % 4 == 0:
test_I.append(i)
else:
Expand All @@ -74,42 +80,41 @@ def splitdata(x_file, y_file):
test_y = np.take(y, test_I)

print(
'There will be %d samples in training data, and %d samples in test data'
"There will be %d samples in training data, and %d samples in test data"
% (len(train_y), len(test_y))
)

delimiter = ',' if x_file.endswith('.csv') else '\t'
delimiter = "," if x_file.endswith(".csv") else "\t"
np.savetxt(train_X_file, train_X, delimiter=delimiter)
np.savetxt(train_y_file, train_y, delimiter=delimiter)
np.savetxt(test_X_file, test_X, delimiter=delimiter)
np.savetxt(test_y_file, test_y, delimiter=delimiter)

print('Created these files:')
print(' Training inputs: %s' % train_X_file)
print(' Training outputs: %s' % train_y_file)
print(' Testing inputs: %s' % test_X_file)
print(' Testing outputs: %s' % test_y_file)
print("Created these files:")
print(" Training inputs: %s" % train_X_file)
print(" Training outputs: %s" % train_y_file)
print(" Testing inputs: %s" % test_X_file)
print(" Testing outputs: %s" % test_y_file)


@main.command()
@click.argument('samples-file', type=click.Path(exists=True))
@click.argument("samples-file", type=click.Path(exists=True))
def aboutdata(samples_file):
'''Simply prints the number of variables and number of samples for the given file
'''
"""Simply prints the number of variables and number of samples for the given file"""
d = pd.read_csv(samples_file)
print('Data file: %s' % samples_file)
print('Number of input variables: %d' % d.shape[1])
print('Number of input samples: %d' % d.shape[0])
print("Data file: %s" % samples_file)
print("Number of input variables: %d" % d.shape[1])
print("Number of input samples: %d" % d.shape[0])


@main.command()
@click.argument('train-x', type=click.Path(exists=True))
@click.argument('train-y', type=click.Path(exists=True))
@click.argument('test-x', type=click.Path(exists=True))
@click.argument('test-y', type=click.Path(exists=True))
@click.argument('varnames', type=click.Path())
@click.argument("train-x", type=click.Path(exists=True))
@click.argument("train-y", type=click.Path(exists=True))
@click.argument("test-x", type=click.Path(exists=True))
@click.argument("test-y", type=click.Path(exists=True))
@click.argument("varnames", type=click.Path())
def testffx(train_x, train_y, test_x, test_y, varnames):
'''Usage: runffx test TRAIN_IN.csv TRAIN_OUT.csv TEST_IN.csv TEST_OUT.csv [VARNAMES.csv]
"""Usage: runffx test TRAIN_IN.csv TRAIN_OUT.csv TEST_IN.csv TEST_OUT.csv [VARNAMES.csv]

- Builds a model from training data TRAIN_IN.csv and TRAIN_OUT.csv.
- Computes & prints test nmse using test data TEST_IN.csv TEST_OUT.csv.
Expand All @@ -125,27 +130,36 @@ def testffx(train_x, train_y, test_x, test_y, varnames):
In the training and test files, there is one column for each sample point. The inputs
files have one row for each input variable. The outputs files have just one row total,
because the output is scalar. Values in a given row are separated by spaces.
'''
print('Begin ffx test.')
"""
print("Begin ffx test.")

# get X/y
train_X, train_y, test_X, test_y = [pd.read_csv(f) for f in (train_x, train_y, test_x, test_y)]
train_X, train_y, test_X, test_y = [
pd.read_csv(f) for f in (train_x, train_y, test_x, test_y)
]

# get varnames
varnames = pd.read_csv(varnames) if varnames else ['x%d' % i for i in range(train_X.shape[1])]
varnames = (
pd.read_csv(varnames)
if varnames
else ["x%d" % i for i in range(train_X.shape[1])]
)

# build models
with time_execution_scope() as timer_result:
models = run(train_X, train_y, test_X, test_y, varnames)

output_csv = 'pareto_front_%s.csv' % str(int(timer_result.start_time))
output_csv = "pareto_front_%s.csv" % str(int(timer_result.start_time))
pd.DataFrame(
[[model.numBases(), (model.test_nmse * 100.0), model] for model in models],
columns=['Num Bases', 'Test error (%)', 'Model'],
).to_csv(output_csv, encoding='utf-8')
columns=["Num Bases", "Test error (%)", "Model"],
).to_csv(output_csv, encoding="utf-8")

print('Done. Runtime: %.1f seconds. Results are in: %s' % (timer_result.seconds, output_csv))
print(
"Done. Runtime: %.1f seconds. Results are in: %s"
% (timer_result.seconds, output_csv)
)


if __name__ == '__main__':
if __name__ == "__main__":
main() # pylint:disable=no-value-for-parameter
Loading