Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
20 changes: 11 additions & 9 deletions .github/workflows/ci.yml
Original file line number Diff line number Diff line change
Expand Up @@ -21,11 +21,13 @@ jobs:
with:
python-version: ${{ matrix.python-version }}

- name: Install uv
uses: astral-sh/setup-uv@v1
with:
version: "latest"

- name: Install dependencies
run: |
python -m pip install --upgrade pip
pip install -r dev-requirements.txt
pip install -e .
run: uv sync --dev

- name: Set PYTHONPATH
run: echo "PYTHONPATH=$(pwd):$PYTHONPATH" >> $GITHUB_ENV
Expand All @@ -39,11 +41,11 @@ jobs:
fi

- name: Run tests with coverage
run: pytest --cov ffx
run: make test-cov

- name: Upload coverage to Coveralls
if: matrix.python-version == '3.11'
uses: coverallsapp/github-action@v2
with:
github-token: ${{ secrets.GITHUB_TOKEN }}
coveralls-endpoint: https://coveralls.io
run: uv run coveralls
env:
COVERALLS_REPO_TOKEN: ${{ secrets.COVERALLS_REPO_TOKEN }}
GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }}
3 changes: 3 additions & 0 deletions .gitignore
Original file line number Diff line number Diff line change
Expand Up @@ -27,6 +27,9 @@ share/python-wheels/
*.egg
MANIFEST

# uv
.uv/

# PyInstaller
# Usually these files are written by a python script from a template
# before PyInstaller builds the exe, so as to inject date/other infos into it.
Expand Down
10 changes: 0 additions & 10 deletions .isort.cfg

This file was deleted.

20 changes: 0 additions & 20 deletions .pylintrc

This file was deleted.

4 changes: 4 additions & 0 deletions CHANGES.md
Original file line number Diff line number Diff line change
@@ -1,5 +1,9 @@
# Changes

## 2.1.0

- Migrated FFX to uv / ruff and replaced Travis CI with GitHub Actions

## 2.0.1 / 2.0.2

- Fix ImportError introduced in 2.0.0
Expand Down
45 changes: 32 additions & 13 deletions Makefile
Original file line number Diff line number Diff line change
@@ -1,19 +1,38 @@
pylint:
pylint -j 0 `git ls-files '*.py'` --rcfile=.pylintrc
# Install dependencies
install:
uv sync

black:
black ffx --line-length 100 --target-version py27 --target-version py35 --target-version py36 --target-version py37 --target-version py38 -S --fast --exclude "build/|buck-out/|dist/|_build/|\.eggs/|\.git/|\.hg/|\.mypy_cache/|\.nox/|\.tox/|\.venv/"
install-dev:
uv sync --dev

isort:
isort -rc -y
lint:
uv run ruff check ffx

validate: pylint isort black
format:
uv run ruff format ffx

pypi:
rm -rf dist/*
python setup.py sdist bdist_egg bdist_wheel
twine upload dist/*
#twine upload --repository-url https://test.pypi.org/legacy/ dist/* # testpypi
typecheck:
uv run ty check ffx

validate: lint format typecheck

# Testing
test:
pytest ffx_tests/
uv run pytest ffx_tests/

test-cov:
uv run pytest ffx_tests/ --cov=ffx --cov-report=term-missing --cov-report=xml

# Build and publish
build:
uv build

pypi:
uv build
uv publish

# Clean
clean:
rm -rf dist/
rm -rf build/
rm -rf *.egg-info/
7 changes: 0 additions & 7 deletions dev-requirements.txt

This file was deleted.

2 changes: 1 addition & 1 deletion ffx/__init__.py
Original file line number Diff line number Diff line change
@@ -1,3 +1,3 @@
from .api import FFXRegressor, run

__all__ = ['run', 'FFXRegressor']
__all__ = ["run", "FFXRegressor"]
19 changes: 13 additions & 6 deletions ffx/api.py
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
'''api.py defines user interfaces to FFX. run() runs the complete method.
"""api.py defines user interfaces to FFX. run() runs the complete method.
FFXRegressor is a Scikit-learn style regressor.
'''
"""

from sklearn.base import BaseEstimator, RegressorMixin
from sklearn.utils import check_array, check_X_y
Expand All @@ -10,17 +10,20 @@
def run(train_X, train_y, test_X, test_y, varnames=None, verbose=False):
from .core import MultiFFXModelFactory

return MultiFFXModelFactory().build(train_X, train_y, test_X, test_y, varnames, verbose)
return MultiFFXModelFactory().build(
train_X, train_y, test_X, test_y, varnames, verbose
)


class FFXRegressor(BaseEstimator, RegressorMixin):
'''This class provides a Scikit-learn style estimator.'''
class FFXRegressor(RegressorMixin, BaseEstimator):
"""This class provides a Scikit-learn style estimator."""

def fit(self, X, y):
X, y = check_X_y(X, y, y_numeric=True, multi_output=False)
self.n_features_in_ = X.shape[1] # pylint: disable=attribute-defined-outside-init
# if X is a Pandas DataFrame, we don't have to pass in varnames.
# otherwise we make up placeholders.
if hasattr(X, 'columns'):
if hasattr(X, "columns"):
varnames = None
else:
varnames = ["X%d" % i for i in range(len(X))]
Expand All @@ -33,6 +36,10 @@ def fit(self, X, y):
def predict(self, X):
check_is_fitted(self, "model_")
X = check_array(X, accept_sparse=False)
if X.shape[1] != self.n_features_in_:
raise ValueError(
f"X has {X.shape[1]} features, but {self.__class__.__name__} is expecting {self.n_features_in_} features as input."
)
return self.model_.predict(X)

def complexity(self):
Expand Down
108 changes: 61 additions & 47 deletions ffx/cli.py
Original file line number Diff line number Diff line change
Expand Up @@ -10,15 +10,14 @@

@click.group()
def main():
'''Fast Function Extraction (FFX) toolkit.
'''
"""Fast Function Extraction (FFX) toolkit."""


@main.command()
@click.argument('x_file', type=click.Path(exists=True))
@click.argument('y_file', type=click.Path(exists=True))
@click.argument("x_file", type=click.Path(exists=True))
@click.argument("y_file", type=click.Path(exists=True))
def splitdata(x_file, y_file):
'''Usage: ffx splitdata INPUTS_FILE[.csv/.txt] OUTPUTS_FILE[.csv/.txt]
"""Usage: ffx splitdata INPUTS_FILE[.csv/.txt] OUTPUTS_FILE[.csv/.txt]

Given csv-formatted inputs and outputs files, splits them into training and testing data files
of the form INPUTS_FILE_train.csv, OUTPUTS_FILE_train.csv, INPUTS_FILE_test.csv,
Expand All @@ -29,40 +28,47 @@ def splitdata(x_file, y_file):
In the csv files, there is one column for each sample point. The inputs files have one row for
each input variable. The outputs files have just one row total, because the output is scalar.
Values in a given row are separated by spaces.
'''
if not (x_file.endswith('.csv') or x_file.endswith('.txt')):
print('INPUTS_FILE file \'%s\' needs to end with .csv or .txt.' % x_file)
"""
if not (x_file.endswith(".csv") or x_file.endswith(".txt")):
print("INPUTS_FILE file '%s' needs to end with .csv or .txt." % x_file)
return

if not (y_file.endswith('.csv') or y_file.endswith('.txt')):
print('OUTPUTS_FILE file \'%s\' needs to end with .csv or .txt.' % y_file)
if not (y_file.endswith(".csv") or y_file.endswith(".txt")):
print("OUTPUTS_FILE file '%s' needs to end with .csv or .txt." % y_file)
return

# create the target output filenames, and ensure they don't exist
join = lambda n, prefix: os.path.join(os.path.dirname(n), prefix + os.path.basename(n))
train_X_file = join(x_file, 'train_')
train_y_file = join(y_file, 'train_')
test_X_file = join(x_file, 'test_')
test_y_file = join(y_file, 'test_')
def join(n, prefix):
return os.path.join(os.path.dirname(n), prefix + os.path.basename(n))

train_X_file = join(x_file, "train_")
train_y_file = join(y_file, "train_")
test_X_file = join(x_file, "test_")
test_y_file = join(y_file, "test_")

for newfile in [train_X_file, train_y_file, test_X_file, test_y_file]:
if os.path.exists(newfile):
print('New file \'%s\' exists, and should not. Early exit.' % newfile)
print("New file '%s' exists, and should not. Early exit." % newfile)
return

print('Begin ffx splitdata. INPUTS_FILE.csv=%s, OUTPUTS_FILE.csv=%s' % (x_file, y_file))
print(
"Begin ffx splitdata. INPUTS_FILE.csv=%s, OUTPUTS_FILE.csv=%s"
% (x_file, y_file)
)

X = pd.read_csv(x_file) # [sample_i][var_i] : float
y = pd.read_csv(y_file) # [sample_i] : float

if X.shape[0] != y.shape[0]:
X = X.T
assert X.shape[0] == y.shape[0], 'Error: X shape and y shape do not match. Early exit.'
assert X.shape[0] == y.shape[0], (
"Error: X shape and y shape do not match. Early exit."
)

# create train/test data from X,y
I = np.argsort(y)
test_I, train_I = [], []
for (loc, i) in enumerate(I):
for loc, i in enumerate(I):
if loc % 4 == 0:
test_I.append(i)
else:
Expand All @@ -74,42 +80,41 @@ def splitdata(x_file, y_file):
test_y = np.take(y, test_I)

print(
'There will be %d samples in training data, and %d samples in test data'
"There will be %d samples in training data, and %d samples in test data"
% (len(train_y), len(test_y))
)

delimiter = ',' if x_file.endswith('.csv') else '\t'
delimiter = "," if x_file.endswith(".csv") else "\t"
np.savetxt(train_X_file, train_X, delimiter=delimiter)
np.savetxt(train_y_file, train_y, delimiter=delimiter)
np.savetxt(test_X_file, test_X, delimiter=delimiter)
np.savetxt(test_y_file, test_y, delimiter=delimiter)

print('Created these files:')
print(' Training inputs: %s' % train_X_file)
print(' Training outputs: %s' % train_y_file)
print(' Testing inputs: %s' % test_X_file)
print(' Testing outputs: %s' % test_y_file)
print("Created these files:")
print(" Training inputs: %s" % train_X_file)
print(" Training outputs: %s" % train_y_file)
print(" Testing inputs: %s" % test_X_file)
print(" Testing outputs: %s" % test_y_file)


@main.command()
@click.argument('samples-file', type=click.Path(exists=True))
@click.argument("samples-file", type=click.Path(exists=True))
def aboutdata(samples_file):
'''Simply prints the number of variables and number of samples for the given file
'''
"""Simply prints the number of variables and number of samples for the given file"""
d = pd.read_csv(samples_file)
print('Data file: %s' % samples_file)
print('Number of input variables: %d' % d.shape[1])
print('Number of input samples: %d' % d.shape[0])
print("Data file: %s" % samples_file)
print("Number of input variables: %d" % d.shape[1])
print("Number of input samples: %d" % d.shape[0])


@main.command()
@click.argument('train-x', type=click.Path(exists=True))
@click.argument('train-y', type=click.Path(exists=True))
@click.argument('test-x', type=click.Path(exists=True))
@click.argument('test-y', type=click.Path(exists=True))
@click.argument('varnames', type=click.Path())
@click.argument("train-x", type=click.Path(exists=True))
@click.argument("train-y", type=click.Path(exists=True))
@click.argument("test-x", type=click.Path(exists=True))
@click.argument("test-y", type=click.Path(exists=True))
@click.argument("varnames", type=click.Path())
def testffx(train_x, train_y, test_x, test_y, varnames):
'''Usage: runffx test TRAIN_IN.csv TRAIN_OUT.csv TEST_IN.csv TEST_OUT.csv [VARNAMES.csv]
"""Usage: runffx test TRAIN_IN.csv TRAIN_OUT.csv TEST_IN.csv TEST_OUT.csv [VARNAMES.csv]

- Builds a model from training data TRAIN_IN.csv and TRAIN_OUT.csv.
- Computes & prints test nmse using test data TEST_IN.csv TEST_OUT.csv.
Expand All @@ -125,27 +130,36 @@ def testffx(train_x, train_y, test_x, test_y, varnames):
In the training and test files, there is one column for each sample point. The inputs
files have one row for each input variable. The outputs files have just one row total,
because the output is scalar. Values in a given row are separated by spaces.
'''
print('Begin ffx test.')
"""
print("Begin ffx test.")

# get X/y
train_X, train_y, test_X, test_y = [pd.read_csv(f) for f in (train_x, train_y, test_x, test_y)]
train_X, train_y, test_X, test_y = [
pd.read_csv(f) for f in (train_x, train_y, test_x, test_y)
]

# get varnames
varnames = pd.read_csv(varnames) if varnames else ['x%d' % i for i in range(train_X.shape[1])]
varnames = (
pd.read_csv(varnames)
if varnames
else ["x%d" % i for i in range(train_X.shape[1])]
)

# build models
with time_execution_scope() as timer_result:
models = run(train_X, train_y, test_X, test_y, varnames)

output_csv = 'pareto_front_%s.csv' % str(int(timer_result.start_time))
output_csv = "pareto_front_%s.csv" % str(int(timer_result.start_time))
pd.DataFrame(
[[model.numBases(), (model.test_nmse * 100.0), model] for model in models],
columns=['Num Bases', 'Test error (%)', 'Model'],
).to_csv(output_csv, encoding='utf-8')
columns=["Num Bases", "Test error (%)", "Model"],
).to_csv(output_csv, encoding="utf-8")

print('Done. Runtime: %.1f seconds. Results are in: %s' % (timer_result.seconds, output_csv))
print(
"Done. Runtime: %.1f seconds. Results are in: %s"
% (timer_result.seconds, output_csv)
)


if __name__ == '__main__':
if __name__ == "__main__":
main() # pylint:disable=no-value-for-parameter
Loading