Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
3 changes: 2 additions & 1 deletion .circleci/config.yml
Original file line number Diff line number Diff line change
Expand Up @@ -9,7 +9,7 @@ orbs:
# Orb commands and jobs help you with common scripting around a language/tool
# so you don't have to copy and paste it everywhere.
# See the orb documentation here: https://circleci.com/developer/orbs/orb/circleci/python
python: circleci/python@1.5.0
python: circleci/python@2.1.1

# Define a job to be invoked later in a workflow.
# See: https://circleci.com/docs/2.0/configuration-reference/#jobs
Expand All @@ -33,6 +33,7 @@ jobs:
name: build clustpy
command: |
python -m pip install --upgrade pip
pip install build Cython numpy
pip install pytest
pip install -e .
- run:
Expand Down
37 changes: 0 additions & 37 deletions .github/workflows/lint.yml

This file was deleted.

11 changes: 6 additions & 5 deletions .github/workflows/publish.yml
Original file line number Diff line number Diff line change
Expand Up @@ -21,27 +21,28 @@ jobs:
runs-on: ubuntu-latest

steps:
- uses: actions/checkout@v3
- uses: actions/checkout@v4

- name: Set up Python
uses: actions/setup-python@v3
uses: actions/setup-python@v5
with:
python-version: '3.12'

- name: Display Python version
run: python -c "import sys; print(sys.version)"

- name: Install dependencies
run: |
python -m pip install --upgrade pip
pip install build

- name: Build package
run: python -m build --sdist

- name: Publish package to Test PyPI
uses: pypa/gh-action-pypi-publish@release/v1
with:
user: __token__
password: ${{ secrets.TEST_PYPI_API_TOKEN }}
repository_url: https://test.pypi.org/legacy/

- name: Publish package to PyPI
if: startsWith(github.ref, 'refs/tags')
uses: pypa/gh-action-pypi-publish@release/v1
Expand Down
54 changes: 42 additions & 12 deletions .github/workflows/test-main.yml
Original file line number Diff line number Diff line change
Expand Up @@ -13,37 +13,67 @@ permissions:
contents: read

jobs:
build:
lint:

runs-on: ubuntu-latest

steps:
- uses: actions/checkout@v4

- name: Set up Python 3.12
uses: actions/setup-python@v5
with:
python-version: '3.12'
cache: 'pip' # Speeds up flake8 installation

- name: Install dependencies
run: |
python -m pip install --upgrade pip
pip install flake8
- name: Lint with flake8
run: |
# stop the build if there are Python syntax errors or undefined names
flake8 . --count --select=E9,F63,F7,F82 --show-source --statistics
# exit-zero treats all errors as warnings. The GitHub editor is 127 chars wide
flake8 . --count --exit-zero --max-complexity=10 --max-line-length=127 --statistics
build:
needs: lint # This job only starts if 'lint' passes
runs-on: ubuntu-latest
strategy:
# You can use PyPy versions in python-version.
# For example, pypy-2.7 and pypy-3.8
fail-fast: false # don't break 3.12 if 3.10 fails
matrix:
python-version: ["3.12", "3.10"]

steps:
- uses: actions/checkout@v3
- uses: actions/checkout@v4

- name: Set up Python ${{ matrix.python-version }}
uses: actions/setup-python@v3
uses: actions/setup-python@v5
with:
python-version: ${{ matrix.python-version }}

# You can test your matrix by printing the current Python version
- name: Display Python version
run: python -c "import sys; print(sys.version)"
cache: 'pip' # Automatically caches your dependencies

- name: Install dependencies
run: |
python -m pip install --upgrade pip
pip install pytest pytest-cov
pip install -e .[full]
- name: Test with pytest
- name: Test with pytest (with codecov)
if: ${{ matrix.python-version == '3.10' }}
run: |
pytest -m "not largedata" --cov --cov-report=xml
- name: Test with pytest (without codecov)
if: ${{ matrix.python-version != '3.10' }}
run: |
pytest -m "not largedata" --cov
pytest -m "not largedata"
- name: Upload coverage reports to Codecov
uses: codecov/codecov-action@v4.0.1
if: ${{ matrix.python-version == '3.10' }}
uses: codecov/codecov-action@v4
with:
token: ${{ secrets.CODECOV_TOKEN }}
fail_ci_if_error: true # Helpful to know if upload failed
22 changes: 13 additions & 9 deletions clustpy/data/_utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -7,14 +7,17 @@
except:
print(
"[WARNING] Could not import nltk in clustpy.data.real_world_data to use the SnowballStemmer. Please install nltk by 'pip install nltk' if necessary")
try:
from PIL import Image
except:
print(
"[WARNING] Could not import PIL in clustpy.data.real_world_data. Please install PIL by 'pip install Pillow' if necessary")
import numpy as np
import urllib.request
import os
from pathlib import Path
import ssl
from PIL import Image
from sklearn.feature_extraction.text import TfidfTransformer, CountVectorizer
from sklearn.feature_selection import VarianceThreshold
from sklearn.datasets import fetch_file


DEFAULT_DOWNLOAD_PATH = str(Path.home() / "Downloads/clustpy_datafiles")
Expand Down Expand Up @@ -63,11 +66,11 @@ def _download_file(file_url: str, filename_local: str) -> None:
filename_local : str
local name of the file after it has been downloaded
"""
local_path = Path(filename_local)
local_dir = local_path.parent
local_filename = local_path.name
print("Downloading data set from {0} to {1}".format(file_url, filename_local))
default_ssl = ssl._create_default_https_context
ssl._create_default_https_context = ssl._create_unverified_context
urllib.request.urlretrieve(file_url, filename_local)
ssl._create_default_https_context = default_ssl
fetch_file(file_url, folder=local_dir, local_filename=local_filename)


def _download_file_from_google_drive(file_id: str, filename_local: str, chunk_size: int = 32768) -> None:
Expand Down Expand Up @@ -187,7 +190,7 @@ def _load_image_data(image: str, image_size: tuple, color_image: bool) -> np.nda
image_data : np.ndarray
The numpy array containing the image data
"""
if type(image) is str:
if isinstance(image, str):
pil_image = Image.open(image)
else:
pil_image = Image.fromarray(np.uint8(image))
Expand All @@ -196,7 +199,8 @@ def _load_image_data(image: str, image_size: tuple, color_image: bool) -> np.nda
# Convert to coherent size
if image_size is not None:
pil_image = pil_image.resize(image_size)
image_data = np.asarray(pil_image)
image_data = np.array(pil_image).copy()
pil_image.close()
assert image_size is None or image_data.shape == (
image_size[0], image_size[1], 3), "Size of image is not correct. Should be {0} but is {1}".format(image_size,
image_data.shape)
Expand Down
8 changes: 2 additions & 6 deletions clustpy/data/real_torchvision_data.py
Original file line number Diff line number Diff line change
@@ -1,7 +1,6 @@
import torchvision
import torch
import numpy as np
import ssl
from clustpy.data._utils import _get_download_dir, _load_image_data, flatten_images
from sklearn.datasets._base import Bunch

Expand Down Expand Up @@ -45,7 +44,7 @@ def _get_data_and_labels(dataset: torchvision.datasets.VisionDataset, image_size
labels.append(label)
image_data = _load_image_data(path, image_size, True)
data_list.append(image_data)
# Convert data form list to numpy array
# Convert data from list to numpy array
data = np.array(data_list)
labels = np.array(labels)
if type(data) is np.ndarray:
Expand Down Expand Up @@ -89,8 +88,6 @@ def _load_torch_image_data(data_source: torchvision.datasets.VisionDataset, subs
assert subset in ["all", "train",
"test"], "subset must match 'all', 'train' or 'test'. Your input {0}".format(subset)
# Get data from source
default_ssl = ssl._create_default_https_context
ssl._create_default_https_context = ssl._create_unverified_context
if subset == "all" or subset == "train":
# Load training data
if uses_train_param:
Expand All @@ -117,7 +114,6 @@ def _load_torch_image_data(data_source: torchvision.datasets.VisionDataset, subs
# Convert data to float and labels to int
data = data.float()
labels = labels.int()
ssl._create_default_https_context = default_ssl
# Check data dimensions
if data.dim() < 3 or data.dim() > 5:
raise Exception(
Expand All @@ -137,7 +133,7 @@ def _load_torch_image_data(data_source: torchvision.datasets.VisionDataset, subs
# Some dataset (e.g., SVHN) do not have the class information included
if hasattr(dataset, "classes"):
return Bunch(dataset_name=dataset.__class__.__name__, data=data_flatten, target=labels_numpy,
images=data_image, image_format=image_format, classes=dataset.classes)
images=data_image, image_format=image_format, classes=dataset.classes.copy())
else:
return Bunch(dataset_name=dataset.__class__.__name__, data=data_flatten, target=labels_numpy,
images=data_image, image_format=image_format)
Expand Down
12 changes: 3 additions & 9 deletions clustpy/data/real_uci_data.py
Original file line number Diff line number Diff line change
@@ -1,9 +1,4 @@
try:
from PIL import Image
except:
print(
"[WARNING] Could not import PIL in clustpy.data.real_world_data. Please install PIL by 'pip install Pillow' if necessary")
from clustpy.data._utils import _download_file, _get_download_dir, _decompress_z_file, _load_data_file, flatten_images, _transform_text_data
from clustpy.data._utils import _download_file, _get_download_dir, _decompress_z_file, _load_data_file, flatten_images, _transform_text_data, _load_image_data
import os
import numpy as np
import zipfile
Expand Down Expand Up @@ -1167,8 +1162,7 @@ def load_cmu_faces(return_X_y: bool = False, downloads_path: str = None) -> Bunc
if not image.endswith("_4.pgm"):
continue
# get image data
image_data = Image.open(path_images + "/" + image)
image_array = np.array(image_data)
image_array = _load_image_data(path_images + "/" + image, None, False)
# Get labels
name_parts = image.split("_")
user_id = np.argwhere(names == name_parts[0])[0][0]
Expand All @@ -1188,7 +1182,7 @@ def load_cmu_faces(return_X_y: bool = False, downloads_path: str = None) -> Bunc
return data_flatten, labels
else:
return Bunch(dataset_name="CMUFace", data=data_flatten, target=labels, images=data_image, image_format="HW",
classes=[names, positions, expressions, eyes])
classes=(names, positions, expressions, eyes))


def load_gene_expression_cancer_rna_seq(return_X_y: bool = False, downloads_path: str = None):
Expand Down
Loading
Loading