Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
4 changes: 2 additions & 2 deletions .github/workflows/ci.yml
Original file line number Diff line number Diff line change
Expand Up @@ -15,7 +15,7 @@ jobs:
with:
# Pin ruff version to make sure we do not break our builds at the
# worst times
version: "0.14.5"
version: "0.14.7"

test:
# name: Test (${{ matrix.python-version }}, ${{ matrix.os }})
Expand All @@ -27,7 +27,7 @@ jobs:
fail-fast: false
matrix:
# os: ["ubuntu-latest", "macos-latest", "windows-latest"]
python-version: ['3.9', '3.10', '3.11', '3.12']
python-version: ['3.9', '3.10', '3.11', '3.12', '3.13']

defaults:
run:
Expand Down
6 changes: 3 additions & 3 deletions condarecipe/larray/meta.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -35,16 +35,16 @@ test:
- pytest --pyargs larray

about:
home: http://github.com/larray-project/larray
home: https://github.com/larray-project/larray
license: GPL-3.0-only
license_family: GPL
license_file: LICENSE
summary: "N-dimensional labelled arrays in Python"
description: |
LArray is an open source Python library that aims to provide tools for
easy exploration and manipulation of N-dimensional labelled data structures.
doc_url: http://larray.readthedocs.io/
dev_url: http://github.com/larray-project/larray
doc_url: https://larray.readthedocs.io/
dev_url: https://github.com/larray-project/larray

extra:
recipe-maintainers:
Expand Down
5 changes: 5 additions & 0 deletions doc/source/changes/version_0_35.rst.inc
Original file line number Diff line number Diff line change
Expand Up @@ -31,6 +31,8 @@ Backward incompatible changes
New features
^^^^^^^^^^^^

* added support for Python 3.13 (closes :issue:`1128`).

* :py:obj:`Array.plot()` now has an ``animate`` argument to produce animated
plots. The argument takes an axis (it also supports several axes but that is
rarely useful) and will create an animation, with one image per label of that
Expand Down Expand Up @@ -92,6 +94,9 @@ Miscellaneous improvements

>>> arr.plot.bar(stack='gender')

* :py:obj:`from_frame()` and :py:obj:`asarray()` now support Pandas DataFrames
with more than one level (row) of columns (closes :issue:`466`).

* :py:obj:`Array.to_frame()` gained an ``ncolaxes`` argument to control how many
axes should be used as columns (defaults to 1, as before).

Expand Down
5 changes: 4 additions & 1 deletion larray/core/axis.py
Original file line number Diff line number Diff line change
Expand Up @@ -2302,7 +2302,10 @@ def index(self, axis, compatible=False) -> int:
name = axis
if name is None:
raise ValueError(f"{axis!r} is not in collection")
return self.names.index(name)
try:
return self.names.index(name)
except ValueError:
raise ValueError(f"axis {name!r} is not in collection")

# XXX: we might want to return a new AxisCollection (same question for other inplace operations:
# append, extend, pop, __delitem__, __setitem__)
Expand Down
79 changes: 54 additions & 25 deletions larray/inout/pandas.py
Original file line number Diff line number Diff line change
Expand Up @@ -6,7 +6,6 @@
from larray.core.array import Array
from larray.core.axis import Axis, AxisCollection
from larray.core.constants import nan
from larray.util.misc import unique_list


def decode(s, encoding='utf-8', errors='strict'):
Expand Down Expand Up @@ -46,34 +45,51 @@ def index_to_labels(idx, sort=True):
"""
if isinstance(idx, pd.MultiIndex):
if sort:
return list(idx.levels)
return list(idx.levels) # list of pd.Index
else:
return [unique_list(idx.get_level_values(label)) for label in range(idx.nlevels)]
# requires Pandas >= 0.23 (and it does NOT sort the values)
# TODO: unsure to_list is necessary (larray tests pass without it
# but I am not sure this code path is covered by tests)
# and there might be a subtle difference. The type
# of the returned object without to_list() is pd.Index
return [idx.unique(level).to_list() for level in range(idx.nlevels)]
else:
assert isinstance(idx, pd.Index)
labels = list(idx.values)
return [sorted(labels) if sort else labels]


def cartesian_product_df(df, sort_rows=False, sort_columns=False, fill_value=nan, **kwargs):
idx = df.index
labels = index_to_labels(idx, sort=sort_rows)
def product_index(idx, sort=False):
"""
Converts a pandas (Multi)Index to an (Multi)Index with a cartesian
product of the labels present in each level
"""
labels = index_to_labels(idx, sort=sort)
if isinstance(idx, pd.MultiIndex):
if sort_rows:
new_index = pd.MultiIndex.from_product(labels)
else:
new_index = pd.MultiIndex.from_tuples(list(product(*labels)))
return pd.MultiIndex.from_product(labels), labels
else:
if sort_rows:
new_index = pd.Index(labels[0], name=idx.name)
assert isinstance(idx, pd.Index)
if sort:
return pd.Index(labels[0], name=idx.name), labels
else:
new_index = idx
columns = sorted(df.columns) if sort_columns else list(df.columns)
# the prodlen test is meant to avoid the more expensive array_equal test
prodlen = np.prod([len(axis_labels) for axis_labels in labels])
if prodlen == len(df) and columns == list(df.columns) and np.array_equal(idx.values, new_index.values):
return df, labels
return df.reindex(index=new_index, columns=columns, fill_value=fill_value, **kwargs), labels
return idx, labels


def cartesian_product_df(df, sort_rows=False, sort_columns=False,
                         fill_value=nan, **kwargs):
    """
    Reindex a DataFrame on the product indexes of its rows and columns.

    The target row index and column index are each computed by
    ``product_index`` (using ``sort_rows`` and ``sort_columns`` respectively
    as its ``sort`` argument).

    Parameters
    ----------
    df : DataFrame
        Frame to expand.
    sort_rows : bool, optional
        Passed as ``sort`` to ``product_index`` for ``df.index``.
    sort_columns : bool, optional
        Passed as ``sort`` to ``product_index`` for ``df.columns``.
    fill_value : scalar, optional
        Passed to ``df.reindex`` as ``fill_value``.
    **kwargs
        Extra keyword arguments forwarded to ``df.reindex``.

    Returns
    -------
    tuple
        ``(frame, labels)`` where ``labels`` is the list of row labels
        followed by the column labels, as returned by ``product_index``.
        ``frame`` is ``df`` itself when both of its indexes already equal
        their product index, otherwise the reindexed frame.
    """
    row_index = df.index
    col_index = df.columns
    full_rows, row_labels = product_index(row_index, sort=sort_rows)
    full_cols, col_labels = product_index(col_index, sort=sort_columns)
    all_labels = row_labels + col_labels

    # Compare lengths first: this is cheap and lets us skip the more
    # expensive element-wise comparisons whenever the sizes already differ.
    same_size = (len(full_rows) == len(row_index)
                 and len(full_cols) == len(col_index))
    if (same_size
            and np.array_equal(row_index.values, full_rows.values)
            and np.array_equal(col_index.values, full_cols.values)):
        # nothing to add or reorder: hand back the original frame untouched
        return df, all_labels

    expanded = df.reindex(index=full_rows, columns=full_cols,
                          fill_value=fill_value, **kwargs)
    return expanded, all_labels


def from_series(s, sort_rows=False, fill_value=nan, meta=None, **kwargs) -> Array:
Expand Down Expand Up @@ -124,8 +140,13 @@ def from_series(s, sort_rows=False, fill_value=nan, meta=None, **kwargs) -> Arra
a1 b1 6.0 7.0
"""
if isinstance(s.index, pd.MultiIndex):
# TODO: use argument sort=False when it will be available
# (see https://github.com/pandas-dev/pandas/issues/15105)
# Using unstack sort argument (requires Pandas >= 2.1) would make this
# code simpler, but it makes it even slower than it already is.
# As of Pandas 2.3.3 on 12/2025, a series with a large MultiIndex is
# extremely slow to unstack, whether sort is used or not:
# >>> arr = ndtest((200, 200, 200))
# >>> s = arr.to_series() # 31.4 ms
# >>> s.unstack(level=-1, fill_value=np.nan) # 1.5s !!!
df = s.unstack(level=-1, fill_value=fill_value)
# pandas (un)stack and pivot(_table) methods return a Dataframe/Series with sorted index and columns
if not sort_rows:
Expand Down Expand Up @@ -211,13 +232,15 @@ def from_frame(df, sort_rows=False, sort_columns=False, parse_header=False, unfo

# handle 2 or more dimensions with the last axis name given using \
if unfold_last_axis_name:
# Note that having several axes in columns (and using df.columns.names)
# in this case does not make sense
if isinstance(axes_names[-1], str) and '\\' in axes_names[-1]:
last_axes = [name.strip() for name in axes_names[-1].split('\\')]
axes_names = axes_names[:-1] + last_axes
else:
axes_names += [None]
else:
axes_names += [df.columns.name]
axes_names += df.columns.names

if cartesian_prod:
df, axes_labels = cartesian_product_df(df, sort_rows=sort_rows, sort_columns=sort_columns,
Expand All @@ -226,12 +249,18 @@ def from_frame(df, sort_rows=False, sort_columns=False, parse_header=False, unfo
if sort_rows or sort_columns:
raise ValueError('sort_rows and sort_columns cannot not be used when cartesian_prod is set to False. '
'Please call the method sort_labels on the returned array to sort rows or columns')
axes_labels = index_to_labels(df.index, sort=False)
index_labels = index_to_labels(df.index, sort=False)
column_labels = index_to_labels(df.columns, sort=False)
axes_labels = index_labels + column_labels

# Pandas treats column labels as column names (strings) so we need to convert them to values
last_axis_labels = [parse(cell) for cell in df.columns.values] if parse_header else list(df.columns.values)
axes_labels.append(last_axis_labels)
if parse_header:
ncolaxes = df.columns.nlevels
for i in range(len(axes_labels) - ncolaxes, len(axes_labels)):
axes_labels[i] = [parse(cell) for cell in axes_labels[i]]

# TODO: use zip(..., strict=True) instead when we drop support for Python 3.9
assert len(axes_labels) == len(axes_names)
axes = AxisCollection([Axis(labels, name) for labels, name in zip(axes_labels, axes_names)])
data = df.values.reshape(axes.shape)
return Array(data, axes, meta=meta)
Expand Down
76 changes: 76 additions & 0 deletions larray/tests/test_array.py
Original file line number Diff line number Diff line change
Expand Up @@ -4121,6 +4121,7 @@ def test_to_frame():
assert df.columns.to_list() == ['c0', 'c1']
assert df.index.names == ['a', r'b\c']


def test_from_frame():
# 1) data = scalar
# ================
Expand Down Expand Up @@ -4530,6 +4531,81 @@ def test_from_frame():
res = from_frame(df, fill_value=-1)
assert_larray_equal(res, expected)

# 6) with a multi-index in columns
# ================================

# a) normal
arr = ndtest((2, 2, 2, 2))
df = arr.to_frame(ncolaxes=2)
res = from_frame(df)
assert_larray_equal(res, arr)

# b) with duplicated axis names
arr = ndtest("a=a0,a1;a=b0,b1;a=c0,c1;a=d0,d1")
df = arr.to_frame(ncolaxes=2)
res = from_frame(df)
assert_larray_equal(res, arr)

# c) with duplicated axes names and labels
arr = ndtest("a=a0,a1;a=a0,a1;a=a0,a1;a=a0,a1")
df = arr.to_frame(ncolaxes=2)
res = from_frame(df)
assert_larray_equal(res, arr)

# d) with unsorted labels
arr = ndtest("a=a1,a0;b=b1,b0;c=c1,c0;d=d1,d0")
df = arr.to_frame(ncolaxes=2)
res = from_frame(df)
assert_larray_equal(res, arr)

# e) with sorting of unsorted column labels
arr = ndtest("a=a1,a0;b=b1,b0;c=c1,c0;d=d1,d0")
df = arr.to_frame(ncolaxes=2)
expected = from_string(r"""
a b c\d d0 d1
a1 b1 c0 3 2
a1 b1 c1 1 0
a1 b0 c0 7 6
a1 b0 c1 5 4
a0 b1 c0 11 10
a0 b1 c1 9 8
a0 b0 c0 15 14
a0 b0 c1 13 12""")
res = from_frame(df, sort_columns=True)
assert_larray_equal(res, expected)

# f) with sorting of unsorted row labels
arr = ndtest("a=a1,a0;b=b1,b0;c=c1,c0;d=d1,d0")
df = arr.to_frame(ncolaxes=2)
expected = from_string(r"""
a b c\d d1 d0
a0 b0 c1 12 13
a0 b0 c0 14 15
a0 b1 c1 8 9
a0 b1 c0 10 11
a1 b0 c1 4 5
a1 b0 c0 6 7
a1 b1 c1 0 1
a1 b1 c0 2 3""")
res = from_frame(df, sort_rows=True)
assert_larray_equal(res, expected)

# g) with sorting of all unsorted labels
arr = ndtest("a=a1,a0;b=b1,b0;c=c1,c0;d=d1,d0")
df = arr.to_frame(ncolaxes=2)
expected = from_string(r"""
a b c\d d0 d1
a0 b0 c0 15 14
a0 b0 c1 13 12
a0 b1 c0 11 10
a0 b1 c1 9 8
a1 b0 c0 7 6
a1 b0 c1 5 4
a1 b1 c0 3 2
a1 b1 c1 1 0""")
res = from_frame(df, sort_rows=True, sort_columns=True)
assert_larray_equal(res, expected)


def test_asarray():
series = pd.Series([0, 1, 2], ['a0', 'a1', 'a2'], name='a')
Expand Down
8 changes: 4 additions & 4 deletions larray/tests/test_axiscollection.py
Original file line number Diff line number Diff line change
Expand Up @@ -221,9 +221,9 @@ def test_contains(col):

def test_index(col):
assert col.index('lipro') == 0
with must_raise(ValueError, msg="'nonexisting' is not in list"):
with must_raise(ValueError, msg="axis 'nonexisting' is not in collection"):
col.index('nonexisting')
assert col.index(0) == 0
assert col.index(0) == 0
assert col.index(1) == 1
assert col.index(2) == 2
assert col.index(-1) == -1
Expand All @@ -237,9 +237,9 @@ def test_index(col):
assert col.index(sex) == 1
assert col.index(age) == 2
assert col.index(sex2) == 1
with must_raise(ValueError, msg="'geo' is not in list"):
with must_raise(ValueError, msg="axis 'geo' is not in collection"):
col.index(geo)
with must_raise(ValueError, msg="'value' is not in list"):
with must_raise(ValueError, msg="axis 'value' is not in collection"):
col.index(value)

# test anonymous axes
Expand Down
2 changes: 1 addition & 1 deletion setup.py
Original file line number Diff line number Diff line change
Expand Up @@ -30,11 +30,11 @@ def readlocal(fname):
'Intended Audience :: Developers',
'Programming Language :: Python',
'Programming Language :: Python :: 3',
'Programming Language :: Python :: 3.8',
'Programming Language :: Python :: 3.9',
'Programming Language :: Python :: 3.10',
'Programming Language :: Python :: 3.11',
'Programming Language :: Python :: 3.12',
'Programming Language :: Python :: 3.13',
'Topic :: Scientific/Engineering',
'Topic :: Software Development :: Libraries',
]
Expand Down
Loading