larray-project · gdementen · Dec 3, 2025 · May 14, 2025 · Dec 1, 2025 · Dec 2, 2025
diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml
@@ -15,7 +15,7 @@ jobs:
         with:
           # Pin ruff version to make sure we do not break our builds at the
           # worst times
-          version: "0.14.5"
+          version: "0.14.7"
 
   test:
     # name: Test (${{ matrix.python-version }}, ${{ matrix.os }})
@@ -27,7 +27,7 @@ jobs:
       fail-fast: false
       matrix:
         # os: ["ubuntu-latest", "macos-latest", "windows-latest"]
-        python-version: ['3.9', '3.10', '3.11', '3.12']
+        python-version: ['3.9', '3.10', '3.11', '3.12', '3.13']
 
     defaults:
       run:

diff --git a/condarecipe/larray/meta.yaml b/condarecipe/larray/meta.yaml
@@ -35,16 +35,16 @@ test:
     - pytest --pyargs larray
 
 about:
-  home: http://github.com/larray-project/larray
+  home: https://github.com/larray-project/larray
   license: GPL-3.0-only
   license_family: GPL
   license_file: LICENSE
   summary: "N-dimensional labelled arrays in Python"
   description: |
     LArray is an open source Python library that aims to provide tools for
     easy exploration and manipulation of N-dimensional labelled data structures.
-  doc_url: http://larray.readthedocs.io/
-  dev_url: http://github.com/larray-project/larray
+  doc_url: https://larray.readthedocs.io/
+  dev_url: https://github.com/larray-project/larray
 
 extra:
   recipe-maintainers:

diff --git a/doc/source/changes/version_0_35.rst.inc b/doc/source/changes/version_0_35.rst.inc
@@ -31,6 +31,8 @@ Backward incompatible changes
 New features
 ^^^^^^^^^^^^
 
+* added support for Python 3.13 (closes :issue:`1128`).
+
 * :py:obj:`Array.plot()` now has an ``animate`` argument to produce animated
   plots. The argument takes an axis (it also supports several axes but that is
   rarely useful) and will create an animation, with one image per label of that
@@ -92,6 +94,9 @@ Miscellaneous improvements
 
     >>> arr.plot.bar(stack='gender')
 
+* :py:obj:`from_frame()` and :py:obj:`asarray()` now support Pandas DataFrames
+  with more than one level (row) of columns (closes :issue:`466`).
+
 * :py:obj:`Array.to_frame()` gained an ``ncolaxes`` argument to control how many
   axes should be used as columns (defaults to 1, as before).
 

diff --git a/larray/core/axis.py b/larray/core/axis.py
@@ -2302,7 +2302,10 @@ def index(self, axis, compatible=False) -> int:
             name = axis
         if name is None:
             raise ValueError(f"{axis!r} is not in collection")
-        return self.names.index(name)
+        try:
+            return self.names.index(name)
+        except ValueError:
+            raise ValueError(f"axis {name!r} is not in collection")
 
     # XXX: we might want to return a new AxisCollection (same question for other inplace operations:
     # append, extend, pop, __delitem__, __setitem__)

diff --git a/larray/inout/pandas.py b/larray/inout/pandas.py
@@ -6,7 +6,6 @@
 from larray.core.array import Array
 from larray.core.axis import Axis, AxisCollection
 from larray.core.constants import nan
-from larray.util.misc import unique_list
 
 
 def decode(s, encoding='utf-8', errors='strict'):
@@ -46,34 +45,51 @@ def index_to_labels(idx, sort=True):
     """
     if isinstance(idx, pd.MultiIndex):
         if sort:
-            return list(idx.levels)
+            return list(idx.levels)  # list of pd.Index
         else:
-            return [unique_list(idx.get_level_values(label)) for label in range(idx.nlevels)]
+            # requires Pandas >= 0.23 (and it does NOT sort the values)
+            # TODO: unsure whether to_list() is necessary (larray tests pass without it
+            #       but I am not sure this code path is covered by tests)
+            #       and there might be a subtle difference. The type
+            #       of the returned object without to_list() is pd.Index
+            return [idx.unique(level).to_list() for level in range(idx.nlevels)]
     else:
         assert isinstance(idx, pd.Index)
         labels = list(idx.values)
         return [sorted(labels) if sort else labels]
 
 
-def cartesian_product_df(df, sort_rows=False, sort_columns=False, fill_value=nan, **kwargs):
-    idx = df.index
-    labels = index_to_labels(idx, sort=sort_rows)
+def product_index(idx, sort=False):
+    """
+    Converts a pandas (Multi)Index to a (Multi)Index with the cartesian
+    product of the labels present in each level (also returns those labels)
+    """
+    labels = index_to_labels(idx, sort=sort)
     if isinstance(idx, pd.MultiIndex):
-        if sort_rows:
-            new_index = pd.MultiIndex.from_product(labels)
-        else:
-            new_index = pd.MultiIndex.from_tuples(list(product(*labels)))
+        return pd.MultiIndex.from_product(labels), labels
     else:
-        if sort_rows:
-            new_index = pd.Index(labels[0], name=idx.name)
+        assert isinstance(idx, pd.Index)
+        if sort:
+            return pd.Index(labels[0], name=idx.name), labels
         else:
-            new_index = idx
-    columns = sorted(df.columns) if sort_columns else list(df.columns)
-    # the prodlen test is meant to avoid the more expensive array_equal test
-    prodlen = np.prod([len(axis_labels) for axis_labels in labels])
-    if prodlen == len(df) and columns == list(df.columns) and np.array_equal(idx.values, new_index.values):
-        return df, labels
-    return df.reindex(index=new_index, columns=columns, fill_value=fill_value, **kwargs), labels
+            return idx, labels
+
+
+def cartesian_product_df(df, sort_rows=False, sort_columns=False,
+                         fill_value=nan, **kwargs):
+    idx = df.index
+    columns = df.columns
+    prod_index, index_labels = product_index(idx, sort=sort_rows)
+    prod_columns, column_labels = product_index(columns, sort=sort_columns)
+    combined_labels = index_labels + column_labels
+    # the len() tests are meant to avoid the more expensive array_equal tests
+    if (len(prod_index) == len(idx) and
+            len(prod_columns) == len(columns) and
+            np.array_equal(idx.values, prod_index.values) and
+            np.array_equal(columns.values, prod_columns.values)):
+        return df, combined_labels
+    return df.reindex(index=prod_index, columns=prod_columns,
+                      fill_value=fill_value, **kwargs), combined_labels
 
 
 def from_series(s, sort_rows=False, fill_value=nan, meta=None, **kwargs) -> Array:
@@ -124,8 +140,13 @@ def from_series(s, sort_rows=False, fill_value=nan, meta=None, **kwargs) -> Arra
     a1   b1  6.0  7.0
     """
     if isinstance(s.index, pd.MultiIndex):
-        # TODO: use argument sort=False when it will be available
-        # (see https://github.com/pandas-dev/pandas/issues/15105)
+        # Using unstack sort argument (requires Pandas >= 2.1) would make this
+        # code simpler, but it makes it even slower than it already is.
+        # As of Pandas 2.3.3 on 12/2025, a series with a large MultiIndex is
+        # extremely slow to unstack, whether sort is used or not:
+        # >>> arr = ndtest((200, 200, 200))
+        # >>> s = arr.to_series()                     # 31.4 ms
+        # >>> s.unstack(level=-1, fill_value=np.nan)  # 1.5s !!!
         df = s.unstack(level=-1, fill_value=fill_value)
         # pandas (un)stack and pivot(_table) methods return a Dataframe/Series with sorted index and columns
         if not sort_rows:
@@ -211,13 +232,15 @@ def from_frame(df, sort_rows=False, sort_columns=False, parse_header=False, unfo
 
     # handle 2 or more dimensions with the last axis name given using \
     if unfold_last_axis_name:
+        # Note that having several axes in columns (and using df.columns.names)
+        # in this case does not make sense
         if isinstance(axes_names[-1], str) and '\\' in axes_names[-1]:
             last_axes = [name.strip() for name in axes_names[-1].split('\\')]
             axes_names = axes_names[:-1] + last_axes
         else:
             axes_names += [None]
     else:
-        axes_names += [df.columns.name]
+        axes_names += df.columns.names
 
     if cartesian_prod:
         df, axes_labels = cartesian_product_df(df, sort_rows=sort_rows, sort_columns=sort_columns,
@@ -226,12 +249,18 @@ def from_frame(df, sort_rows=False, sort_columns=False, parse_header=False, unfo
         if sort_rows or sort_columns:
             raise ValueError('sort_rows and sort_columns cannot not be used when cartesian_prod is set to False. '
                              'Please call the method sort_labels on the returned array to sort rows or columns')
-        axes_labels = index_to_labels(df.index, sort=False)
+        index_labels = index_to_labels(df.index, sort=False)
+        column_labels = index_to_labels(df.columns, sort=False)
+        axes_labels = index_labels + column_labels
 
     # Pandas treats column labels as column names (strings) so we need to convert them to values
-    last_axis_labels = [parse(cell) for cell in df.columns.values] if parse_header else list(df.columns.values)
-    axes_labels.append(last_axis_labels)
+    if parse_header:
+        ncolaxes = df.columns.nlevels
+        for i in range(len(axes_labels) - ncolaxes, len(axes_labels)):
+            axes_labels[i] = [parse(cell) for cell in axes_labels[i]]
 
+    # TODO: use zip(..., strict=True) instead when we drop support for Python 3.9
+    assert len(axes_labels) == len(axes_names)
     axes = AxisCollection([Axis(labels, name) for labels, name in zip(axes_labels, axes_names)])
     data = df.values.reshape(axes.shape)
     return Array(data, axes, meta=meta)

diff --git a/larray/tests/test_array.py b/larray/tests/test_array.py
@@ -4121,6 +4121,7 @@ def test_to_frame():
     assert df.columns.to_list() == ['c0', 'c1']
     assert df.index.names == ['a', r'b\c']
 
+
 def test_from_frame():
     # 1) data = scalar
     # ================
@@ -4530,6 +4531,81 @@ def test_from_frame():
     res = from_frame(df, fill_value=-1)
     assert_larray_equal(res, expected)
 
+    # 6) with a multi-index in columns
+    # ================================
+
+    # a) normal
+    arr = ndtest((2, 2, 2, 2))
+    df = arr.to_frame(ncolaxes=2)
+    res = from_frame(df)
+    assert_larray_equal(res, arr)
+
+    # b) with duplicated axis names
+    arr = ndtest("a=a0,a1;a=b0,b1;a=c0,c1;a=d0,d1")
+    df = arr.to_frame(ncolaxes=2)
+    res = from_frame(df)
+    assert_larray_equal(res, arr)
+
+    # c) with duplicated axes names and labels
+    arr = ndtest("a=a0,a1;a=a0,a1;a=a0,a1;a=a0,a1")
+    df = arr.to_frame(ncolaxes=2)
+    res = from_frame(df)
+    assert_larray_equal(res, arr)
+
+    # d) with unsorted labels
+    arr = ndtest("a=a1,a0;b=b1,b0;c=c1,c0;d=d1,d0")
+    df = arr.to_frame(ncolaxes=2)
+    res = from_frame(df)
+    assert_larray_equal(res, arr)
+
+    # e) with sorting of unsorted column labels
+    arr = ndtest("a=a1,a0;b=b1,b0;c=c1,c0;d=d1,d0")
+    df = arr.to_frame(ncolaxes=2)
+    expected = from_string(r"""
+     a   b  c\d  d0  d1
+    a1  b1   c0   3   2
+    a1  b1   c1   1   0
+    a1  b0   c0   7   6
+    a1  b0   c1   5   4
+    a0  b1   c0  11  10
+    a0  b1   c1   9   8
+    a0  b0   c0  15  14
+    a0  b0   c1  13  12""")
+    res = from_frame(df, sort_columns=True)
+    assert_larray_equal(res, expected)
+
+    # f) with sorting of unsorted row labels
+    arr = ndtest("a=a1,a0;b=b1,b0;c=c1,c0;d=d1,d0")
+    df = arr.to_frame(ncolaxes=2)
+    expected = from_string(r"""
+     a   b  c\d  d1  d0
+    a0  b0   c1  12  13
+    a0  b0   c0  14  15
+    a0  b1   c1   8   9
+    a0  b1   c0  10  11
+    a1  b0   c1   4   5
+    a1  b0   c0   6   7
+    a1  b1   c1   0   1
+    a1  b1   c0   2   3""")
+    res = from_frame(df, sort_rows=True)
+    assert_larray_equal(res, expected)
+
+    # g) with sorting of all unsorted labels
+    arr = ndtest("a=a1,a0;b=b1,b0;c=c1,c0;d=d1,d0")
+    df = arr.to_frame(ncolaxes=2)
+    expected = from_string(r"""
+    a   b  c\d  d0  d1
+    a0  b0   c0  15  14
+    a0  b0   c1  13  12
+    a0  b1   c0  11  10
+    a0  b1   c1   9   8
+    a1  b0   c0   7   6
+    a1  b0   c1   5   4
+    a1  b1   c0   3   2
+    a1  b1   c1   1   0""")
+    res = from_frame(df, sort_rows=True, sort_columns=True)
+    assert_larray_equal(res, expected)
+
 
 def test_asarray():
     series = pd.Series([0, 1, 2], ['a0', 'a1', 'a2'], name='a')

diff --git a/larray/tests/test_axiscollection.py b/larray/tests/test_axiscollection.py
@@ -221,9 +221,9 @@ def test_contains(col):
 
 def test_index(col):
     assert col.index('lipro') == 0
-    with must_raise(ValueError, msg="'nonexisting' is not in list"):
+    with must_raise(ValueError, msg="axis 'nonexisting' is not in collection"):
         col.index('nonexisting')
-        assert col.index(0) == 0
+    assert col.index(0) == 0
     assert col.index(1) == 1
     assert col.index(2) == 2
     assert col.index(-1) == -1
@@ -237,9 +237,9 @@ def test_index(col):
     assert col.index(sex) == 1
     assert col.index(age) == 2
     assert col.index(sex2) == 1
-    with must_raise(ValueError, msg="'geo' is not in list"):
+    with must_raise(ValueError, msg="axis 'geo' is not in collection"):
         col.index(geo)
-    with must_raise(ValueError, msg="'value' is not in list"):
+    with must_raise(ValueError, msg="axis 'value' is not in collection"):
         col.index(value)
 
     # test anonymous axes

diff --git a/setup.py b/setup.py
@@ -30,11 +30,11 @@ def readlocal(fname):
     'Intended Audience :: Developers',
     'Programming Language :: Python',
     'Programming Language :: Python :: 3',
-    'Programming Language :: Python :: 3.8',
     'Programming Language :: Python :: 3.9',
     'Programming Language :: Python :: 3.10',
     'Programming Language :: Python :: 3.11',
     'Programming Language :: Python :: 3.12',
+    'Programming Language :: Python :: 3.13',
     'Topic :: Scientific/Engineering',
     'Topic :: Software Development :: Libraries',
 ]