From edaafdb561d2a27bd39616bddf096b9cb111e79f Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Ga=C3=ABtan=20de=20Menten?= <gdementen@gmail.com>
Date: Wed, 14 May 2025 18:00:38 +0200
Subject: [PATCH 1/5] FEAT: added support for dataframes with MultiIndex in
 columns in from_frame (closes #466)

---
 doc/source/changes/version_0_35.rst.inc |  3 +
 larray/inout/pandas.py                  | 79 +++++++++++++++++--------
 larray/tests/test_array.py              | 76 ++++++++++++++++++++++++
 3 files changed, 133 insertions(+), 25 deletions(-)

diff --git a/doc/source/changes/version_0_35.rst.inc b/doc/source/changes/version_0_35.rst.inc
index 22167ef49..8cca1c95f 100644
--- a/doc/source/changes/version_0_35.rst.inc
+++ b/doc/source/changes/version_0_35.rst.inc
@@ -92,6 +92,9 @@ Miscellaneous improvements
 
     >>> arr.plot.bar(stack='gender')
 
+* :py:obj:`from_frame()` and :py:obj:`asarray()` now support Pandas DataFrames
+  whose columns are a MultiIndex, i.e. have several levels (closes :issue:`466`).
+
 * :py:obj:`Array.to_frame()` gained an ``ncolaxes`` argument to control how many
   axes should be used as columns (defaults to 1, as before).
 
diff --git a/larray/inout/pandas.py b/larray/inout/pandas.py
index d67fec7e8..52ab48144 100644
--- a/larray/inout/pandas.py
+++ b/larray/inout/pandas.py
@@ -6,7 +6,6 @@
 from larray.core.array import Array
 from larray.core.axis import Axis, AxisCollection
 from larray.core.constants import nan
-from larray.util.misc import unique_list
 
 
 def decode(s, encoding='utf-8', errors='strict'):
@@ -46,34 +45,51 @@ def index_to_labels(idx, sort=True):
     """
     if isinstance(idx, pd.MultiIndex):
         if sort:
-            return list(idx.levels)
+            return list(idx.levels)  # list of pd.Index
         else:
-            return [unique_list(idx.get_level_values(label)) for label in range(idx.nlevels)]
+            # requires Pandas >= 0.23 (and it does NOT sort the values)
+            # TODO: unsure to_list is necessary (larray tests pass without it
+            #       but I am not sure this code path is covered by tests)
+            #       and there might be a subtle difference. The type
+            #       of the returned object without to_list() is pd.Index
+            return [idx.unique(level).to_list() for level in range(idx.nlevels)]
     else:
         assert isinstance(idx, pd.Index)
         labels = list(idx.values)
         return [sorted(labels) if sort else labels]
 
 
-def cartesian_product_df(df, sort_rows=False, sort_columns=False, fill_value=nan, **kwargs):
-    idx = df.index
-    labels = index_to_labels(idx, sort=sort_rows)
+def product_index(idx, sort=False):
+    """
+    Converts a pandas (Multi)Index to a (Multi)Index with the cartesian product
+    of the labels present in each level. Returns (new_index, labels).
+    """
+    labels = index_to_labels(idx, sort=sort)
     if isinstance(idx, pd.MultiIndex):
-        if sort_rows:
-            new_index = pd.MultiIndex.from_product(labels)
-        else:
-            new_index = pd.MultiIndex.from_tuples(list(product(*labels)))
+        return pd.MultiIndex.from_product(labels), labels
     else:
-        if sort_rows:
-            new_index = pd.Index(labels[0], name=idx.name)
+        assert isinstance(idx, pd.Index)
+        if sort:
+            return pd.Index(labels[0], name=idx.name), labels
         else:
-            new_index = idx
-    columns = sorted(df.columns) if sort_columns else list(df.columns)
-    # the prodlen test is meant to avoid the more expensive array_equal test
-    prodlen = np.prod([len(axis_labels) for axis_labels in labels])
-    if prodlen == len(df) and columns == list(df.columns) and np.array_equal(idx.values, new_index.values):
-        return df, labels
-    return df.reindex(index=new_index, columns=columns, fill_value=fill_value, **kwargs), labels
+            return idx, labels
+
+
+def cartesian_product_df(df, sort_rows=False, sort_columns=False,
+                         fill_value=nan, **kwargs):
+    idx = df.index
+    columns = df.columns
+    prod_index, index_labels = product_index(idx, sort=sort_rows)
+    prod_columns, column_labels = product_index(columns, sort=sort_columns)
+    combined_labels = index_labels + column_labels
+    # the len() tests are meant to avoid the more expensive array_equal tests
+    if (len(prod_index) == len(idx) and
+            len(prod_columns) == len(columns) and
+            np.array_equal(idx.values, prod_index.values) and
+            np.array_equal(columns.values, prod_columns.values)):
+        return df, combined_labels
+    return df.reindex(index=prod_index, columns=prod_columns,
+                      fill_value=fill_value, **kwargs), combined_labels
 
 
 def from_series(s, sort_rows=False, fill_value=nan, meta=None, **kwargs) -> Array:
@@ -124,8 +140,13 @@ def from_series(s, sort_rows=False, fill_value=nan, meta=None, **kwargs) -> Arra
     a1   b1  6.0  7.0
     """
     if isinstance(s.index, pd.MultiIndex):
-        # TODO: use argument sort=False when it will be available
-        # (see https://github.com/pandas-dev/pandas/issues/15105)
+        # Using unstack sort argument (requires Pandas >= 2.1) would make this
+        # code simpler, but it makes it even slower than it already is.
+        # As of Pandas 2.3.3 on 12/2025, a series with a large MultiIndex is
+        # extremely slow to unstack, whether sort is used or not:
+        # >>> arr = ndtest((200, 200, 200))
+        # >>> s = arr.to_series()                     # 31.4 ms
+        # >>> s.unstack(level=-1, fill_value=np.nan)  # 1.5s !!!
         df = s.unstack(level=-1, fill_value=fill_value)
         # pandas (un)stack and pivot(_table) methods return a Dataframe/Series with sorted index and columns
         if not sort_rows:
@@ -211,13 +232,15 @@ def from_frame(df, sort_rows=False, sort_columns=False, parse_header=False, unfo
 
     # handle 2 or more dimensions with the last axis name given using \
     if unfold_last_axis_name:
+        # Note that having several axes in columns (and using df.columns.names)
+        # in this case does not make sense
         if isinstance(axes_names[-1], str) and '\\' in axes_names[-1]:
             last_axes = [name.strip() for name in axes_names[-1].split('\\')]
             axes_names = axes_names[:-1] + last_axes
         else:
             axes_names += [None]
     else:
-        axes_names += [df.columns.name]
+        axes_names += df.columns.names
 
     if cartesian_prod:
         df, axes_labels = cartesian_product_df(df, sort_rows=sort_rows, sort_columns=sort_columns,
@@ -226,12 +249,18 @@ def from_frame(df, sort_rows=False, sort_columns=False, parse_header=False, unfo
         if sort_rows or sort_columns:
             raise ValueError('sort_rows and sort_columns cannot not be used when cartesian_prod is set to False. '
                              'Please call the method sort_labels on the returned array to sort rows or columns')
-        axes_labels = index_to_labels(df.index, sort=False)
+        index_labels = index_to_labels(df.index, sort=False)
+        column_labels = index_to_labels(df.columns, sort=False)
+        axes_labels = index_labels + column_labels
 
     # Pandas treats column labels as column names (strings) so we need to convert them to values
-    last_axis_labels = [parse(cell) for cell in df.columns.values] if parse_header else list(df.columns.values)
-    axes_labels.append(last_axis_labels)
+    if parse_header:
+        ncolaxes = df.columns.nlevels
+        for i in range(len(axes_labels) - ncolaxes, len(axes_labels)):
+            axes_labels[i] = [parse(cell) for cell in axes_labels[i]]
 
+    # TODO: use zip(..., strict=True) instead when we drop support for Python 3.9
+    assert len(axes_labels) == len(axes_names)
     axes = AxisCollection([Axis(labels, name) for labels, name in zip(axes_labels, axes_names)])
     data = df.values.reshape(axes.shape)
     return Array(data, axes, meta=meta)
diff --git a/larray/tests/test_array.py b/larray/tests/test_array.py
index 5beb0f54d..050a4692a 100644
--- a/larray/tests/test_array.py
+++ b/larray/tests/test_array.py
@@ -4121,6 +4121,7 @@ def test_to_frame():
     assert df.columns.to_list() == ['c0', 'c1']
     assert df.index.names == ['a', r'b\c']
 
+
 def test_from_frame():
     # 1) data = scalar
     # ================
@@ -4530,6 +4531,81 @@ def test_from_frame():
     res = from_frame(df, fill_value=-1)
     assert_larray_equal(res, expected)
 
+    # 6) with a multi-index in columns
+    # ================================
+
+    # a) normal
+    arr = ndtest((2, 2, 2, 2))
+    df = arr.to_frame(ncolaxes=2)
+    res = from_frame(df)
+    assert_larray_equal(res, arr)
+
+    # b) with duplicated axis names
+    arr = ndtest("a=a0,a1;a=b0,b1;a=c0,c1;a=d0,d1")
+    df = arr.to_frame(ncolaxes=2)
+    res = from_frame(df)
+    assert_larray_equal(res, arr)
+
+    # c) with duplicated axes names and labels
+    arr = ndtest("a=a0,a1;a=a0,a1;a=a0,a1;a=a0,a1")
+    df = arr.to_frame(ncolaxes=2)
+    res = from_frame(df)
+    assert_larray_equal(res, arr)
+
+    # d) with unsorted labels
+    arr = ndtest("a=a1,a0;b=b1,b0;c=c1,c0;d=d1,d0")
+    df = arr.to_frame(ncolaxes=2)
+    res = from_frame(df)
+    assert_larray_equal(res, arr)
+
+    # e) with sorting of unsorted column labels
+    arr = ndtest("a=a1,a0;b=b1,b0;c=c1,c0;d=d1,d0")
+    df = arr.to_frame(ncolaxes=2)
+    expected = from_string(r"""
+     a   b  c\d  d0  d1
+    a1  b1   c0   3   2
+    a1  b1   c1   1   0
+    a1  b0   c0   7   6
+    a1  b0   c1   5   4
+    a0  b1   c0  11  10
+    a0  b1   c1   9   8
+    a0  b0   c0  15  14
+    a0  b0   c1  13  12""")
+    res = from_frame(df, sort_columns=True)
+    assert_larray_equal(res, expected)
+
+    # f) with sorting of unsorted row labels
+    arr = ndtest("a=a1,a0;b=b1,b0;c=c1,c0;d=d1,d0")
+    df = arr.to_frame(ncolaxes=2)
+    expected = from_string(r"""
+     a   b  c\d  d1  d0
+    a0  b0   c1  12  13
+    a0  b0   c0  14  15
+    a0  b1   c1   8   9
+    a0  b1   c0  10  11
+    a1  b0   c1   4   5
+    a1  b0   c0   6   7
+    a1  b1   c1   0   1
+    a1  b1   c0   2   3""")
+    res = from_frame(df, sort_rows=True)
+    assert_larray_equal(res, expected)
+
+    # g) with sorting of all unsorted labels
+    arr = ndtest("a=a1,a0;b=b1,b0;c=c1,c0;d=d1,d0")
+    df = arr.to_frame(ncolaxes=2)
+    expected = from_string(r"""
+    a   b  c\d  d0  d1
+    a0  b0   c0  15  14
+    a0  b0   c1  13  12
+    a0  b1   c0  11  10
+    a0  b1   c1   9   8
+    a1  b0   c0   7   6
+    a1  b0   c1   5   4
+    a1  b1   c0   3   2
+    a1  b1   c1   1   0""")
+    res = from_frame(df, sort_rows=True, sort_columns=True)
+    assert_larray_equal(res, expected)
+
 
 def test_asarray():
     series = pd.Series([0, 1, 2], ['a0', 'a1', 'a2'], name='a')

From 539a06b7ada8ad12735a62bc2c8893452874faf6 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Ga=C3=ABtan=20de=20Menten?= <gdementen@gmail.com>
Date: Mon, 1 Dec 2025 10:17:19 +0100
Subject: [PATCH 2/5] MAINT: added support for Python 3.13 (closes #1128)

---
 .github/workflows/ci.yml                | 2 +-
 doc/source/changes/version_0_35.rst.inc | 2 ++
 setup.py                                | 2 +-
 3 files changed, 4 insertions(+), 2 deletions(-)

diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml
index 6d9d1f9a6..5b3ed14d8 100644
--- a/.github/workflows/ci.yml
+++ b/.github/workflows/ci.yml
@@ -27,7 +27,7 @@ jobs:
       fail-fast: false
       matrix:
         # os: ["ubuntu-latest", "macos-latest", "windows-latest"]
-        python-version: ['3.9', '3.10', '3.11', '3.12']
+        python-version: ['3.9', '3.10', '3.11', '3.12', '3.13']
 
     defaults:
       run:
diff --git a/doc/source/changes/version_0_35.rst.inc b/doc/source/changes/version_0_35.rst.inc
index 8cca1c95f..ffd9d2187 100644
--- a/doc/source/changes/version_0_35.rst.inc
+++ b/doc/source/changes/version_0_35.rst.inc
@@ -31,6 +31,8 @@ Backward incompatible changes
 New features
 ^^^^^^^^^^^^
 
+* added support for Python 3.13 (closes :issue:`1128`).
+
 * :py:obj:`Array.plot()` now has an ``animate`` argument to produce animated
   plots. The argument takes an axis (it also supports several axes but that is
   rarely useful) and will create an animation, with one image per label of that
diff --git a/setup.py b/setup.py
index db6a8d818..4cde7409a 100644
--- a/setup.py
+++ b/setup.py
@@ -30,11 +30,11 @@ def readlocal(fname):
     'Intended Audience :: Developers',
     'Programming Language :: Python',
     'Programming Language :: Python :: 3',
-    'Programming Language :: Python :: 3.8',
     'Programming Language :: Python :: 3.9',
     'Programming Language :: Python :: 3.10',
     'Programming Language :: Python :: 3.11',
     'Programming Language :: Python :: 3.12',
+    'Programming Language :: Python :: 3.13',
     'Topic :: Scientific/Engineering',
     'Topic :: Software Development :: Libraries',
 ]

From f98850006b88741bb2cf1b1ed5fa9662320aa7d0 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Ga=C3=ABtan=20de=20Menten?= <gdementen@gmail.com>
Date: Tue, 2 Dec 2025 16:53:13 +0100
Subject: [PATCH 3/5] MAINT: modernize github actions

---
 .github/workflows/ci.yml | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml
index 5b3ed14d8..b205e5def 100644
--- a/.github/workflows/ci.yml
+++ b/.github/workflows/ci.yml
@@ -15,7 +15,7 @@ jobs:
         with:
           # Pin ruff version to make sure we do not break our builds at the
           # worst times
-          version: "0.14.5"
+          version: "0.14.7"
 
   test:
     # name: Test (${{ matrix.python-version }}, ${{ matrix.os }})

From e3dcb33932faee1780ee29f363a919714bec90c5 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Ga=C3=ABtan=20de=20Menten?= <gdementen@gmail.com>
Date: Wed, 3 Dec 2025 11:36:35 +0100
Subject: [PATCH 4/5] MAINT: use https links

---
 condarecipe/larray/meta.yaml | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

diff --git a/condarecipe/larray/meta.yaml b/condarecipe/larray/meta.yaml
index 13cd7e886..fbb062431 100644
--- a/condarecipe/larray/meta.yaml
+++ b/condarecipe/larray/meta.yaml
@@ -35,7 +35,7 @@ test:
     - pytest --pyargs larray
 
 about:
-  home: http://github.com/larray-project/larray
+  home: https://github.com/larray-project/larray
   license: GPL-3.0-only
   license_family: GPL
   license_file: LICENSE
@@ -43,8 +43,8 @@ about:
   description: |
     LArray is an open source Python library that aims to provide tools for
     easy exploration and manipulation of N-dimensional labelled data structures.
-  doc_url: http://larray.readthedocs.io/
-  dev_url: http://github.com/larray-project/larray
+  doc_url: https://larray.readthedocs.io/
+  dev_url: https://github.com/larray-project/larray
 
 extra:
   recipe-maintainers:

From 459295c7bbe679bd13ab700919f9080fbeb6d7ff Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Ga=C3=ABtan=20de=20Menten?= <gdementen@gmail.com>
Date: Wed, 3 Dec 2025 12:39:53 +0100
Subject: [PATCH 5/5] FEAT: better error message for AxisCollection.index(name)

This is also Python 3.14-proof: we used to rely on the message of the builtin list.index, which changed (for the worse IMO) in Python 3.14.
---
 larray/core/axis.py                 | 5 ++++-
 larray/tests/test_axiscollection.py | 8 ++++----
 2 files changed, 8 insertions(+), 5 deletions(-)

diff --git a/larray/core/axis.py b/larray/core/axis.py
index 6c58a878a..9b18c4905 100644
--- a/larray/core/axis.py
+++ b/larray/core/axis.py
@@ -2302,7 +2302,10 @@ def index(self, axis, compatible=False) -> int:
             name = axis
         if name is None:
             raise ValueError(f"{axis!r} is not in collection")
-        return self.names.index(name)
+        try:
+            return self.names.index(name)
+        except ValueError:
+            raise ValueError(f"axis {name!r} is not in collection")
 
     # XXX: we might want to return a new AxisCollection (same question for other inplace operations:
     # append, extend, pop, __delitem__, __setitem__)
diff --git a/larray/tests/test_axiscollection.py b/larray/tests/test_axiscollection.py
index dd34478d9..e0d26bdf9 100644
--- a/larray/tests/test_axiscollection.py
+++ b/larray/tests/test_axiscollection.py
@@ -221,9 +221,9 @@ def test_contains(col):
 
 def test_index(col):
     assert col.index('lipro') == 0
-    with must_raise(ValueError, msg="'nonexisting' is not in list"):
+    with must_raise(ValueError, msg="axis 'nonexisting' is not in collection"):
         col.index('nonexisting')
-        assert col.index(0) == 0
+    assert col.index(0) == 0
     assert col.index(1) == 1
     assert col.index(2) == 2
     assert col.index(-1) == -1
@@ -237,9 +237,9 @@ def test_index(col):
     assert col.index(sex) == 1
     assert col.index(age) == 2
     assert col.index(sex2) == 1
-    with must_raise(ValueError, msg="'geo' is not in list"):
+    with must_raise(ValueError, msg="axis 'geo' is not in collection"):
         col.index(geo)
-    with must_raise(ValueError, msg="'value' is not in list"):
+    with must_raise(ValueError, msg="axis 'value' is not in collection"):
         col.index(value)
 
     # test anonymous axes