From 84154db7266103604b72a4d96936ff869a070880 Mon Sep 17 00:00:00 2001 From: Samuel Aboderin Date: Wed, 17 Jun 2026 10:19:49 +0100 Subject: [PATCH 1/5] GH-49644: [Python] Support converting list of multi-dimensional arrays to FixedShapeTensor --- .../src/arrow/python/python_to_arrow.cc | 14 ++++++- python/pyarrow/tests/test_array.py | 18 ++++++++ python/pyarrow/tests/test_extension_type.py | 42 +++++++++++++++++++ 3 files changed, 73 insertions(+), 1 deletion(-) diff --git a/python/pyarrow/src/arrow/python/python_to_arrow.cc b/python/pyarrow/src/arrow/python/python_to_arrow.cc index e7ce54abcd8f..10f84525d900 100644 --- a/python/pyarrow/src/arrow/python/python_to_arrow.cc +++ b/python/pyarrow/src/arrow/python/python_to_arrow.cc @@ -908,8 +908,20 @@ class PyListConverter : public ListConverter { Status AppendNdarray(PyObject* value) { PyArrayObject* ndarray = reinterpret_cast(value); + OwnedRef flattened; if (PyArray_NDIM(ndarray) != 1) { - return Status::Invalid("Can only convert 1-dimensional array values"); + // GH-49644: a fixed-size list (e.g. the storage of a fixed-shape tensor) + // can be built from a multi-dimensional array by flattening it in C + // order. The total number of elements must still match the list size, + // which the builder validates below. Variable-sized lists remain + // restricted to 1-dimensional values to avoid ambiguity. + if (this->list_type_->id() != Type::FIXED_SIZE_LIST) { + return Status::Invalid("Can only convert 1-dimensional array values"); + } + flattened.reset(PyArray_Ravel(ndarray, NPY_CORDER)); + RETURN_IF_PYERROR(); + value = flattened.obj(); + ndarray = reinterpret_cast(value); } if (PyArray_ISBYTESWAPPED(ndarray)) { // TODO diff --git a/python/pyarrow/tests/test_array.py b/python/pyarrow/tests/test_array.py index 0205db2393ab..4a64255f50d7 100644 --- a/python/pyarrow/tests/test_array.py +++ b/python/pyarrow/tests/test_array.py @@ -2924,6 +2924,24 @@ def test_array_from_invalid_dim_raises(): pa.array(arr0d) +@pytest.mark.numpy +def test_fixed_size_list_from_multidim_ndarray(): + # GH-49644: a fixed-size list can be built from multi-dimensional ndarray + # elements by flattening them in C order. + arr = pa.array([np.array([[1, 2, 3]]), np.array([[4, 5, 6]])], + type=pa.list_(pa.int64(), 3)) + assert arr.type == pa.list_(pa.int64(), 3) + assert arr.to_pylist() == [[1, 2, 3], [4, 5, 6]] + + # The flattened length must still match the fixed size + with pytest.raises(pa.lib.ArrowInvalid): + pa.array([np.array([[1, 2], [3, 4]])], type=pa.list_(pa.int64(), 3)) + + # Variable-sized lists still require 1-dimensional values + with pytest.raises(pa.lib.ArrowInvalid, match="1-dimensional"): + pa.array([np.array([[1, 2, 3]])], type=pa.list_(pa.int64())) + + @pytest.mark.numpy def test_array_from_strided_bool(): # ARROW-6325 diff --git a/python/pyarrow/tests/test_extension_type.py b/python/pyarrow/tests/test_extension_type.py index 1adbd4e98070..f3233bc420ae 100644 --- a/python/pyarrow/tests/test_extension_type.py +++ b/python/pyarrow/tests/test_extension_type.py @@ -1730,6 +1730,48 @@ def test_tensor_array_from_numpy(np_type_str): pa.FixedShapeTensorArray.from_numpy_ndarray(arr, dim_names=[0, 1]) +@pytest.mark.numpy +@pytest.mark.parametrize("np_type_str", ("int8", "int64", "float32")) +def test_tensor_array_from_list_of_ndarrays(np_type_str): + # GH-49644: build a fixed-shape-tensor array from a list of individual + # (multi-dimensional) ndarrays, not only from a single stacked ndarray. + np_dtype = np.dtype(np_type_str) + tensor_type = pa.fixed_shape_tensor(pa.from_numpy_dtype(np_dtype), (2, 3)) + + elements = [ + np.arange(6, dtype=np_dtype).reshape(2, 3), + np.arange(6, 12, dtype=np_dtype).reshape(2, 3), + ] + result = pa.array(elements, type=tensor_type) + assert isinstance(result, pa.FixedShapeTensorArray) + assert result.type == tensor_type + assert len(result) == 2 + + # Must match the existing from_numpy_ndarray path on the same data + expected = pa.FixedShapeTensorArray.from_numpy_ndarray(np.stack(elements)) + assert result.storage.equals(expected.storage) + + # Each element round-trips back to the original ndarray (with its shape) + for scalar, original in zip(result, elements): + np.testing.assert_array_equal(scalar.to_numpy(), original) + + # Higher-dimensional tensors work too + tensor_3d = pa.fixed_shape_tensor(pa.from_numpy_dtype(np_dtype), (2, 2, 3)) + elements_3d = [np.arange(12, dtype=np_dtype).reshape(2, 2, 3)] + result_3d = pa.array(elements_3d, type=tensor_3d) + assert result_3d.type == tensor_3d + np.testing.assert_array_equal(result_3d[0].to_numpy(), elements_3d[0]) + + # None elements are allowed + result_with_null = pa.array([elements[0], None], type=tensor_type) + assert result_with_null.null_count == 1 + assert result_with_null[1].as_py() is None + + # A flattened size that doesn't match the tensor shape is rejected + with pytest.raises(pa.lib.ArrowInvalid): + pa.array([np.arange(8, dtype=np_dtype).reshape(2, 4)], type=tensor_type) + + @pytest.mark.numpy @pytest.mark.parametrize("tensor_type", ( pa.fixed_shape_tensor(pa.int8(), [2, 2, 3]), From e695d015437744c4718011b9a4a3ff27253791e4 Mon Sep 17 00:00:00 2001 From: Samuel Aboderin Date: Wed, 17 Jun 2026 10:40:55 +0100 Subject: [PATCH 2/5] GH-49644: [Python] Make fixed_size_list test deterministic and cover C-order flatten --- python/pyarrow/tests/test_array.py | 14 +++++++++++--- 1 file changed, 11 insertions(+), 3 deletions(-) diff --git a/python/pyarrow/tests/test_array.py b/python/pyarrow/tests/test_array.py index 4a64255f50d7..7b21fa08db12 100644 --- a/python/pyarrow/tests/test_array.py +++ b/python/pyarrow/tests/test_array.py @@ -2928,18 +2928,26 @@ def test_array_from_invalid_dim_raises(): def test_fixed_size_list_from_multidim_ndarray(): # GH-49644: a fixed-size list can be built from multi-dimensional ndarray # elements by flattening them in C order. - arr = pa.array([np.array([[1, 2, 3]]), np.array([[4, 5, 6]])], + arr = pa.array([np.array([[1, 2, 3]], dtype=np.int64), + np.array([[4, 5, 6]], dtype=np.int64)], type=pa.list_(pa.int64(), 3)) assert arr.type == pa.list_(pa.int64(), 3) assert arr.to_pylist() == [[1, 2, 3], [4, 5, 6]] + # A non-trivial 2D shape confirms values are flattened in C (row-major) order + arr = pa.array([np.array([[1, 2], [3, 4]], dtype=np.int64)], + type=pa.list_(pa.int64(), 4)) + assert arr.to_pylist() == [[1, 2, 3, 4]] + # The flattened length must still match the fixed size with pytest.raises(pa.lib.ArrowInvalid): - pa.array([np.array([[1, 2], [3, 4]])], type=pa.list_(pa.int64(), 3)) + pa.array([np.array([[1, 2], [3, 4]], dtype=np.int64)], + type=pa.list_(pa.int64(), 3)) # Variable-sized lists still require 1-dimensional values with pytest.raises(pa.lib.ArrowInvalid, match="1-dimensional"): - pa.array([np.array([[1, 2, 3]])], type=pa.list_(pa.int64())) + pa.array([np.array([[1, 2, 3]], dtype=np.int64)], + type=pa.list_(pa.int64())) @pytest.mark.numpy From daed54f4382f0cab1f6cfe8a971dd1c198d9bd48 Mon Sep 17 00:00:00 2001 From: Samuel Aboderin Date: Wed, 17 Jun 2026 21:59:11 +0100 Subject: [PATCH 3/5] GH-49644: [Python] Only flatten ndim>=2 ndarrays for fixed-size lists --- python/pyarrow/src/arrow/python/python_to_arrow.cc | 7 ++++--- python/pyarrow/tests/test_array.py | 4 ++++ 2 files changed, 8 insertions(+), 3 deletions(-) diff --git a/python/pyarrow/src/arrow/python/python_to_arrow.cc b/python/pyarrow/src/arrow/python/python_to_arrow.cc index 10f84525d900..3464d469a130 100644 --- a/python/pyarrow/src/arrow/python/python_to_arrow.cc +++ b/python/pyarrow/src/arrow/python/python_to_arrow.cc @@ -913,9 +913,10 @@ class PyListConverter : public ListConverter { // GH-49644: a fixed-size list (e.g. the storage of a fixed-shape tensor) // can be built from a multi-dimensional array by flattening it in C // order. The total number of elements must still match the list size, - // which the builder validates below. Variable-sized lists remain - // restricted to 1-dimensional values to avoid ambiguity. - if (this->list_type_->id() != Type::FIXED_SIZE_LIST) { + // which the builder validates below. 0-dimensional arrays and + // variable-sized lists remain restricted to 1-dimensional values. + if (PyArray_NDIM(ndarray) < 2 || + this->list_type_->id() != Type::FIXED_SIZE_LIST) { return Status::Invalid("Can only convert 1-dimensional array values"); } flattened.reset(PyArray_Ravel(ndarray, NPY_CORDER)); diff --git a/python/pyarrow/tests/test_array.py b/python/pyarrow/tests/test_array.py index 7b21fa08db12..bde97f0537f9 100644 --- a/python/pyarrow/tests/test_array.py +++ b/python/pyarrow/tests/test_array.py @@ -2949,6 +2949,10 @@ def test_fixed_size_list_from_multidim_ndarray(): pa.array([np.array([[1, 2, 3]], dtype=np.int64)], type=pa.list_(pa.int64())) + # 0-dimensional arrays are still rejected (not flattened to length 1) + with pytest.raises(pa.lib.ArrowInvalid, match="1-dimensional"): + pa.array([np.array(1, dtype=np.int64)], type=pa.list_(pa.int64(), 1)) + @pytest.mark.numpy def test_array_from_strided_bool(): From 832aceead7b2b6b84c5da9cc054a137eceacac6a Mon Sep 17 00:00:00 2001 From: Samuel Aboderin Date: Mon, 22 Jun 2026 18:04:05 +0100 Subject: [PATCH 4/5] GH-49644: [Python] Fix clang-format in AppendNdarray --- python/pyarrow/src/arrow/python/python_to_arrow.cc | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/python/pyarrow/src/arrow/python/python_to_arrow.cc b/python/pyarrow/src/arrow/python/python_to_arrow.cc index 3464d469a130..1b135f8ff1ad 100644 --- a/python/pyarrow/src/arrow/python/python_to_arrow.cc +++ b/python/pyarrow/src/arrow/python/python_to_arrow.cc @@ -915,8 +915,7 @@ class PyListConverter : public ListConverter { // order. The total number of elements must still match the list size, // which the builder validates below. 0-dimensional arrays and // variable-sized lists remain restricted to 1-dimensional values. - if (PyArray_NDIM(ndarray) < 2 || - this->list_type_->id() != Type::FIXED_SIZE_LIST) { + if (PyArray_NDIM(ndarray) < 2 || this->list_type_->id() != Type::FIXED_SIZE_LIST) { return Status::Invalid("Can only convert 1-dimensional array values"); } flattened.reset(PyArray_Ravel(ndarray, NPY_CORDER)); From e7b7f69cc57b4ca2f63f1e68957b9b6eecbc0b5f Mon Sep 17 00:00:00 2001 From: Samuel Aboderin Date: Wed, 24 Jun 2026 13:03:19 +0100 Subject: [PATCH 5/5] GH-49644: [Python] Validate tensor shape, reject permuted tensors, use PyArray_CheckFromAny --- python/pyarrow/array.pxi | 18 ++++++++++++ .../src/arrow/python/python_to_arrow.cc | 29 ++++++++++++------- python/pyarrow/tests/test_extension_type.py | 14 +++++++-- 3 files changed, 47 insertions(+), 14 deletions(-) diff --git a/python/pyarrow/array.pxi b/python/pyarrow/array.pxi index 4c625c45e489..b1f7ce783931 100644 --- a/python/pyarrow/array.pxi +++ b/python/pyarrow/array.pxi @@ -266,6 +266,24 @@ def array(object obj, type=None, mask=None, size=None, from_pandas=None, if type is not None and type.id == _Type_EXTENSION: extension_type = type type = type.storage_type + # GH-49644: when building a fixed_shape_tensor from a sequence of arrays, + # the converter only sees the flat storage type, so validate the + # tensor-specific constraints here where the type is still known. + if (isinstance(extension_type, FixedShapeTensorType) + and isinstance(obj, (list, tuple))): + if extension_type.permutation is not None: + raise NotImplementedError( + "Converting a sequence of arrays to a fixed_shape_tensor with " + "a permutation is not supported; use " + "FixedShapeTensorArray.from_numpy_ndarray instead") + if np is not None: + expected_shape = tuple(extension_type.shape) + for element in obj: + if (isinstance(element, np.ndarray) and element.ndim >= 2 + and tuple(element.shape) != expected_shape): + raise ValueError( + f"Cannot convert array of shape {element.shape} to a " + f"fixed_shape_tensor of shape {expected_shape}") if from_pandas is None: c_from_pandas = False diff --git a/python/pyarrow/src/arrow/python/python_to_arrow.cc b/python/pyarrow/src/arrow/python/python_to_arrow.cc index 1b135f8ff1ad..f7b3687a8127 100644 --- a/python/pyarrow/src/arrow/python/python_to_arrow.cc +++ b/python/pyarrow/src/arrow/python/python_to_arrow.cc @@ -908,25 +908,32 @@ class PyListConverter : public ListConverter { Status AppendNdarray(PyObject* value) { PyArrayObject* ndarray = reinterpret_cast(value); + if (PyArray_ISBYTESWAPPED(ndarray)) { + // TODO + return Status::NotImplemented("Byte-swapped arrays not supported"); + } OwnedRef flattened; if (PyArray_NDIM(ndarray) != 1) { - // GH-49644: a fixed-size list (e.g. the storage of a fixed-shape tensor) - // can be built from a multi-dimensional array by flattening it in C - // order. The total number of elements must still match the list size, - // which the builder validates below. 0-dimensional arrays and - // variable-sized lists remain restricted to 1-dimensional values. + // GH-49644: a fixed-size list (e.g. fixed-shape-tensor storage) can be + // built from a multi-dimensional array, always flattened in C order + // regardless of the input's memory layout. if (PyArray_NDIM(ndarray) < 2 || this->list_type_->id() != Type::FIXED_SIZE_LIST) { - return Status::Invalid("Can only convert 1-dimensional array values"); + return Status::Invalid( + "Can only convert 1-dimensional array values to a variable-sized list"); } - flattened.reset(PyArray_Ravel(ndarray, NPY_CORDER)); + // Get an aligned, C-contiguous array (copying only if needed), then view + // it as 1-D so its values can be read directly in C order. + PyObject* contiguous = + PyArray_CheckFromAny(value, nullptr, /*min_depth=*/0, /*max_depth=*/0, + NPY_ARRAY_C_CONTIGUOUS | NPY_ARRAY_ALIGNED, nullptr); + RETURN_IF_PYERROR(); + flattened.reset( + PyArray_Ravel(reinterpret_cast(contiguous), NPY_CORDER)); + Py_DECREF(contiguous); RETURN_IF_PYERROR(); value = flattened.obj(); ndarray = reinterpret_cast(value); } - if (PyArray_ISBYTESWAPPED(ndarray)) { - // TODO - return Status::NotImplemented("Byte-swapped arrays not supported"); - } const int64_t size = PyArray_SIZE(ndarray); RETURN_NOT_OK(AppendTo(this->list_type_, size)); RETURN_NOT_OK(this->list_builder_->ValidateOverflow(size)); diff --git a/python/pyarrow/tests/test_extension_type.py b/python/pyarrow/tests/test_extension_type.py index f3233bc420ae..10e53a08d365 100644 --- a/python/pyarrow/tests/test_extension_type.py +++ b/python/pyarrow/tests/test_extension_type.py @@ -1767,9 +1767,17 @@ def test_tensor_array_from_list_of_ndarrays(np_type_str): assert result_with_null.null_count == 1 assert result_with_null[1].as_py() is None - # A flattened size that doesn't match the tensor shape is rejected - with pytest.raises(pa.lib.ArrowInvalid): - pa.array([np.arange(8, dtype=np_dtype).reshape(2, 4)], type=tensor_type) + # A multi-dimensional element whose shape doesn't match the tensor shape is + # rejected, even when the total number of elements is the same (GH-49644). + with pytest.raises(ValueError, match="shape"): + pa.array([np.arange(6, dtype=np_dtype).reshape(3, 2)], type=tensor_type) + + # Permuted tensor types can't be built from a sequence (the flatten would + # store the wrong layout), so they're rejected for now. + permuted_type = pa.fixed_shape_tensor( + pa.from_numpy_dtype(np_dtype), (2, 3), permutation=[1, 0]) + with pytest.raises(NotImplementedError, match="permutation"): + pa.array(elements, type=permuted_type) @pytest.mark.numpy