From ef5dbbcd763a16a8cfec7d4cdd22e9a4e305b662 Mon Sep 17 00:00:00 2001 From: AntoinePrv Date: Mon, 15 Jun 2026 11:40:09 +0200 Subject: [PATCH 01/21] Add CopyBits --- cpp/src/arrow/util/bit_util.h | 33 ++++++++++++++++++++++ cpp/src/arrow/util/bit_util_test.cc | 43 +++++++++++++++++++++++++++++ 2 files changed, 76 insertions(+) diff --git a/cpp/src/arrow/util/bit_util.h b/cpp/src/arrow/util/bit_util.h index bffc3c14a106..fe8d0b1606cb 100644 --- a/cpp/src/arrow/util/bit_util.h +++ b/cpp/src/arrow/util/bit_util.h @@ -18,6 +18,7 @@ #pragma once #include +#include #include #include @@ -176,6 +177,38 @@ static constexpr bool GetBitFromByte(uint8_t byte, uint8_t i) { return byte & kBitmask[i]; } +template +struct CopyBitsParams { + Uint src = {}; + Uint dst = {}; + Uint start = {}; + Uint end = {}; +}; + +/// Copy a contiguous span of bits from src into dst. +/// +/// Copy bits [start, end[ from src into the position [start, end[ in dst +/// and return the result (inputs are unmodified). +/// Setting ``kAllowFullCopy`` to false is an optimization when the caller can +/// guarantee that the range of bits to copy does not cover the whole range. +template +[[nodiscard]] constexpr Uint CopyBits(const CopyBitsParams& params) { + constexpr auto kUintSizeBits = static_cast(sizeof(Uint) * 8); + assert(params.start <= params.end); + assert(params.start < kUintSizeBits); + assert(params.end <= kUintSizeBits); + + const auto length = static_cast(params.end - params.start); + if constexpr (kAllowFullCopy) { + if (length == kUintSizeBits) { + return params.src; + } + } + assert(length < kUintSizeBits); + const Uint mask = LeastSignificantBitMask(length) << params.start; + return (~mask & params.dst) | (mask & params.src); +} + static inline void ClearBit(uint8_t* bits, int64_t i) { bits[i / 8] &= kFlippedBitmask[i % 8]; } diff --git a/cpp/src/arrow/util/bit_util_test.cc b/cpp/src/arrow/util/bit_util_test.cc index 1e7714540eeb..f8715333de19 100644 --- a/cpp/src/arrow/util/bit_util_test.cc +++ b/cpp/src/arrow/util/bit_util_test.cc @@ -1065,4 +1065,47 @@ TEST(SpliceWord, SpliceWord) { 0xfedc456789abcdef); } +TEST(BitUtil, CopyBits) { + // Copy bits [start, end[ from src into dst, keeping dst's other bits. + using bit_util::CopyBits; + + // Empty range: result equals dst. + ASSERT_EQ( + CopyBits({.src = 0b11111111, .dst = 0b00010010, .start = 3, .end = 3}), + 0b00010010); + // dst = 0101 0101, src = 1010 1010 -> 0101 1010 + ASSERT_EQ( + CopyBits({.src = 0b10101010, .dst = 0b01010101, .start = 0, .end = 4}), + 0b01011010); + // Copy a middle span [2, 5[ of all-ones into an all-zeros dst. + ASSERT_EQ( + CopyBits({.src = 0b11111111, .dst = 0b00000000, .start = 2, .end = 5}), + 0b00011100); + // Copy a middle span [2, 5[ of all-zeros into an all-ones dst. + ASSERT_EQ( + CopyBits({.src = 0b00000000, .dst = 0b11111111, .start = 2, .end = 5}), + 0b11100011); + // Full-word copy returns src unchanged. + ASSERT_EQ( + CopyBits({.src = 0b10101011, .dst = 0b00010010, .start = 0, .end = 8}), + 0b10101011); + // uint16_t partial range [4, 12[: dst keeps its bits outside, src fills inside. + ASSERT_EQ( + CopyBits( + {.src = 0b1010101010101010, .dst = 0b0101010101010101, .start = 4, .end = 12}), + 0b0101101010100101); + // uint64_t + ASSERT_EQ(CopyBits({ + .src = 0x0123456789abcdef, + .dst = 0xfedcba9876543210, + .start = 0, + .end = 64, + }), + 0x0123456789abcdef); + // constexpr-evaluable. + static_assert( + CopyBits({.src = 0b10101010, .dst = 0b01010101, .start = 0, .end = 4}) == + 0b01011010); +} + } // namespace arrow From 109723bea4a71e6eb59d63e54a875c93053d24c5 Mon Sep 17 00:00:00 2001 From: AntoinePrv Date: Mon, 15 Jun 2026 15:51:38 +0200 Subject: [PATCH 02/21] Add Get32Bits --- cpp/src/arrow/util/bit_util.h | 18 ++++++++++++++++++ cpp/src/arrow/util/bit_util_test.cc | 17 +++++++++++++++++ 2 files changed, 35 insertions(+) diff --git a/cpp/src/arrow/util/bit_util.h b/cpp/src/arrow/util/bit_util.h index fe8d0b1606cb..8f471a38eb92 100644 --- a/cpp/src/arrow/util/bit_util.h +++ b/cpp/src/arrow/util/bit_util.h @@ -20,8 +20,10 @@ #include #include #include +#include #include +#include "arrow/util/endian.h" #include "arrow/util/macros.h" #include "arrow/util/visibility.h" @@ -177,6 +179,22 @@ static constexpr bool GetBitFromByte(uint8_t byte, uint8_t i) { return byte & kBitmask[i]; } +/// Read 32 bits starting at bit `offset` into a uint32. +/// +/// The return value in in-memory byte layout matches the source bit-stream +/// identically on little- and big-endian platforms. +/// +/// The caller must guarantee that many bytes are readable. +static inline uint32_t Get32Bits(const uint8_t* bits, uint64_t offset) { + uint64_t buffer = {}; + std::memcpy(&buffer, bits + (offset / 8), BytesForBits(offset % 8 + 32)); + // Interpret the loaded bytes as little-endian, drop the low `offset % 8` bits + // to align LSB-first, then store back in little-endian byte order so the + // result's memory representation matches the bit-stream. + buffer = FromLittleEndian(buffer); + return ToLittleEndian(static_cast(buffer >> (offset % 8))); +} + template struct CopyBitsParams { Uint src = {}; diff --git a/cpp/src/arrow/util/bit_util_test.cc b/cpp/src/arrow/util/bit_util_test.cc index f8715333de19..a04b006edbff 100644 --- a/cpp/src/arrow/util/bit_util_test.cc +++ b/cpp/src/arrow/util/bit_util_test.cc @@ -726,6 +726,23 @@ TEST(BitUtil, ByteSwap) { EXPECT_EQ(bit_util::ByteSwap(srcd64), expectedd64); } +TEST(BitUtil, Get32Bits) { + const std::array src = {0x12, 0x34, 0x56, 0x78, 0x9a, 0xbc, + 0xde, 0xf0, 0x11, 0x22, 0x33, 0x44}; + + // The result's in-memory bytes must reproduce the source bit-stream slice + // so each result bit must match the corresponding source bit. + for (uint64_t offset = 0; offset <= 24; ++offset) { + ARROW_SCOPED_TRACE("offset = ", offset); + const uint32_t result = bit_util::Get32Bits(src.data(), offset); + for (uint64_t i = 0; i < 32; ++i) { + EXPECT_EQ(bit_util::GetBit(reinterpret_cast(&result), i), + bit_util::GetBit(src.data(), offset + i)) + << "bit " << i; + } + } +} + TEST(BitUtil, Log2) { EXPECT_EQ(bit_util::Log2(1), 0); EXPECT_EQ(bit_util::Log2(2), 1); From c4ee2f9b9cf2c2527a8efda9f3f73271f0dcd817 Mon Sep 17 00:00:00 2001 From: AntoinePrv Date: Mon, 15 Jun 2026 15:58:28 +0200 Subject: [PATCH 03/21] Add RleRun::value --- cpp/src/arrow/util/rle_encoding_internal.h | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) diff --git a/cpp/src/arrow/util/rle_encoding_internal.h b/cpp/src/arrow/util/rle_encoding_internal.h index 7b4f21148089..76b1fa43494e 100644 --- a/cpp/src/arrow/util/rle_encoding_internal.h +++ b/cpp/src/arrow/util/rle_encoding_internal.h @@ -22,6 +22,7 @@ #include #include +#include #include #include #include @@ -32,6 +33,7 @@ #include "arrow/util/bpacking_internal.h" #include "arrow/util/logging.h" #include "arrow/util/macros.h" +#include "arrow/util/ubsan.h" namespace arrow::util { @@ -116,6 +118,9 @@ class RleRun { std::copy(data, data + raw_data_size(value_bit_width), data_.begin()); } + /// The repeated value in the run + uint64_t value() const noexcept { return SafeLoadAs(data_.data()); } + /// The number of repeated values in this run. constexpr rle_size_t values_count() const noexcept { return values_count_; } @@ -132,7 +137,7 @@ class RleRun { private: /// The repeated value raw bytes stored inside the class with enough space to store /// up to a 64 bit value. - std::array data_ = {}; + alignas(8) std::array data_ = {}; /// The number of time the value is repeated. rle_size_t values_count_ = 0; }; From aa9715049c77f558e483e0fedbd301115c7c7fef Mon Sep 17 00:00:00 2001 From: AntoinePrv Date: Tue, 16 Jun 2026 11:25:01 +0200 Subject: [PATCH 04/21] Add Rle bitmap --- cpp/src/arrow/util/CMakeLists.txt | 1 + cpp/src/arrow/util/rle_bitmap_internal.h | 326 +++++++++++++++++++++++ cpp/src/arrow/util/rle_bitmap_test.cc | 285 ++++++++++++++++++++ 3 files changed, 612 insertions(+) create mode 100644 cpp/src/arrow/util/rle_bitmap_internal.h create mode 100644 cpp/src/arrow/util/rle_bitmap_test.cc diff --git a/cpp/src/arrow/util/CMakeLists.txt b/cpp/src/arrow/util/CMakeLists.txt index 28ea215bd235..628e9a4d1c7e 100644 --- a/cpp/src/arrow/util/CMakeLists.txt +++ b/cpp/src/arrow/util/CMakeLists.txt @@ -99,6 +99,7 @@ add_arrow_test(bit-utility-test bit_util_test.cc bitmap_test.cc bpacking_test.cc + rle_bitmap_test.cc rle_encoding_test.cc test_common.cc) diff --git a/cpp/src/arrow/util/rle_bitmap_internal.h b/cpp/src/arrow/util/rle_bitmap_internal.h new file mode 100644 index 000000000000..e4bfe3bce15d --- /dev/null +++ b/cpp/src/arrow/util/rle_bitmap_internal.h @@ -0,0 +1,326 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. + +#pragma once + +#include +#include + +#include "arrow/util/bit_util.h" +#include "arrow/util/logging.h" +#include "arrow/util/macros.h" +#include "arrow/util/rle_encoding_internal.h" + +namespace arrow::util { + +template +class BitmapSpan { + public: + using size_type = rle_size_t; + using byte_type = B; + + explicit constexpr BitmapSpan(byte_type* data, size_type bit_start = 0) noexcept + : data_{data}, bit_start_{bit_start} { + Normalize(); + } + + constexpr byte_type* data() const noexcept { return data_; } + + constexpr size_type bit_start() const noexcept { return bit_start_; } + + constexpr BitmapSpan NewStartingAt(size_type bit_start) const noexcept { + auto out = *this; + out.bit_start_ += bit_start; + out.Normalize(); + return out; + } + + private: + byte_type* data_; + size_type bit_start_ = 0; + + /// Ensure `bit_start` always ends up in [0, 8[, even when it starts negative. + constexpr void Normalize() { + // On two's-complement targets an arithmetic right shift floors, and the AND mask + // yields the non-negative remainder (e.g. -1 -> byte -1, offset 7), unlike `/` and + // `%` which truncate toward zero. + data_ += bit_start_ >> 3; + bit_start_ &= 0b0111; + } +}; + +using BitmapSpanMut = BitmapSpan; +using BitmapSpanConst = BitmapSpan; + +namespace internal_rle { +template +class RunToBitMapDecoderMixin { + public: + /// Advance by as many values as provided or until exhaustion of the decoder. + /// Return the number of values skipped. + [[nodiscard]] rle_size_t Advance(rle_size_t batch_size) { + const auto steps = std::min(batch_size, derived()->remaining()); + derived()->AdvanceUnsafe(steps); + ARROW_DCHECK_GE(derived()->remaining(), 0); + return steps; + } + + /// Get the next value and return false if there are no more. + [[nodiscard]] constexpr bool Get(BitmapSpanMut out) { + return derived()->GetBatch(out, 1) == 1; + } + + /// Get a batch of values return the number of decoded elements. + /// + /// May write fewer elements to the output than requested if there are not + /// enough values left. + [[nodiscard]] rle_size_t GetBatch(BitmapSpanMut out, rle_size_t batch_size) { + const auto out_bit_offset = out.bit_start(); + ARROW_DCHECK_GE(out_bit_offset, 0); + ARROW_DCHECK_LT(out_bit_offset, 8); + + if (ARROW_PREDICT_FALSE(derived()->remaining() == 0 || batch_size == 0)) { + return 0; + } + + // HEADER: Writing inside the first byte if caller gives a non-aligned input + if (ARROW_PREDICT_FALSE(out_bit_offset != 0)) { + const auto n_vals = derived()->GetBatchFirstByte(out, batch_size); + // If we exhausted the values in this decoder, or we filled what was required, + // then the following recursive call will return 0. + // If in the opposite case, then we will continue on a byte-aligned boundary. + return n_vals + GetBatch(out.NewStartingAt(n_vals), batch_size - n_vals); + } + + // Writing full bytes + const auto n_vals = derived()->GetBatchFast(out, batch_size); + + // TRAILER: Writing inside the last byte if caller asked for non multiple of 8 values + const auto n_last_vals = batch_size - n_vals; + if (ARROW_PREDICT_FALSE(derived()->remaining() > 0 && n_last_vals > 0)) { + ARROW_DCHECK_GT(n_last_vals, 0); + ARROW_DCHECK_LT(n_last_vals, 8); + out = out.NewStartingAt(n_vals); + return n_vals + derived()->GetBatchFirstByte(out, n_last_vals); + } + + return n_vals; + } + + protected: + [[nodiscard]] rle_size_t GetBatchFromByte(BitmapSpanMut out, rle_size_t batch_size, + uint8_t src) { + const auto out_bit_offset = out.bit_start(); + ARROW_DCHECK_GE(out_bit_offset, 0); + ARROW_DCHECK_LT(out_bit_offset, 8); + ARROW_DCHECK_GT(derived()->remaining(), 0); + ARROW_DCHECK_GE(batch_size, 0); + + // Empty bits in first byte that we can fill + const auto empty_bits = rle_size_t{8} - out_bit_offset; + // Number of bits in first byte that we want to fill + const auto desired_bits = std::min(empty_bits, batch_size); + // Try to advance, and get number of bits we had remaining + const auto n_bits = Advance(desired_bits); + // Copy relevant bits from the value pattern to the output. + *out.data() = bit_util::CopyBits({ + .src = src, + .dst = *out.data(), + .start = static_cast(out_bit_offset), + .end = static_cast(out_bit_offset + n_bits), + }); + + return n_bits; + } + + private: + CRTP* derived() { return static_cast(this); } + CRTP const* derived() const { return static_cast(this); } +}; +} // namespace internal_rle + +class RleRunToBitMapDecoder + : public internal_rle::RunToBitMapDecoderMixin { + public: + /// The type of run that can be decoded. + using RunType = RleRun; + + constexpr RleRunToBitMapDecoder() noexcept = default; + + explicit RleRunToBitMapDecoder(const RunType& run) noexcept { Reset(run); } + + void Reset(const RunType& run) noexcept { + values_left_ = run.values_count(); + if (run.value() == 0) { + value_pattern_ = uint8_t{0}; + } else { + value_pattern_ = uint8_t{0xFF}; + } + } + + /// Return the number of values that can be advanced. + rle_size_t remaining() const { return values_left_; } + + /// Return the repeated value of this decoder. + constexpr bool value() const { return value_pattern_ != 0; } + + private: + friend class internal_rle::RunToBitMapDecoderMixin; + + /// The byte pattern for 8 values (full ones or full zeros). + uint8_t value_pattern_ = {}; + /// Number of values left to decode. + rle_size_t values_left_ = 0; + + void AdvanceUnsafe(rle_size_t batch_size) { values_left_ -= batch_size; } + + /// Get batch values to fill the first incomplete byte of the output. + [[nodiscard]] rle_size_t GetBatchFirstByte(BitmapSpanMut out, rle_size_t batch_size) { + return GetBatchFromByte(out, batch_size, value_pattern_); + } + + /// Get batch in full bytes using memset. + [[nodiscard]] rle_size_t GetBatchFast(BitmapSpanMut out, rle_size_t batch_size) { + ARROW_DCHECK_EQ(out.bit_start(), 0); + const auto n_bytes = std::min(batch_size, remaining()) / 8; + std::memset(out.data(), value_pattern_, n_bytes); + const auto n_vals = 8 * n_bytes; + AdvanceUnsafe(n_vals); + return n_vals; + } +}; + +class BitPackedRunToBitMapDecoder + : public internal_rle::RunToBitMapDecoderMixin { + public: + /// The type of run that can be decoded. + using RunType = BitPackedRun; + + constexpr BitPackedRunToBitMapDecoder() noexcept = default; + + explicit BitPackedRunToBitMapDecoder(const RunType& run) noexcept { Reset(run); } + + void Reset(const RunType& run) noexcept { + data_ = run.raw_data_ptr(); + values_count_ = run.values_count(); + values_read_ = 0; + } + + /// Return the number of values that can be advanced. + constexpr rle_size_t remaining() const { return values_count_ - values_read_; } + + /// Get a batch of values return the number of decoded elements. + /// May write fewer elements to the output than requested if there are not enough values + /// left. + [[nodiscard]] rle_size_t GetBatch(BitmapSpanMut out, rle_size_t batch_size) { + // WARN: Slow case where bit offsets of input and output are not aligned. + // We loose the ability to do memcpy + if (ARROW_PREDICT_FALSE(out.bit_start() != unread_values_bit_offset())) { + return GetBatchMisaligned(out, batch_size); + } + return Base::GetBatch(out, batch_size); + } + + private: + using Base = internal_rle::RunToBitMapDecoderMixin; + friend class internal_rle::RunToBitMapDecoderMixin; + + /// The pointer to the beginning of the run + const uint8_t* data_ = nullptr; + /// The total number of values in the run + rle_size_t values_count_ = 0; + /// The number of values read by the decoder + rle_size_t values_read_ = 0; + + /// Start pointer of the unread values (may contain values already read). + const uint8_t* unread_values_ptr() const noexcept { return data_ + (values_read_ / 8); } + + /// Bit in @ref unread_values_ptr where the unread values start. + rle_size_t unread_values_bit_offset() const noexcept { return values_read_ % 8; } + + void AdvanceUnsafe(rle_size_t batch_size) { values_read_ += batch_size; } + + /// Get batch values to fill the first incomplete byte of the output. + [[nodiscard]] rle_size_t GetBatchFirstByte(BitmapSpanMut out, rle_size_t batch_size) { + return GetBatchFromByte(out, batch_size, *unread_values_ptr()); + } + + /// Get batch in full bytes using memcpy. + [[nodiscard]] rle_size_t GetBatchFast(BitmapSpanMut out, rle_size_t batch_size) { + ARROW_DCHECK_EQ(out.bit_start(), 0); + const auto n_bytes = std::min(batch_size, remaining()) / 8; + std::memcpy(out.data(), unread_values_ptr(), n_bytes); + const auto n_vals = 8 * n_bytes; + AdvanceUnsafe(n_vals); + return n_vals; + } + + /// Correct and slow function for reading a batch one bit at a time. + [[nodiscard]] rle_size_t GetBatchSlow(BitmapSpanMut out, rle_size_t batch_size) { + const rle_size_t to_read = std::min(batch_size, remaining()); + for (rle_size_t i = 0; i < to_read; ++i) { + const bool bit = bit_util::GetBit(data_, values_read_ + i); + bit_util::SetBitTo(out.data(), out.bit_start() + i, bit); + } + AdvanceUnsafe(to_read); + return to_read; + } + + [[nodiscard]] rle_size_t GetBatchMisaligned(BitmapSpanMut out, rle_size_t batch_size) { + const auto out_bit_offset = out.bit_start(); + ARROW_DCHECK_LT(out_bit_offset, 8); + ARROW_DCHECK_GE(out_bit_offset, 0); + + if (ARROW_PREDICT_FALSE(remaining() == 0 || batch_size == 0)) { + return 0; + } + + const rle_size_t to_read = std::min(batch_size, remaining()); + rle_size_t read = 0; + + // HEADER: copy bits one by one unit the output is byte-aligned + const rle_size_t to_read_until_aligned = (8 - out_bit_offset) % 8; + const rle_size_t to_read_header = std::min(to_read, to_read_until_aligned); + const auto read_header = GetBatchSlow(out, to_read_header); + // We ensured there was enough capacity + ARROW_DCHECK_EQ(read_header, to_read_header); + read += read_header; + out = out.NewStartingAt(read_header); + // Either we are done or we have aligned the output values on a byte. + ARROW_DCHECK(read == to_read || read == to_read_until_aligned); + + // Main loop, copy 32 bits at the time + const rle_size_t n_batches = (to_read - read) / 32; + for (rle_size_t i = 0; i < n_batches; ++i) { + const auto bits = bit_util::Get32Bits(data_, values_read_ + 32 * i); + std::memcpy(out.data(), &bits, sizeof(bits)); + out = out.NewStartingAt(8 * sizeof(bits)); + } + const rle_size_t read_main = n_batches * 32; + AdvanceUnsafe(read_main); + read += read_main; + + // TRAILER: copy remaining bits one by one + const auto read_trailer = GetBatchSlow(out, to_read - read); + read += read_trailer; + ARROW_DCHECK_EQ(read, to_read); + + return to_read; + } +}; + +} // namespace arrow::util diff --git a/cpp/src/arrow/util/rle_bitmap_test.cc b/cpp/src/arrow/util/rle_bitmap_test.cc new file mode 100644 index 000000000000..cc468d677167 --- /dev/null +++ b/cpp/src/arrow/util/rle_bitmap_test.cc @@ -0,0 +1,285 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. + +#include +#include +#include + +#include + +#include "arrow/testing/gtest_util.h" +#include "arrow/util/bit_util.h" +#include "arrow/util/rle_bitmap_internal.h" +#include "arrow/util/rle_encoding_internal.h" + +namespace arrow::util { + +namespace { + +// Expand a list of bytes into the bit-packed (LSB-first) sequence of booleans +// they encode, keeping only the first `count` bits. +std::vector BitsFromBytes(const std::vector& bytes, rle_size_t count) { + std::vector bits(count); + for (rle_size_t i = 0; i < count; ++i) { + bits[i] = bit_util::GetBit(bytes.data(), i); + } + return bits; +} + +// Skip the first `skip` values with Advance(), then decode the rest of the run +// into a contiguous output bitmap, in successive calls of at most `chunk` values +// each, and compare the result against `expected`. +// +// This drives both decoder types through their byte-aligned and bit-unaligned +// code paths depending on `chunk`: as soon as `chunk` is not a multiple of 8, +// later calls land on a non-zero output bit offset. +// +// A non-zero `skip` additionally desynchronizes the decoder's read offset from +// the (byte-aligned) output offset, which exercises the bit-unaligned +// (GetBatchMisaligned) path of BitPackedRunToBitMapDecoder. With `skip == 0` the +// read and output offsets stay in lockstep and only the aligned path is used. +template +void CheckChunkedDecode(const typename Decoder::RunType& run, + const std::vector& expected, rle_size_t chunk, + rle_size_t skip = 0) { + ARROW_SCOPED_TRACE("chunk = ", chunk, ", skip = ", skip); + const auto n = static_cast(expected.size()); + ASSERT_LE(skip, n); + + Decoder decoder(run); + ASSERT_EQ(decoder.Advance(skip), skip); + const auto rest = n - skip; + + // Output buffer with one guard byte to catch out-of-bounds writes. + std::vector out(static_cast(bit_util::BytesForBits(rest)) + 1, 0); + const uint8_t guard = 0xA5; + out.back() = guard; + + rle_size_t pos = 0; + while (pos < rest) { + const auto want = std::min(chunk, rest - pos); + const auto got = decoder.GetBatch(BitmapSpanMut(out.data(), /*bit_start=*/pos), want); + EXPECT_EQ(got, want) << "at pos " << pos; + pos += got; + EXPECT_EQ(decoder.remaining(), rest - pos); + } + + EXPECT_EQ(decoder.remaining(), 0); + EXPECT_EQ(out.back(), guard) << "decoder wrote past the end of the output"; + for (rle_size_t i = 0; i < rest; ++i) { + EXPECT_EQ(bit_util::GetBit(out.data(), i), expected[skip + i]) << "at bit " << i; + } +} + +// The full battery of checks shared by both decoder types. `expected` is the +// full sequence of booleans the run is supposed to decode to. +template +void CheckBitMapDecoder(const typename Decoder::RunType& run, + const std::vector& expected) { + const auto n = static_cast(expected.size()); + // The sub-checks below (Advance, single Get, several chunk sizes) assume a + // run with a handful of values to exercise. + ASSERT_GE(n, 9); + + // remaining() reflects the run size before any value is read. + { + Decoder decoder(run); + EXPECT_EQ(decoder.remaining(), n); + } + + // Empty requests are a no-op. + { + Decoder decoder(run); + uint8_t out = 0; + EXPECT_EQ(decoder.GetBatch(BitmapSpanMut(&out), /*batch_size=*/0), 0); + EXPECT_EQ(decoder.remaining(), n); + } + + // Decode the whole run with various chunk sizes: aligned (8), unaligned + // (1, 3, 7, 9) and all-at-once. + for (const rle_size_t chunk : + {rle_size_t{1}, rle_size_t{3}, rle_size_t{7}, rle_size_t{8}, rle_size_t{9}, n}) { + CheckChunkedDecode(run, expected, chunk); + // Same, but skipping the first 1..7 values first so the decoder's read + // offset no longer matches the byte-aligned output (driving the + // bit-unaligned path for decoders that have one). + for (rle_size_t skip = 1; skip < 8 && skip < n; ++skip) { + CheckChunkedDecode(run, expected, chunk, skip); + } + } + + // Get() one value at a time, then read past the end. + { + Decoder decoder(run); + std::vector out(static_cast(bit_util::BytesForBits(n)) + 1, 0); + for (rle_size_t i = 0; i < n; ++i) { + EXPECT_TRUE(decoder.Get(BitmapSpanMut(out.data(), /*bit_start=*/i))); + EXPECT_EQ(decoder.remaining(), n - i - 1); + } + // Exhausted: nothing more can be read or advanced. + EXPECT_FALSE(decoder.Get(BitmapSpanMut(out.data()))); + EXPECT_EQ(decoder.Advance(1), 0); + EXPECT_EQ(decoder.remaining(), 0); + for (rle_size_t i = 0; i < n; ++i) { + EXPECT_EQ(bit_util::GetBit(out.data(), i), expected[i]) << "at bit " << i; + } + } + + // Advance() skips values; the remainder still decodes correctly and + // contiguously from the skipped position. + { + Decoder decoder(run); + const rle_size_t skip = 5; + EXPECT_EQ(decoder.Advance(skip), skip); + EXPECT_EQ(decoder.remaining(), n - skip); + + std::vector out(static_cast(bit_util::BytesForBits(n)) + 1, 0); + rle_size_t pos = skip; + while (pos < n) { + const auto got = + decoder.GetBatch(BitmapSpanMut(out.data(), /*bit_start=*/pos), n - pos); + EXPECT_GT(got, 0); + pos += got; + } + EXPECT_EQ(decoder.remaining(), 0); + for (rle_size_t i = skip; i < n; ++i) { + EXPECT_EQ(bit_util::GetBit(out.data(), i), expected[i]) << "at bit " << i; + } + } + + // Advancing more than available stops at the run boundary. + { + Decoder decoder(run); + EXPECT_EQ(decoder.Advance(n + 100), n); + EXPECT_EQ(decoder.remaining(), 0); + } + + // Reset() rewinds the decoder so the run can be decoded again. + { + Decoder decoder(run); + std::vector scratch(static_cast(bit_util::BytesForBits(n)), 0); + EXPECT_EQ(decoder.GetBatch(BitmapSpanMut(scratch.data()), n), n); + EXPECT_EQ(decoder.remaining(), 0); + + decoder.Reset(run); + EXPECT_EQ(decoder.remaining(), n); + std::vector out(static_cast(bit_util::BytesForBits(n)), 0); + EXPECT_EQ(decoder.GetBatch(BitmapSpanMut(out.data()), n), n); + for (rle_size_t i = 0; i < n; ++i) { + EXPECT_EQ(bit_util::GetBit(out.data(), i), expected[i]) << "at bit " << i; + } + } +} + +} // namespace + +// --------------------------------------------------------------------------- +// RleRunToBitMapDecoder +// --------------------------------------------------------------------------- + +struct RleBitMapCase { + // The repeated boolean value of the run. + bool value; + // The number of values in the run. + rle_size_t count; +}; + +class RleRunToBitMapDecoderTest : public ::testing::TestWithParam {}; + +TEST_P(RleRunToBitMapDecoderTest, Decode) { + const auto& param = GetParam(); + + // A boolean RLE run stores its value in a single (1-bit-wide) byte. + const uint8_t data = param.value ? 1 : 0; + const auto run = RleRun(&data, param.count, /*value_bit_width=*/1); + + // value() reports the repeated boolean. + { + RleRunToBitMapDecoder decoder(run); + EXPECT_EQ(decoder.value(), param.value); + } + + const std::vector expected(param.count, param.value); + CheckBitMapDecoder(run, expected); +} + +INSTANTIATE_TEST_SUITE_P(RleBitmap, RleRunToBitMapDecoderTest, + ::testing::Values(RleBitMapCase{/*value=*/false, /*count=*/9}, + RleBitMapCase{/*value=*/true, /*count=*/9}, + RleBitMapCase{/*value=*/false, /*count=*/13}, + RleBitMapCase{/*value=*/true, /*count=*/13}, + RleBitMapCase{/*value=*/false, /*count=*/64}, + RleBitMapCase{/*value=*/true, /*count=*/64}, + RleBitMapCase{/*value=*/false, /*count=*/100}, + RleBitMapCase{/*value=*/true, /*count=*/100}, + RleBitMapCase{/*value=*/true, /*count=*/1000}), + [](const ::testing::TestParamInfo& info) { + return std::string(info.param.value ? "true_" : "false_") + + std::to_string(info.param.count); + }); + +// --------------------------------------------------------------------------- +// BitPackedRunToBitMapDecoder +// --------------------------------------------------------------------------- + +struct BitPackedBitMapCase { + std::string name; + // The raw bit-packed bytes (LSB first). Must hold at least `count` bits. + std::vector bytes; + // The number of values in the run. + rle_size_t count; +}; + +class BitPackedRunToBitMapDecoderTest + : public ::testing::TestWithParam {}; + +TEST_P(BitPackedRunToBitMapDecoderTest, Decode) { + const auto& param = GetParam(); + ASSERT_GE(param.bytes.size(), static_cast(bit_util::BytesForBits(param.count))); + + const auto run = BitPackedRun(param.bytes.data(), param.count, /*value_bit_width=*/1, + /*max_read_bytes=*/-1); + + const std::vector expected = BitsFromBytes(param.bytes, param.count); + CheckBitMapDecoder(run, expected); +} + +INSTANTIATE_TEST_SUITE_P( + RleBitmap, BitPackedRunToBitMapDecoderTest, + ::testing::Values(BitPackedBitMapCase{/*name=*/"alternating", + /*bytes=*/{0b10101010, 0b10101010}, + /*count=*/13}, + BitPackedBitMapCase{/*name=*/"all_zeros", + /*bytes=*/{0x00, 0x00}, + /*count=*/16}, + BitPackedBitMapCase{/*name=*/"all_ones", + /*bytes=*/{0xFF, 0xFF}, + /*count=*/16}, + BitPackedBitMapCase{/*name=*/"mixed", + /*bytes=*/{0b11001010, 0b00001111, 0b10110001}, + /*count=*/24}, + BitPackedBitMapCase{/*name=*/"unaligned_count", + /*bytes=*/{0b00110101, 0b11100100}, + /*count=*/11}, + BitPackedBitMapCase{/*name=*/"large", + /*bytes=*/std::vector(16, 0b01101001), + /*count=*/128}), + [](const ::testing::TestParamInfo& info) { + return info.param.name; + }); + +} // namespace arrow::util From 00ef6d6284414160a86bb58332cd571d324cc3cb Mon Sep 17 00:00:00 2001 From: AntoinePrv Date: Tue, 16 Jun 2026 16:05:03 +0200 Subject: [PATCH 05/21] Simplify parser decoder maping --- cpp/src/arrow/util/rle_encoding_internal.h | 65 ++++++++++++---------- 1 file changed, 35 insertions(+), 30 deletions(-) diff --git a/cpp/src/arrow/util/rle_encoding_internal.h b/cpp/src/arrow/util/rle_encoding_internal.h index 76b1fa43494e..9a4d9b0aa8de 100644 --- a/cpp/src/arrow/util/rle_encoding_internal.h +++ b/cpp/src/arrow/util/rle_encoding_internal.h @@ -104,10 +104,6 @@ class RleRunDecoder; /// 10 % on some benchmarks. class RleRun { public: - /// The decoder class used to decode a single run in the given type. - template - using DecoderType = RleRunDecoder; - constexpr RleRun() noexcept = default; explicit RleRun(const uint8_t* data, rle_size_t values_count, @@ -155,10 +151,6 @@ class BitPackedRunDecoder; /// 10 % on some benchmarks. class BitPackedRun { public: - /// The decoder class used to decode a single run in the given type. - template - using DecoderType = BitPackedRunDecoder; - constexpr BitPackedRun() noexcept = default; constexpr BitPackedRun(const uint8_t* data, rle_size_t values_count, @@ -245,6 +237,10 @@ class RleBitPackedParser { template void Parse(Handler&& handler); + /// Call the parser with a single callable for all event types. + template + void ParseWithCallable(Callable&& func); + private: /// The pointer to the beginning of the run const uint8_t* data_ = nullptr; @@ -494,6 +490,20 @@ class RleBitPackedDecoder { rle_size_t null_count, const uint8_t* valid_bits, int64_t valid_bits_offset); private: + /// Utility to map a run type to the associate decoder. + template + struct get_decoder; + template <> + struct get_decoder { + using type = RleRunDecoder; + }; + template <> + struct get_decoder { + using type = BitPackedRunDecoder; + }; + template + using get_decoder_t = get_decoder::type; + RleBitPackedParser parser_ = {}; std::variant, BitPackedRunDecoder> decoder_ = {}; rle_size_t value_bit_width_; @@ -510,10 +520,6 @@ class RleBitPackedDecoder { decoder_); } - /// Call the parser with a single callable for all event types. - template - void ParseWithCallable(Callable&& func); - /// Utility methods for retrieving spaced values. template [[nodiscard]] rle_size_t GetSpaced(Converter converter, @@ -675,6 +681,17 @@ void RleBitPackedParser::Parse(Handler&& handler) { } } +template +void RleBitPackedParser::ParseWithCallable(Callable&& func) { + struct { + Callable func; + auto OnBitPackedRun(BitPackedRun run) { return func(std::move(run)); } + auto OnRleRun(RleRun run) { return func(std::move(run)); } + } handler{std::move(func)}; + + return Parse(std::move(handler)); +} + namespace internal { /// The maximal unsigned size that a variable can fit. template @@ -763,18 +780,6 @@ auto RleBitPackedParser::PeekImpl(Handler&& handler) const * RleBitPackedDecoder * *************************/ -template -template -void RleBitPackedDecoder::ParseWithCallable(Callable&& func) { - struct { - Callable func; - auto OnBitPackedRun(BitPackedRun run) { return func(std::move(run)); } - auto OnRleRun(RleRun run) { return func(std::move(run)); } - } handler{std::move(func)}; - - parser_.Parse(std::move(handler)); -} - template bool RleBitPackedDecoder::Get(value_type* val) { return GetBatch(val, 1) == 1; @@ -800,8 +805,8 @@ auto RleBitPackedDecoder::GetBatch(value_type* out, ARROW_DCHECK(run_remaining() == 0); } - ParseWithCallable([&](auto run) { - using RunDecoder = typename decltype(run)::template DecoderType; + parser_.ParseWithCallable([&](auto run) { + using RunDecoder = get_decoder_t; ARROW_DCHECK_LT(values_read, batch_size); RunDecoder decoder(run, value_bit_width_); @@ -1113,8 +1118,8 @@ auto RleBitPackedDecoder::GetSpaced(Converter converter, ARROW_DCHECK(run_remaining() == 0); } - ParseWithCallable([&](auto run) { - using RunDecoder = typename decltype(run)::template DecoderType; + parser_.ParseWithCallable([&](auto run) { + using RunDecoder = get_decoder_t; RunDecoder decoder(run, value_bit_width_); @@ -1295,8 +1300,8 @@ auto RleBitPackedDecoder::GetBatchWithDict(const V* dictionary, ARROW_DCHECK(run_remaining() == 0); } - ParseWithCallable([&](auto run) { - using RunDecoder = typename decltype(run)::template DecoderType; + parser_.ParseWithCallable([&](auto run) { + using RunDecoder = get_decoder_t; RunDecoder decoder(run, value_bit_width_); From cf5ac4e0c689b7762cf35d31e72be1be1f04d7a2 Mon Sep 17 00:00:00 2001 From: AntoinePrv Date: Tue, 16 Jun 2026 16:59:40 +0200 Subject: [PATCH 06/21] Add BitPackedRunToBitMapDecoder --- cpp/src/arrow/util/rle_bitmap_internal.h | 110 +++++++- cpp/src/arrow/util/rle_bitmap_test.cc | 321 ++++++++++++++++++++--- 2 files changed, 386 insertions(+), 45 deletions(-) diff --git a/cpp/src/arrow/util/rle_bitmap_internal.h b/cpp/src/arrow/util/rle_bitmap_internal.h index e4bfe3bce15d..3995fbdc038b 100644 --- a/cpp/src/arrow/util/rle_bitmap_internal.h +++ b/cpp/src/arrow/util/rle_bitmap_internal.h @@ -110,9 +110,8 @@ class RunToBitMapDecoderMixin { const auto n_vals = derived()->GetBatchFast(out, batch_size); // TRAILER: Writing inside the last byte if caller asked for non multiple of 8 values - const auto n_last_vals = batch_size - n_vals; - if (ARROW_PREDICT_FALSE(derived()->remaining() > 0 && n_last_vals > 0)) { - ARROW_DCHECK_GT(n_last_vals, 0); + const auto n_last_vals = std::min(batch_size - n_vals, derived()->remaining()); + if (ARROW_PREDICT_FALSE(n_last_vals > 0)) { ARROW_DCHECK_LT(n_last_vals, 8); out = out.NewStartingAt(n_vals); return n_vals + derived()->GetBatchFirstByte(out, n_last_vals); @@ -323,4 +322,109 @@ class BitPackedRunToBitMapDecoder } }; +/// A specialized decoder class to extract RLE+bitpacked booleans. +/// +/// In some cases, such as when reading definition levels for nullable values (with +/// no repetition and no nesting), we know values to be decoded will end up in an +/// Arrow validity bitmap. In such cases, decoding values to a ``int16`` before +/// encoding them again in overly wasteful. +class RleBitPackedToBitMapDecoder { + public: + RleBitPackedToBitMapDecoder() noexcept = default; + + /// Create a decoder object. + /// + /// data and data_size are the raw bytes to decode. + RleBitPackedToBitMapDecoder(const uint8_t* data, rle_size_t data_size) noexcept { + Reset(data, data_size); + } + + void Reset(const uint8_t* data, rle_size_t data_size) noexcept { + parser_.Reset(data, data_size, /* value_bit_width= */ 1); + decoder_ = {}; + } + + /// Whether there is still runs to iterate over. + bool exhausted() const { return (run_remaining() == 0) && parser_.exhausted(); } + + /// Get a batch of values return the number of decoded elements. + /// May write fewer elements to the output than requested if there are not enough + /// values left or if an error occurred. + [[nodiscard]] rle_size_t GetBatch(BitmapSpanMut out, rle_size_t batch_size); + + private: + /// Utility to map a run type to the associate decoder. + template + struct get_decoder; + template <> + struct get_decoder { + using type = RleRunToBitMapDecoder; + }; + template <> + struct get_decoder { + using type = BitPackedRunToBitMapDecoder; + }; + template + using get_decoder_t = get_decoder::type; + + RleBitPackedParser parser_ = {}; + std::variant decoder_ = {}; + + /// Return the number of values that are remaining in the current run. + rle_size_t run_remaining() const { + return std::visit([](const auto& dec) { return dec.remaining(); }, decoder_); + } + + /// Get a batch of values from the current run and return the number elements read. + [[nodiscard]] rle_size_t RunGetBatch(BitmapSpanMut out, rle_size_t batch_size) { + return std::visit([&](auto& dec) { return dec.GetBatch(out, batch_size); }, decoder_); + } +}; + +/************************************************ + * RleBitPackedToBitMapDecoder implementation * + ************************************************/ + +inline auto RleBitPackedToBitMapDecoder::GetBatch(BitmapSpanMut out, + rle_size_t batch_size) -> rle_size_t { + using ControlFlow = RleBitPackedParser::ControlFlow; + + rle_size_t values_read = 0; + + // Remaining from a previous call that would have left some unread data from a run. + if (ARROW_PREDICT_FALSE(run_remaining() > 0)) { + const auto read = RunGetBatch(out, batch_size); + values_read += read; + + // Either we fulfilled all the batch to be read or we finished remaining run. + if (ARROW_PREDICT_FALSE(values_read == batch_size)) { + return values_read; + } + ARROW_DCHECK(run_remaining() == 0); + } + + parser_.ParseWithCallable([&](auto run) { + using RunDecoder = get_decoder_t; + + ARROW_DCHECK_LT(values_read, batch_size); + RunDecoder decoder(run); + // The output span carries its own bit offset, so advancing it past the values + // already written keeps successive runs correctly aligned in the bitmap. + const auto read = + decoder.GetBatch(out.NewStartingAt(values_read), batch_size - values_read); + ARROW_DCHECK_LE(read, batch_size - values_read); + values_read += read; + + // Stop reading and store remaining decoder + if (ARROW_PREDICT_FALSE(values_read == batch_size || read == 0)) { + decoder_ = std::move(decoder); + return ControlFlow::Break; + } + + return ControlFlow::Continue; + }); + + return values_read; +} + } // namespace arrow::util diff --git a/cpp/src/arrow/util/rle_bitmap_test.cc b/cpp/src/arrow/util/rle_bitmap_test.cc index cc468d677167..e813d8911bed 100644 --- a/cpp/src/arrow/util/rle_bitmap_test.cc +++ b/cpp/src/arrow/util/rle_bitmap_test.cc @@ -57,25 +57,27 @@ void CheckChunkedDecode(const typename Decoder::RunType& run, const std::vector& expected, rle_size_t chunk, rle_size_t skip = 0) { ARROW_SCOPED_TRACE("chunk = ", chunk, ", skip = ", skip); - const auto n = static_cast(expected.size()); - ASSERT_LE(skip, n); + const auto n_vals = static_cast(expected.size()); + ASSERT_LE(skip, n_vals); Decoder decoder(run); ASSERT_EQ(decoder.Advance(skip), skip); - const auto rest = n - skip; + const auto rest = n_vals - skip; // Output buffer with one guard byte to catch out-of-bounds writes. std::vector out(static_cast(bit_util::BytesForBits(rest)) + 1, 0); const uint8_t guard = 0xA5; out.back() = guard; - rle_size_t pos = 0; - while (pos < rest) { - const auto want = std::min(chunk, rest - pos); - const auto got = decoder.GetBatch(BitmapSpanMut(out.data(), /*bit_start=*/pos), want); - EXPECT_EQ(got, want) << "at pos " << pos; - pos += got; - EXPECT_EQ(decoder.remaining(), rest - pos); + rle_size_t read = 0; + while (read < rest) { + const auto want = std::min(chunk, rest - read); + const auto got = + decoder.GetBatch(BitmapSpanMut(out.data(), /*bit_start=*/read), want); + EXPECT_EQ(got, want) << "at pos " << read; + ASSERT_GT(got, 0) << "at pos " << read; // break on failure + read += got; + EXPECT_EQ(decoder.remaining(), rest - read); } EXPECT_EQ(decoder.remaining(), 0); @@ -90,15 +92,15 @@ void CheckChunkedDecode(const typename Decoder::RunType& run, template void CheckBitMapDecoder(const typename Decoder::RunType& run, const std::vector& expected) { - const auto n = static_cast(expected.size()); + const auto n_vals = static_cast(expected.size()); // The sub-checks below (Advance, single Get, several chunk sizes) assume a // run with a handful of values to exercise. - ASSERT_GE(n, 9); + ASSERT_GE(n_vals, 9); // remaining() reflects the run size before any value is read. { Decoder decoder(run); - EXPECT_EQ(decoder.remaining(), n); + EXPECT_EQ(decoder.remaining(), n_vals); } // Empty requests are a no-op. @@ -106,18 +108,18 @@ void CheckBitMapDecoder(const typename Decoder::RunType& run, Decoder decoder(run); uint8_t out = 0; EXPECT_EQ(decoder.GetBatch(BitmapSpanMut(&out), /*batch_size=*/0), 0); - EXPECT_EQ(decoder.remaining(), n); + EXPECT_EQ(decoder.remaining(), n_vals); } // Decode the whole run with various chunk sizes: aligned (8), unaligned // (1, 3, 7, 9) and all-at-once. - for (const rle_size_t chunk : - {rle_size_t{1}, rle_size_t{3}, rle_size_t{7}, rle_size_t{8}, rle_size_t{9}, n}) { + for (const rle_size_t chunk : {rle_size_t{1}, rle_size_t{3}, rle_size_t{7}, + rle_size_t{8}, rle_size_t{9}, n_vals}) { CheckChunkedDecode(run, expected, chunk); // Same, but skipping the first 1..7 values first so the decoder's read // offset no longer matches the byte-aligned output (driving the // bit-unaligned path for decoders that have one). - for (rle_size_t skip = 1; skip < 8 && skip < n; ++skip) { + for (rle_size_t skip = 1; skip < 8 && skip < n_vals; ++skip) { CheckChunkedDecode(run, expected, chunk, skip); } } @@ -125,16 +127,16 @@ void CheckBitMapDecoder(const typename Decoder::RunType& run, // Get() one value at a time, then read past the end. { Decoder decoder(run); - std::vector out(static_cast(bit_util::BytesForBits(n)) + 1, 0); - for (rle_size_t i = 0; i < n; ++i) { + std::vector out(static_cast(bit_util::BytesForBits(n_vals)) + 1, 0); + for (rle_size_t i = 0; i < n_vals; ++i) { EXPECT_TRUE(decoder.Get(BitmapSpanMut(out.data(), /*bit_start=*/i))); - EXPECT_EQ(decoder.remaining(), n - i - 1); + EXPECT_EQ(decoder.remaining(), n_vals - i - 1); } // Exhausted: nothing more can be read or advanced. EXPECT_FALSE(decoder.Get(BitmapSpanMut(out.data()))); EXPECT_EQ(decoder.Advance(1), 0); EXPECT_EQ(decoder.remaining(), 0); - for (rle_size_t i = 0; i < n; ++i) { + for (rle_size_t i = 0; i < n_vals; ++i) { EXPECT_EQ(bit_util::GetBit(out.data(), i), expected[i]) << "at bit " << i; } } @@ -145,18 +147,22 @@ void CheckBitMapDecoder(const typename Decoder::RunType& run, Decoder decoder(run); const rle_size_t skip = 5; EXPECT_EQ(decoder.Advance(skip), skip); - EXPECT_EQ(decoder.remaining(), n - skip); + EXPECT_EQ(decoder.remaining(), n_vals - skip); - std::vector out(static_cast(bit_util::BytesForBits(n)) + 1, 0); - rle_size_t pos = skip; - while (pos < n) { + std::vector out(static_cast(bit_util::BytesForBits(n_vals)) + 1, 0); + rle_size_t read = skip; + while (read < n_vals) { const auto got = - decoder.GetBatch(BitmapSpanMut(out.data(), /*bit_start=*/pos), n - pos); - EXPECT_GT(got, 0); - pos += got; + decoder.GetBatch(BitmapSpanMut(out.data(), /*bit_start=*/read), n_vals - read); + ASSERT_GT(got, 0); // break on failure + read += got; } EXPECT_EQ(decoder.remaining(), 0); - for (rle_size_t i = skip; i < n; ++i) { + // Bits before the skipped position must be left untouched (still zero). + for (rle_size_t i = 0; i < skip; ++i) { + EXPECT_FALSE(bit_util::GetBit(out.data(), i)) << "clobbered bit " << i; + } + for (rle_size_t i = skip; i < n_vals; ++i) { EXPECT_EQ(bit_util::GetBit(out.data(), i), expected[i]) << "at bit " << i; } } @@ -164,22 +170,22 @@ void CheckBitMapDecoder(const typename Decoder::RunType& run, // Advancing more than available stops at the run boundary. { Decoder decoder(run); - EXPECT_EQ(decoder.Advance(n + 100), n); + EXPECT_EQ(decoder.Advance(n_vals + 100), n_vals); EXPECT_EQ(decoder.remaining(), 0); } // Reset() rewinds the decoder so the run can be decoded again. { Decoder decoder(run); - std::vector scratch(static_cast(bit_util::BytesForBits(n)), 0); - EXPECT_EQ(decoder.GetBatch(BitmapSpanMut(scratch.data()), n), n); + std::vector scratch(static_cast(bit_util::BytesForBits(n_vals)), 0); + EXPECT_EQ(decoder.GetBatch(BitmapSpanMut(scratch.data()), n_vals), n_vals); EXPECT_EQ(decoder.remaining(), 0); decoder.Reset(run); - EXPECT_EQ(decoder.remaining(), n); - std::vector out(static_cast(bit_util::BytesForBits(n)), 0); - EXPECT_EQ(decoder.GetBatch(BitmapSpanMut(out.data()), n), n); - for (rle_size_t i = 0; i < n; ++i) { + EXPECT_EQ(decoder.remaining(), n_vals); + std::vector out(static_cast(bit_util::BytesForBits(n_vals)), 0); + EXPECT_EQ(decoder.GetBatch(BitmapSpanMut(out.data()), n_vals), n_vals); + for (rle_size_t i = 0; i < n_vals; ++i) { EXPECT_EQ(bit_util::GetBit(out.data(), i), expected[i]) << "at bit " << i; } } @@ -187,9 +193,9 @@ void CheckBitMapDecoder(const typename Decoder::RunType& run, } // namespace -// --------------------------------------------------------------------------- -// RleRunToBitMapDecoder -// --------------------------------------------------------------------------- +/*************************** + * RleRunToBitMapDecoder * + ***************************/ struct RleBitMapCase { // The repeated boolean value of the run. @@ -232,9 +238,9 @@ INSTANTIATE_TEST_SUITE_P(RleBitmap, RleRunToBitMapDecoderTest, std::to_string(info.param.count); }); -// --------------------------------------------------------------------------- -// BitPackedRunToBitMapDecoder -// --------------------------------------------------------------------------- +/********************************* + * BitPackedRunToBitMapDecoder * + *********************************/ struct BitPackedBitMapCase { std::string name; @@ -282,4 +288,235 @@ INSTANTIATE_TEST_SUITE_P( return info.param.name; }); +/********************************* + * RleBitPackedToBitMapDecoder * + *********************************/ + +namespace { + +// Append the LEB128 (unsigned, little-endian base-128) encoding of `value`. +void AppendLeb128(std::vector& out, uint32_t value) { + uint8_t buf[bit_util::kMaxLEB128ByteLenFor]; + const auto n_vals = + bit_util::WriteLEB128(value, buf, static_cast(sizeof(buf))); + ASSERT_GT(n_vals, 0); + out.insert(out.end(), buf, buf + n_vals); +} + +// Append an RLE run of `count` copies of `value` (1-bit values) to the encoded +// `bytes` stream and to the `expected` decoded sequence. +void AppendRleRun(std::vector& bytes, std::vector& expected, bool value, + rle_size_t count) { + AppendLeb128(bytes, static_cast(count) << 1); // low bit 0 => RLE + bytes.push_back(value ? 1 : 0); + expected.insert(expected.end(), count, value); +} + +// Append a bit-packed run holding `packed.size()` groups of 8 (1-bit) values, +// LSB first, to the encoded `bytes` stream and to the `expected` sequence. +void AppendBitPackedRun(std::vector& bytes, std::vector& expected, + const std::vector& packed) { + const auto groups = static_cast(packed.size()); + AppendLeb128(bytes, (static_cast(groups) << 1) | 1); // low bit 1 => packed + bytes.insert(bytes.end(), packed.begin(), packed.end()); + for (rle_size_t i = 0; i < groups * 8; ++i) { + expected.push_back(bit_util::GetBit(packed.data(), i)); + } +} + +// Decode the whole `bytes` stream into a bitmap starting at bit offset +// `out_offset`, in successive GetBatch calls of at most `chunk` values, and +// compare against `expected`. +// +// Decoding in small, byte-unaligned chunks and (with a non-zero `out_offset`) +// at a non-aligned output position exercises how the decoder threads its output +// bit offset across runs that do not end on a byte boundary. +void CheckRleBitPackedDecode(const std::vector& bytes, + const std::vector& expected, rle_size_t chunk, + rle_size_t out_offset = 0) { + ARROW_SCOPED_TRACE("chunk = ", chunk, ", out_offset = ", out_offset); + const auto n_vals = static_cast(expected.size()); + + RleBitPackedToBitMapDecoder decoder(bytes.data(), + static_cast(bytes.size())); + EXPECT_EQ(decoder.exhausted(), n_vals == 0); + + // Output buffer with one guard byte to catch out-of-bounds writes. + std::vector out( + static_cast(bit_util::BytesForBits(out_offset + n_vals)) + 1, 0); + const uint8_t guard = 0xA5; + out.back() = guard; + + rle_size_t read = 0; + while (read < n_vals) { + const auto want = std::min(chunk, n_vals - read); + const auto got = decoder.GetBatch( + BitmapSpanMut(out.data(), /*bit_start=*/out_offset + read), want); + EXPECT_EQ(got, want) << "at pos " << read; + ASSERT_GT(got, 0) << "at pos " << read; // break on failure + read += got; + } + + EXPECT_EQ(read, n_vals); + EXPECT_TRUE(decoder.exhausted()); + // Reading past the end yields nothing and leaves the decoder exhausted. + uint8_t scratch = 0; + EXPECT_EQ(decoder.GetBatch(BitmapSpanMut(&scratch), 8), 0); + EXPECT_TRUE(decoder.exhausted()); + + EXPECT_EQ(out.back(), guard) << "decoder wrote past the end of the output"; + // Bits before the output offset must be left untouched (still zero). + for (rle_size_t i = 0; i < out_offset; ++i) { + EXPECT_FALSE(bit_util::GetBit(out.data(), i)) << "clobbered bit " << i; + } + for (rle_size_t i = 0; i < n_vals; ++i) { + EXPECT_EQ(bit_util::GetBit(out.data(), out_offset + i), expected[i]) + << "at bit " << i; + } +} + +// Run the decode check over a battery of chunk sizes and output offsets. +void CheckRleBitPackedToBitMap(const std::vector& bytes, + const std::vector& expected) { + const auto n = static_cast(expected.size()); + ASSERT_GT(n, 0); + for (const rle_size_t chunk : {rle_size_t{1}, rle_size_t{3}, rle_size_t{7}, + rle_size_t{8}, rle_size_t{9}, rle_size_t{33}, n}) { + CheckRleBitPackedDecode(bytes, expected, chunk); + // A non-zero output offset forces the first run to start at a non-byte + // aligned output position. + for (rle_size_t out_offset = 1; out_offset < 8; ++out_offset) { + CheckRleBitPackedDecode(bytes, expected, chunk, out_offset); + } + } +} + +} // namespace + +TEST(RleBitPackedToBitMapDecoder, Empty) { + // A default-constructed decoder is already exhausted. + RleBitPackedToBitMapDecoder decoder; + EXPECT_TRUE(decoder.exhausted()); + uint8_t out = 0; + EXPECT_EQ(decoder.GetBatch(BitmapSpanMut(&out), 8), 0); + + // So is one reset on an empty buffer. + decoder.Reset(nullptr, 0); + EXPECT_TRUE(decoder.exhausted()); + EXPECT_EQ(decoder.GetBatch(BitmapSpanMut(&out), 8), 0); +} + +TEST(RleBitPackedToBitMapDecoder, SingleRleZeros) { + std::vector bytes; + std::vector expected; + AppendRleRun(bytes, expected, /*value=*/false, /*count=*/100); + CheckRleBitPackedToBitMap(bytes, expected); +} + +TEST(RleBitPackedToBitMapDecoder, SingleRleOnes) { + std::vector bytes; + std::vector expected; + AppendRleRun(bytes, expected, /*value=*/true, /*count=*/100); + CheckRleBitPackedToBitMap(bytes, expected); +} + +TEST(RleBitPackedToBitMapDecoder, SingleBitPacked) { + std::vector bytes; + std::vector expected; + AppendBitPackedRun(bytes, expected, {0b10101010, 0b11001100, 0b11110000}); + CheckRleBitPackedToBitMap(bytes, expected); +} + +TEST(RleBitPackedToBitMapDecoder, MixedRunsAligned) { + // All runs end on a byte boundary, so each run starts byte-aligned in the + // output. + std::vector bytes; + std::vector expected; + AppendRleRun(bytes, expected, /*value=*/false, /*count=*/16); + AppendBitPackedRun(bytes, expected, {0b10101010, 0b01010101}); + AppendRleRun(bytes, expected, /*value=*/true, /*count=*/64); + AppendBitPackedRun(bytes, expected, {0b00001111}); + CheckRleBitPackedToBitMap(bytes, expected); +} + +TEST(RleBitPackedToBitMapDecoder, MixedRunsUnaligned) { + // RLE runs with counts that are not multiples of 8 make each following run + // start at a non-byte-aligned output position. + std::vector bytes; + std::vector expected; + AppendRleRun(bytes, expected, /*value=*/true, /*count=*/13); + AppendBitPackedRun(bytes, expected, {0b01101001, 0b10010110}); + AppendRleRun(bytes, expected, /*value=*/false, /*count=*/5); + AppendRleRun(bytes, expected, /*value=*/true, /*count=*/200); + AppendBitPackedRun(bytes, expected, {0b11110000}); + AppendRleRun(bytes, expected, /*value=*/false, /*count=*/3); + AppendBitPackedRun(bytes, expected, {0b10110001, 0b00011101}); + CheckRleBitPackedToBitMap(bytes, expected); +} + +TEST(RleBitPackedToBitMapDecoder, ReadPastEnd) { + std::vector bytes; + std::vector expected; + AppendRleRun(bytes, expected, /*value=*/true, /*count=*/10); + AppendBitPackedRun(bytes, expected, {0b10110010}); + const auto n = static_cast(expected.size()); + + RleBitPackedToBitMapDecoder decoder(bytes.data(), + static_cast(bytes.size())); + std::vector out(static_cast(bit_util::BytesForBits(n)) + 1, 0); + // Requesting more values than available produces only the available ones. + EXPECT_EQ(decoder.GetBatch(BitmapSpanMut(out.data()), n + 100), n); + EXPECT_TRUE(decoder.exhausted()); + EXPECT_EQ(decoder.GetBatch(BitmapSpanMut(out.data()), 10), 0); + for (rle_size_t i = 0; i < n; ++i) { + EXPECT_EQ(bit_util::GetBit(out.data(), i), expected[i]) << "at bit " << i; + } +} + +TEST(RleBitPackedToBitMapDecoder, Reset) { + std::vector bytes; + std::vector expected; + AppendRleRun(bytes, expected, /*value=*/true, /*count=*/13); + AppendBitPackedRun(bytes, expected, {0b01101001, 0b10010110}); + AppendRleRun(bytes, expected, /*value=*/false, /*count=*/20); + const auto n = static_cast(expected.size()); + const auto size = static_cast(bytes.size()); + + RleBitPackedToBitMapDecoder decoder(bytes.data(), size); + std::vector out(static_cast(bit_util::BytesForBits(n)), 0); + EXPECT_EQ(decoder.GetBatch(BitmapSpanMut(out.data()), n), n); + EXPECT_TRUE(decoder.exhausted()); + + // Reset rewinds the decoder so the same buffer decodes again. + decoder.Reset(bytes.data(), size); + EXPECT_FALSE(decoder.exhausted()); + std::vector out2(static_cast(bit_util::BytesForBits(n)), 0); + EXPECT_EQ(decoder.GetBatch(BitmapSpanMut(out2.data()), n), n); + EXPECT_TRUE(decoder.exhausted()); + for (rle_size_t i = 0; i < n; ++i) { + EXPECT_EQ(bit_util::GetBit(out2.data(), i), expected[i]) << "at bit " << i; + } +} + +TEST(RleBitPackedToBitMapDecoder, Truncated) { + // GH-style malformed input: a bit-packed run declares more groups than the + // buffer holds. The decoder yields what it can and is not exhausted. + std::vector bytes; + std::vector expected; + AppendRleRun(bytes, expected, /*value=*/true, /*count=*/10); + // Declare 4 groups (32 values) of bit-packed data but provide only 1 byte. + AppendLeb128(bytes, (4u << 1) | 1); + bytes.push_back(0b10101010); + + RleBitPackedToBitMapDecoder decoder(bytes.data(), + static_cast(bytes.size())); + std::vector out(16, 0); + // The RLE run decodes fully; the truncated bit-packed run cannot be parsed. + EXPECT_EQ(decoder.GetBatch(BitmapSpanMut(out.data()), 1000), 10); + EXPECT_FALSE(decoder.exhausted()); + for (rle_size_t i = 0; i < 10; ++i) { + EXPECT_EQ(bit_util::GetBit(out.data(), i), true) << "at bit " << i; + } +} + } // namespace arrow::util From c0692c17de39b4e23c15564089c8c6887fb74423 Mon Sep 17 00:00:00 2001 From: AntoinePrv Date: Tue, 16 Jun 2026 17:14:58 +0200 Subject: [PATCH 07/21] Extra check --- cpp/src/arrow/util/rle_bitmap_internal.h | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/cpp/src/arrow/util/rle_bitmap_internal.h b/cpp/src/arrow/util/rle_bitmap_internal.h index 3995fbdc038b..5a67899660fd 100644 --- a/cpp/src/arrow/util/rle_bitmap_internal.h +++ b/cpp/src/arrow/util/rle_bitmap_internal.h @@ -217,6 +217,12 @@ class BitPackedRunToBitMapDecoder data_ = run.raw_data_ptr(); values_count_ = run.values_count(); values_read_ = 0; + // This decoder bounds all of its reads by `values_count_` alone and does not + // carry `max_read_bytes`. Its memory safety therefore relies on the producer + // having sized the run to fit the backing buffer. Check that contract here, + // at the point it is relied upon (a negative max means "unbounded"). + ARROW_DCHECK(run.raw_data_max_size() < 0 || + bit_util::BytesForBits(values_count_) <= run.raw_data_max_size()); } /// Return the number of values that can be advanced. From fb0d18b4aaabe92e0c99f2ce6845ae403ea449fa Mon Sep 17 00:00:00 2001 From: AntoinePrv Date: Tue, 16 Jun 2026 17:20:46 +0200 Subject: [PATCH 08/21] Extra doc --- cpp/src/arrow/util/rle_bitmap_internal.h | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/cpp/src/arrow/util/rle_bitmap_internal.h b/cpp/src/arrow/util/rle_bitmap_internal.h index 5a67899660fd..19ceada2a4ee 100644 --- a/cpp/src/arrow/util/rle_bitmap_internal.h +++ b/cpp/src/arrow/util/rle_bitmap_internal.h @@ -27,6 +27,7 @@ namespace arrow::util { +/// A lightweight view over a bitmap. template class BitmapSpan { public: @@ -38,10 +39,13 @@ class BitmapSpan { Normalize(); } + /// Pointer to the byte where the first value is stored. constexpr byte_type* data() const noexcept { return data_; } + /// Bit offset of the first value in the first byte. constexpr size_type bit_start() const noexcept { return bit_start_; } + /// Return a new span starting at the given position. constexpr BitmapSpan NewStartingAt(size_type bit_start) const noexcept { auto out = *this; out.bit_start_ += bit_start; @@ -217,10 +221,6 @@ class BitPackedRunToBitMapDecoder data_ = run.raw_data_ptr(); values_count_ = run.values_count(); values_read_ = 0; - // This decoder bounds all of its reads by `values_count_` alone and does not - // carry `max_read_bytes`. Its memory safety therefore relies on the producer - // having sized the run to fit the backing buffer. Check that contract here, - // at the point it is relied upon (a negative max means "unbounded"). ARROW_DCHECK(run.raw_data_max_size() < 0 || bit_util::BytesForBits(values_count_) <= run.raw_data_max_size()); } From 347c2ec0a9c74df735e9248ef2ed58d32194c99e Mon Sep 17 00:00:00 2001 From: AntoinePrv Date: Tue, 16 Jun 2026 17:29:55 +0200 Subject: [PATCH 09/21] Comment style --- cpp/src/arrow/util/rle_bitmap_test.cc | 58 +++++++++++++-------------- 1 file changed, 29 insertions(+), 29 deletions(-) diff --git a/cpp/src/arrow/util/rle_bitmap_test.cc b/cpp/src/arrow/util/rle_bitmap_test.cc index e813d8911bed..7dad76966ba2 100644 --- a/cpp/src/arrow/util/rle_bitmap_test.cc +++ b/cpp/src/arrow/util/rle_bitmap_test.cc @@ -30,8 +30,8 @@ namespace arrow::util { namespace { -// Expand a list of bytes into the bit-packed (LSB-first) sequence of booleans -// they encode, keeping only the first `count` bits. +/// Expand a list of bytes into the bit-packed (LSB-first) sequence of booleans +/// they encode, keeping only the first `count` bits. std::vector BitsFromBytes(const std::vector& bytes, rle_size_t count) { std::vector bits(count); for (rle_size_t i = 0; i < count; ++i) { @@ -40,18 +40,18 @@ std::vector BitsFromBytes(const std::vector& bytes, rle_size_t co return bits; } -// Skip the first `skip` values with Advance(), then decode the rest of the run -// into a contiguous output bitmap, in successive calls of at most `chunk` values -// each, and compare the result against `expected`. -// -// This drives both decoder types through their byte-aligned and bit-unaligned -// code paths depending on `chunk`: as soon as `chunk` is not a multiple of 8, -// later calls land on a non-zero output bit offset. -// -// A non-zero `skip` additionally desynchronizes the decoder's read offset from -// the (byte-aligned) output offset, which exercises the bit-unaligned -// (GetBatchMisaligned) path of BitPackedRunToBitMapDecoder. With `skip == 0` the -// read and output offsets stay in lockstep and only the aligned path is used. +/// Skip the first `skip` values with Advance(), then decode the rest of the run +/// into a contiguous output bitmap, in successive calls of at most `chunk` values +/// each, and compare the result against `expected`. +/// +/// This drives both decoder types through their byte-aligned and bit-unaligned +/// code paths depending on `chunk`: as soon as `chunk` is not a multiple of 8, +/// later calls land on a non-zero output bit offset. +/// +/// A non-zero `skip` additionally desynchronizes the decoder's read offset from +/// the (byte-aligned) output offset, which exercises the bit-unaligned +/// (GetBatchMisaligned) path of BitPackedRunToBitMapDecoder. With `skip == 0` the +/// read and output offsets stay in lockstep and only the aligned path is used. template void CheckChunkedDecode(const typename Decoder::RunType& run, const std::vector& expected, rle_size_t chunk, @@ -87,8 +87,8 @@ void CheckChunkedDecode(const typename Decoder::RunType& run, } } -// The full battery of checks shared by both decoder types. `expected` is the -// full sequence of booleans the run is supposed to decode to. +/// The full battery of checks shared by both decoder types. `expected` is the +/// full sequence of booleans the run is supposed to decode to. template void CheckBitMapDecoder(const typename Decoder::RunType& run, const std::vector& expected) { @@ -294,7 +294,7 @@ INSTANTIATE_TEST_SUITE_P( namespace { -// Append the LEB128 (unsigned, little-endian base-128) encoding of `value`. +/// Append the LEB128 (unsigned, little-endian base-128) encoding of `value`. void AppendLeb128(std::vector& out, uint32_t value) { uint8_t buf[bit_util::kMaxLEB128ByteLenFor]; const auto n_vals = @@ -303,8 +303,8 @@ void AppendLeb128(std::vector& out, uint32_t value) { out.insert(out.end(), buf, buf + n_vals); } -// Append an RLE run of `count` copies of `value` (1-bit values) to the encoded -// `bytes` stream and to the `expected` decoded sequence. +/// Append an RLE run of `count` copies of `value` (1-bit values) to the encoded +/// `bytes` stream and to the `expected` decoded sequence. void AppendRleRun(std::vector& bytes, std::vector& expected, bool value, rle_size_t count) { AppendLeb128(bytes, static_cast(count) << 1); // low bit 0 => RLE @@ -312,8 +312,8 @@ void AppendRleRun(std::vector& bytes, std::vector& expected, bool expected.insert(expected.end(), count, value); } -// Append a bit-packed run holding `packed.size()` groups of 8 (1-bit) values, -// LSB first, to the encoded `bytes` stream and to the `expected` sequence. +/// Append a bit-packed run holding `packed.size()` groups of 8 (1-bit) values, +/// LSB first, to the encoded `bytes` stream and to the `expected` sequence. void AppendBitPackedRun(std::vector& bytes, std::vector& expected, const std::vector& packed) { const auto groups = static_cast(packed.size()); @@ -324,13 +324,13 @@ void AppendBitPackedRun(std::vector& bytes, std::vector& expected } } -// Decode the whole `bytes` stream into a bitmap starting at bit offset -// `out_offset`, in successive GetBatch calls of at most `chunk` values, and -// compare against `expected`. -// -// Decoding in small, byte-unaligned chunks and (with a non-zero `out_offset`) -// at a non-aligned output position exercises how the decoder threads its output -// bit offset across runs that do not end on a byte boundary. +/// Decode the whole `bytes` stream into a bitmap starting at bit offset +/// `out_offset`, in successive GetBatch calls of at most `chunk` values, and +/// compare against `expected`. +/// +/// Decoding in small, byte-unaligned chunks and (with a non-zero `out_offset`) +/// at a non-aligned output position exercises how the decoder threads its output +/// bit offset across runs that do not end on a byte boundary. void CheckRleBitPackedDecode(const std::vector& bytes, const std::vector& expected, rle_size_t chunk, rle_size_t out_offset = 0) { @@ -375,7 +375,7 @@ void CheckRleBitPackedDecode(const std::vector& bytes, } } -// Run the decode check over a battery of chunk sizes and output offsets. +/// Run the decode check over a battery of chunk sizes and output offsets. void CheckRleBitPackedToBitMap(const std::vector& bytes, const std::vector& expected) { const auto n = static_cast(expected.size()); From 0008ac89a13c5cdf4d23b4bf6e9949849fa61fa4 Mon Sep 17 00:00:00 2001 From: AntoinePrv Date: Wed, 17 Jun 2026 18:09:26 +0200 Subject: [PATCH 10/21] Test layout improvements --- cpp/src/arrow/util/rle_bitmap_test.cc | 294 ++++++++++++-------------- 1 file changed, 140 insertions(+), 154 deletions(-) diff --git a/cpp/src/arrow/util/rle_bitmap_test.cc b/cpp/src/arrow/util/rle_bitmap_test.cc index 7dad76966ba2..564c6c455f1b 100644 --- a/cpp/src/arrow/util/rle_bitmap_test.cc +++ b/cpp/src/arrow/util/rle_bitmap_test.cc @@ -15,6 +15,7 @@ // specific language governing permissions and limitations // under the License. +#include #include #include #include @@ -30,8 +31,7 @@ namespace arrow::util { namespace { -/// Expand a list of bytes into the bit-packed (LSB-first) sequence of booleans -/// they encode, keeping only the first `count` bits. +/// Read the first `count` bits of `bytes` (LSB first) into a vector of booleans. std::vector BitsFromBytes(const std::vector& bytes, rle_size_t count) { std::vector bits(count); for (rle_size_t i = 0; i < count; ++i) { @@ -40,28 +40,42 @@ std::vector BitsFromBytes(const std::vector& bytes, rle_size_t co return bits; } +/// Check the decoded output in `out` against `expected`. +/// Bits `out[out_offset..out_offset + count]` must equal `expected[skip..skip + count]`. +/// The `out_offset` bits before them must still be zero. +void CheckDecodedBits(const std::vector& out, const std::vector& expected, + rle_size_t count, rle_size_t out_offset = 0, rle_size_t skip = 0) { + ARROW_SCOPED_TRACE("out_offset = ", out_offset, ", skip = ", skip); + for (rle_size_t i = 0; i < out_offset; ++i) { + EXPECT_FALSE(bit_util::GetBit(out.data(), i)) << "clobbered bit " << i; + } + for (rle_size_t i = 0; i < count; ++i) { + EXPECT_EQ(bit_util::GetBit(out.data(), out_offset + i), expected[skip + i]) + << "at bit " << i; + } +} + /// Skip the first `skip` values with Advance(), then decode the rest of the run -/// into a contiguous output bitmap, in successive calls of at most `chunk` values -/// each, and compare the result against `expected`. +/// into one output bitmap, `chunk` values at a time. Compare against `expected`. /// -/// This drives both decoder types through their byte-aligned and bit-unaligned -/// code paths depending on `chunk`: as soon as `chunk` is not a multiple of 8, -/// later calls land on a non-zero output bit offset. +/// `chunk` controls output bit alignment. When `chunk` is not a multiple of 8, +/// later calls start at a non-zero output bit offset. /// -/// A non-zero `skip` additionally desynchronizes the decoder's read offset from -/// the (byte-aligned) output offset, which exercises the bit-unaligned -/// (GetBatchMisaligned) path of BitPackedRunToBitMapDecoder. With `skip == 0` the -/// read and output offsets stay in lockstep and only the aligned path is used. +/// `skip` shifts the decoder's read offset relative to the output offset. +/// A non-zero `skip` makes the two differ, which exercises the bit-unaligned read +/// path of BitPackedRunToBitMapDecoder. With `skip == 0` they stay in sync and +/// only the aligned path runs. template void CheckChunkedDecode(const typename Decoder::RunType& run, - const std::vector& expected, rle_size_t chunk, + const std::vector& expected, rle_size_t chunk = 1, rle_size_t skip = 0) { ARROW_SCOPED_TRACE("chunk = ", chunk, ", skip = ", skip); const auto n_vals = static_cast(expected.size()); ASSERT_LE(skip, n_vals); Decoder decoder(run); - ASSERT_EQ(decoder.Advance(skip), skip); + const auto advanced = decoder.Advance(skip); + ASSERT_EQ(advanced, skip); const auto rest = n_vals - skip; // Output buffer with one guard byte to catch out-of-bounds writes. @@ -82,20 +96,16 @@ void CheckChunkedDecode(const typename Decoder::RunType& run, EXPECT_EQ(decoder.remaining(), 0); EXPECT_EQ(out.back(), guard) << "decoder wrote past the end of the output"; - for (rle_size_t i = 0; i < rest; ++i) { - EXPECT_EQ(bit_util::GetBit(out.data(), i), expected[skip + i]) << "at bit " << i; - } + CheckDecodedBits(out, expected, /*count=*/rest, /*out_offset=*/0, skip); } -/// The full battery of checks shared by both decoder types. `expected` is the -/// full sequence of booleans the run is supposed to decode to. +/// All the checks shared by both decoder types. +/// +/// `expected` is the full sequence of booleans the run should decode to. template void CheckBitMapDecoder(const typename Decoder::RunType& run, const std::vector& expected) { const auto n_vals = static_cast(expected.size()); - // The sub-checks below (Advance, single Get, several chunk sizes) assume a - // run with a handful of values to exercise. - ASSERT_GE(n_vals, 9); // remaining() reflects the run size before any value is read. { @@ -107,18 +117,21 @@ void CheckBitMapDecoder(const typename Decoder::RunType& run, { Decoder decoder(run); uint8_t out = 0; - EXPECT_EQ(decoder.GetBatch(BitmapSpanMut(&out), /*batch_size=*/0), 0); + const auto got = decoder.GetBatch(BitmapSpanMut(&out), /*batch_size=*/0); + EXPECT_EQ(got, 0); EXPECT_EQ(decoder.remaining(), n_vals); } - // Decode the whole run with various chunk sizes: aligned (8), unaligned - // (1, 3, 7, 9) and all-at-once. + // Decode the whole run in several chunks. for (const rle_size_t chunk : {rle_size_t{1}, rle_size_t{3}, rle_size_t{7}, rle_size_t{8}, rle_size_t{9}, n_vals}) { CheckChunkedDecode(run, expected, chunk); - // Same, but skipping the first 1..7 values first so the decoder's read - // offset no longer matches the byte-aligned output (driving the - // bit-unaligned path for decoders that have one). + } + + // Decode the whole run in several chunks, after an initial Advance that shifts + // the run and output bit alignment. + for (const rle_size_t chunk : {rle_size_t{1}, rle_size_t{3}, rle_size_t{7}, + rle_size_t{8}, rle_size_t{9}, n_vals}) { for (rle_size_t skip = 1; skip < 8 && skip < n_vals; ++skip) { CheckChunkedDecode(run, expected, chunk, skip); } @@ -129,65 +142,41 @@ void CheckBitMapDecoder(const typename Decoder::RunType& run, Decoder decoder(run); std::vector out(static_cast(bit_util::BytesForBits(n_vals)) + 1, 0); for (rle_size_t i = 0; i < n_vals; ++i) { - EXPECT_TRUE(decoder.Get(BitmapSpanMut(out.data(), /*bit_start=*/i))); + const bool ok = decoder.Get(BitmapSpanMut(out.data(), /*bit_start=*/i)); + EXPECT_TRUE(ok); EXPECT_EQ(decoder.remaining(), n_vals - i - 1); } // Exhausted: nothing more can be read or advanced. - EXPECT_FALSE(decoder.Get(BitmapSpanMut(out.data()))); - EXPECT_EQ(decoder.Advance(1), 0); - EXPECT_EQ(decoder.remaining(), 0); - for (rle_size_t i = 0; i < n_vals; ++i) { - EXPECT_EQ(bit_util::GetBit(out.data(), i), expected[i]) << "at bit " << i; - } - } - - // Advance() skips values; the remainder still decodes correctly and - // contiguously from the skipped position. - { - Decoder decoder(run); - const rle_size_t skip = 5; - EXPECT_EQ(decoder.Advance(skip), skip); - EXPECT_EQ(decoder.remaining(), n_vals - skip); - - std::vector out(static_cast(bit_util::BytesForBits(n_vals)) + 1, 0); - rle_size_t read = skip; - while (read < n_vals) { - const auto got = - decoder.GetBatch(BitmapSpanMut(out.data(), /*bit_start=*/read), n_vals - read); - ASSERT_GT(got, 0); // break on failure - read += got; - } + const bool ok = decoder.Get(BitmapSpanMut(out.data())); + EXPECT_FALSE(ok); + const auto advanced = decoder.Advance(1); + EXPECT_EQ(advanced, 0); EXPECT_EQ(decoder.remaining(), 0); - // Bits before the skipped position must be left untouched (still zero). - for (rle_size_t i = 0; i < skip; ++i) { - EXPECT_FALSE(bit_util::GetBit(out.data(), i)) << "clobbered bit " << i; - } - for (rle_size_t i = skip; i < n_vals; ++i) { - EXPECT_EQ(bit_util::GetBit(out.data(), i), expected[i]) << "at bit " << i; - } + CheckDecodedBits(out, expected, /*count=*/n_vals); } // Advancing more than available stops at the run boundary. { Decoder decoder(run); - EXPECT_EQ(decoder.Advance(n_vals + 100), n_vals); + const auto advanced = decoder.Advance(n_vals + 100); + EXPECT_EQ(advanced, n_vals); EXPECT_EQ(decoder.remaining(), 0); } // Reset() rewinds the decoder so the run can be decoded again. { Decoder decoder(run); - std::vector scratch(static_cast(bit_util::BytesForBits(n_vals)), 0); - EXPECT_EQ(decoder.GetBatch(BitmapSpanMut(scratch.data()), n_vals), n_vals); + std::vector out_1(static_cast(bit_util::BytesForBits(n_vals)), 0); + const auto scratch_got = decoder.GetBatch(BitmapSpanMut(out_1.data()), n_vals); + EXPECT_EQ(scratch_got, n_vals); EXPECT_EQ(decoder.remaining(), 0); decoder.Reset(run); EXPECT_EQ(decoder.remaining(), n_vals); - std::vector out(static_cast(bit_util::BytesForBits(n_vals)), 0); - EXPECT_EQ(decoder.GetBatch(BitmapSpanMut(out.data()), n_vals), n_vals); - for (rle_size_t i = 0; i < n_vals; ++i) { - EXPECT_EQ(bit_util::GetBit(out.data(), i), expected[i]) << "at bit " << i; - } + std::vector out_2(static_cast(bit_util::BytesForBits(n_vals)), 0); + const auto got = decoder.GetBatch(BitmapSpanMut(out_2.data()), n_vals); + EXPECT_EQ(got, n_vals); + CheckDecodedBits(out_2, expected, /*count=*/n_vals); } } @@ -223,20 +212,26 @@ TEST_P(RleRunToBitMapDecoderTest, Decode) { CheckBitMapDecoder(run, expected); } -INSTANTIATE_TEST_SUITE_P(RleBitmap, RleRunToBitMapDecoderTest, - ::testing::Values(RleBitMapCase{/*value=*/false, /*count=*/9}, - RleBitMapCase{/*value=*/true, /*count=*/9}, - RleBitMapCase{/*value=*/false, /*count=*/13}, - RleBitMapCase{/*value=*/true, /*count=*/13}, - RleBitMapCase{/*value=*/false, /*count=*/64}, - RleBitMapCase{/*value=*/true, /*count=*/64}, - RleBitMapCase{/*value=*/false, /*count=*/100}, - RleBitMapCase{/*value=*/true, /*count=*/100}, - RleBitMapCase{/*value=*/true, /*count=*/1000}), - [](const ::testing::TestParamInfo& info) { - return std::string(info.param.value ? "true_" : "false_") + - std::to_string(info.param.count); - }); +INSTANTIATE_TEST_SUITE_P( // + RleBitmap, RleRunToBitMapDecoderTest, + ::testing::Values( // + RleBitMapCase{.value = false, .count = 0}, + RleBitMapCase{.value = true, .count = 1}, + RleBitMapCase{.value = false, .count = 3}, + RleBitMapCase{.value = true, .count = 8}, + RleBitMapCase{.value = false, .count = 9}, + RleBitMapCase{.value = true, .count = 9}, + RleBitMapCase{.value = false, .count = 13}, + RleBitMapCase{.value = true, .count = 13}, + RleBitMapCase{.value = false, .count = 64}, + RleBitMapCase{.value = true, .count = 64}, + RleBitMapCase{.value = false, .count = 100}, + RleBitMapCase{.value = true, .count = 100}, + RleBitMapCase{.value = true, .count = 1000}), + [](const ::testing::TestParamInfo& info) { + return std::string(info.param.value ? "true_" : "false_") + + std::to_string(info.param.count); + }); /********************************* * BitPackedRunToBitMapDecoder * @@ -264,26 +259,26 @@ TEST_P(BitPackedRunToBitMapDecoderTest, Decode) { CheckBitMapDecoder(run, expected); } -INSTANTIATE_TEST_SUITE_P( +INSTANTIATE_TEST_SUITE_P( // RleBitmap, BitPackedRunToBitMapDecoderTest, - ::testing::Values(BitPackedBitMapCase{/*name=*/"alternating", - /*bytes=*/{0b10101010, 0b10101010}, - /*count=*/13}, - BitPackedBitMapCase{/*name=*/"all_zeros", - /*bytes=*/{0x00, 0x00}, - /*count=*/16}, - BitPackedBitMapCase{/*name=*/"all_ones", - /*bytes=*/{0xFF, 0xFF}, - /*count=*/16}, - BitPackedBitMapCase{/*name=*/"mixed", - /*bytes=*/{0b11001010, 0b00001111, 0b10110001}, - /*count=*/24}, - BitPackedBitMapCase{/*name=*/"unaligned_count", - /*bytes=*/{0b00110101, 0b11100100}, - /*count=*/11}, - BitPackedBitMapCase{/*name=*/"large", - /*bytes=*/std::vector(16, 0b01101001), - /*count=*/128}), + ::testing::Values( // + BitPackedBitMapCase{.name = "empty", .bytes = {0b10110010}, .count = 0}, + BitPackedBitMapCase{.name = "single", .bytes = {0b00000001}, .count = 1}, + BitPackedBitMapCase{.name = "three", .bytes = {0b00000101}, .count = 3}, + BitPackedBitMapCase{.name = "eight", .bytes = {0b11010010}, .count = 8}, + BitPackedBitMapCase{ + .name = "alternating", .bytes = {0b10101010, 0b10101010}, .count = 13}, + BitPackedBitMapCase{.name = "all_zeros", .bytes = {0x00, 0x00}, .count = 16}, + BitPackedBitMapCase{.name = "all_ones", .bytes = {0xFF, 0xFF}, .count = 16}, + BitPackedBitMapCase{ + .name = "mixed", .bytes = {0b11001010, 0b00001111, 0b10110001}, .count = 24}, + BitPackedBitMapCase{ + .name = "unaligned_count", .bytes = {0b00110101, 0b11100100}, .count = 11}, + BitPackedBitMapCase{ + .name = "large", + .bytes = std::vector(16, 0b01101001), + .count = 128, + }), [](const ::testing::TestParamInfo& info) { return info.param.name; }); @@ -296,15 +291,13 @@ namespace { /// Append the LEB128 (unsigned, little-endian base-128) encoding of `value`. void AppendLeb128(std::vector& out, uint32_t value) { - uint8_t buf[bit_util::kMaxLEB128ByteLenFor]; - const auto n_vals = - bit_util::WriteLEB128(value, buf, static_cast(sizeof(buf))); - ASSERT_GT(n_vals, 0); - out.insert(out.end(), buf, buf + n_vals); + std::array> buf; + const auto n_bytes = + bit_util::WriteLEB128(value, buf.data(), static_cast(buf.size())); + ASSERT_GT(n_bytes, 0); + out.insert(out.end(), buf.data(), buf.data() + n_bytes); } -/// Append an RLE run of `count` copies of `value` (1-bit values) to the encoded -/// `bytes` stream and to the `expected` decoded sequence. void AppendRleRun(std::vector& bytes, std::vector& expected, bool value, rle_size_t count) { AppendLeb128(bytes, static_cast(count) << 1); // low bit 0 => RLE @@ -312,8 +305,6 @@ void AppendRleRun(std::vector& bytes, std::vector& expected, bool expected.insert(expected.end(), count, value); } -/// Append a bit-packed run holding `packed.size()` groups of 8 (1-bit) values, -/// LSB first, to the encoded `bytes` stream and to the `expected` sequence. void AppendBitPackedRun(std::vector& bytes, std::vector& expected, const std::vector& packed) { const auto groups = static_cast(packed.size()); @@ -324,13 +315,11 @@ void AppendBitPackedRun(std::vector& bytes, std::vector& expected } } -/// Decode the whole `bytes` stream into a bitmap starting at bit offset -/// `out_offset`, in successive GetBatch calls of at most `chunk` values, and -/// compare against `expected`. +/// Decode the whole `bytes` into a bitmap and check it against `expected`. /// -/// Decoding in small, byte-unaligned chunks and (with a non-zero `out_offset`) -/// at a non-aligned output position exercises how the decoder threads its output -/// bit offset across runs that do not end on a byte boundary. +/// Decode `chunk` values per GetBatch call to check the decoder state between +/// calls. The output starts at bit offset `out_offset`. A non-zero offset makes +/// the output and the encoded `bytes` use different bit alignment. void CheckRleBitPackedDecode(const std::vector& bytes, const std::vector& expected, rle_size_t chunk, rle_size_t out_offset = 0) { @@ -361,27 +350,21 @@ void CheckRleBitPackedDecode(const std::vector& bytes, EXPECT_TRUE(decoder.exhausted()); // Reading past the end yields nothing and leaves the decoder exhausted. uint8_t scratch = 0; - EXPECT_EQ(decoder.GetBatch(BitmapSpanMut(&scratch), 8), 0); + const auto past_end = decoder.GetBatch(BitmapSpanMut(&scratch), 8); + EXPECT_EQ(past_end, 0); EXPECT_TRUE(decoder.exhausted()); EXPECT_EQ(out.back(), guard) << "decoder wrote past the end of the output"; - // Bits before the output offset must be left untouched (still zero). - for (rle_size_t i = 0; i < out_offset; ++i) { - EXPECT_FALSE(bit_util::GetBit(out.data(), i)) << "clobbered bit " << i; - } - for (rle_size_t i = 0; i < n_vals; ++i) { - EXPECT_EQ(bit_util::GetBit(out.data(), out_offset + i), expected[i]) - << "at bit " << i; - } + CheckDecodedBits(out, expected, /*count=*/n_vals, out_offset); } /// Run the decode check over a battery of chunk sizes and output offsets. void CheckRleBitPackedToBitMap(const std::vector& bytes, const std::vector& expected) { - const auto n = static_cast(expected.size()); - ASSERT_GT(n, 0); + const auto n_vals = static_cast(expected.size()); + ASSERT_GT(n_vals, 0); for (const rle_size_t chunk : {rle_size_t{1}, rle_size_t{3}, rle_size_t{7}, - rle_size_t{8}, rle_size_t{9}, rle_size_t{33}, n}) { + rle_size_t{8}, rle_size_t{9}, rle_size_t{33}, n_vals}) { CheckRleBitPackedDecode(bytes, expected, chunk); // A non-zero output offset forces the first run to start at a non-byte // aligned output position. @@ -398,12 +381,14 @@ TEST(RleBitPackedToBitMapDecoder, Empty) { RleBitPackedToBitMapDecoder decoder; EXPECT_TRUE(decoder.exhausted()); uint8_t out = 0; - EXPECT_EQ(decoder.GetBatch(BitmapSpanMut(&out), 8), 0); + auto got = decoder.GetBatch(BitmapSpanMut(&out), 8); + EXPECT_EQ(got, 0); // So is one reset on an empty buffer. decoder.Reset(nullptr, 0); EXPECT_TRUE(decoder.exhausted()); - EXPECT_EQ(decoder.GetBatch(BitmapSpanMut(&out), 8), 0); + got = decoder.GetBatch(BitmapSpanMut(&out), 8); + EXPECT_EQ(got, 0); } TEST(RleBitPackedToBitMapDecoder, SingleRleZeros) { @@ -459,18 +444,18 @@ TEST(RleBitPackedToBitMapDecoder, ReadPastEnd) { std::vector expected; AppendRleRun(bytes, expected, /*value=*/true, /*count=*/10); AppendBitPackedRun(bytes, expected, {0b10110010}); - const auto n = static_cast(expected.size()); + const auto n_vals = static_cast(expected.size()); RleBitPackedToBitMapDecoder decoder(bytes.data(), static_cast(bytes.size())); - std::vector out(static_cast(bit_util::BytesForBits(n)) + 1, 0); + std::vector out(static_cast(bit_util::BytesForBits(n_vals)) + 1, 0); // Requesting more values than available produces only the available ones. - EXPECT_EQ(decoder.GetBatch(BitmapSpanMut(out.data()), n + 100), n); + auto got = decoder.GetBatch(BitmapSpanMut(out.data()), n_vals + 100); + EXPECT_EQ(got, n_vals); EXPECT_TRUE(decoder.exhausted()); - EXPECT_EQ(decoder.GetBatch(BitmapSpanMut(out.data()), 10), 0); - for (rle_size_t i = 0; i < n; ++i) { - EXPECT_EQ(bit_util::GetBit(out.data(), i), expected[i]) << "at bit " << i; - } + got = decoder.GetBatch(BitmapSpanMut(out.data()), 10); + EXPECT_EQ(got, 0); + CheckDecodedBits(out, expected, /*count=*/n_vals); } TEST(RleBitPackedToBitMapDecoder, Reset) { @@ -479,32 +464,34 @@ TEST(RleBitPackedToBitMapDecoder, Reset) { AppendRleRun(bytes, expected, /*value=*/true, /*count=*/13); AppendBitPackedRun(bytes, expected, {0b01101001, 0b10010110}); AppendRleRun(bytes, expected, /*value=*/false, /*count=*/20); - const auto n = static_cast(expected.size()); - const auto size = static_cast(bytes.size()); + const auto n_vals = static_cast(expected.size()); + const auto data_size = static_cast(bytes.size()); - RleBitPackedToBitMapDecoder decoder(bytes.data(), size); - std::vector out(static_cast(bit_util::BytesForBits(n)), 0); - EXPECT_EQ(decoder.GetBatch(BitmapSpanMut(out.data()), n), n); + RleBitPackedToBitMapDecoder decoder(bytes.data(), data_size); + std::vector out_1(static_cast(bit_util::BytesForBits(n_vals)), 0); + const auto got_1 = decoder.GetBatch(BitmapSpanMut(out_1.data()), n_vals); + EXPECT_EQ(got_1, n_vals); EXPECT_TRUE(decoder.exhausted()); // Reset rewinds the decoder so the same buffer decodes again. - decoder.Reset(bytes.data(), size); + decoder.Reset(bytes.data(), data_size); EXPECT_FALSE(decoder.exhausted()); - std::vector out2(static_cast(bit_util::BytesForBits(n)), 0); - EXPECT_EQ(decoder.GetBatch(BitmapSpanMut(out2.data()), n), n); + std::vector out_2(static_cast(bit_util::BytesForBits(n_vals)), 0); + const auto got_2 = decoder.GetBatch(BitmapSpanMut(out_2.data()), n_vals); + EXPECT_EQ(got_2, n_vals); EXPECT_TRUE(decoder.exhausted()); - for (rle_size_t i = 0; i < n; ++i) { - EXPECT_EQ(bit_util::GetBit(out2.data(), i), expected[i]) << "at bit " << i; - } + CheckDecodedBits(out_2, expected, /*count=*/n_vals); } TEST(RleBitPackedToBitMapDecoder, Truncated) { - // GH-style malformed input: a bit-packed run declares more groups than the - // buffer holds. The decoder yields what it can and is not exhausted. + // Malformed input: a bit-packed run declares more values than the buffer + // holds. The decoder should return the values it can read and report that it + // is not exhausted, rather than crash or read out of bounds. std::vector bytes; std::vector expected; AppendRleRun(bytes, expected, /*value=*/true, /*count=*/10); - // Declare 4 groups (32 values) of bit-packed data but provide only 1 byte. + // The header declares 4 bytes (4 * 8 = 32 values) of bit-packed data, but only + // 1 byte follows. AppendLeb128(bytes, (4u << 1) | 1); bytes.push_back(0b10101010); @@ -512,11 +499,10 @@ TEST(RleBitPackedToBitMapDecoder, Truncated) { static_cast(bytes.size())); std::vector out(16, 0); // The RLE run decodes fully; the truncated bit-packed run cannot be parsed. - EXPECT_EQ(decoder.GetBatch(BitmapSpanMut(out.data()), 1000), 10); + const auto got = decoder.GetBatch(BitmapSpanMut(out.data()), 1000); + EXPECT_EQ(got, 10); EXPECT_FALSE(decoder.exhausted()); - for (rle_size_t i = 0; i < 10; ++i) { - EXPECT_EQ(bit_util::GetBit(out.data(), i), true) << "at bit " << i; - } + CheckDecodedBits(out, expected, /*count=*/10); } } // namespace arrow::util From 163aa10598026a5db197966404ce9f88658c9485 Mon Sep 17 00:00:00 2001 From: AntoinePrv Date: Thu, 18 Jun 2026 10:57:53 +0200 Subject: [PATCH 11/21] Uniform BitMap > Bitmap --- cpp/src/arrow/util/rle_bitmap_internal.h | 40 ++++---- cpp/src/arrow/util/rle_bitmap_test.cc | 124 +++++++++++------------ 2 files changed, 82 insertions(+), 82 deletions(-) diff --git a/cpp/src/arrow/util/rle_bitmap_internal.h b/cpp/src/arrow/util/rle_bitmap_internal.h index 19ceada2a4ee..9cb9502f2033 100644 --- a/cpp/src/arrow/util/rle_bitmap_internal.h +++ b/cpp/src/arrow/util/rle_bitmap_internal.h @@ -72,7 +72,7 @@ using BitmapSpanConst = BitmapSpan; namespace internal_rle { template -class RunToBitMapDecoderMixin { +class RunToBitmapDecoderMixin { public: /// Advance by as many values as provided or until exhaustion of the decoder. /// Return the number of values skipped. @@ -156,15 +156,15 @@ class RunToBitMapDecoderMixin { }; } // namespace internal_rle -class RleRunToBitMapDecoder - : public internal_rle::RunToBitMapDecoderMixin { +class RleRunToBitmapDecoder + : public internal_rle::RunToBitmapDecoderMixin { public: /// The type of run that can be decoded. using RunType = RleRun; - constexpr RleRunToBitMapDecoder() noexcept = default; + constexpr RleRunToBitmapDecoder() noexcept = default; - explicit RleRunToBitMapDecoder(const RunType& run) noexcept { Reset(run); } + explicit RleRunToBitmapDecoder(const RunType& run) noexcept { Reset(run); } void Reset(const RunType& run) noexcept { values_left_ = run.values_count(); @@ -182,7 +182,7 @@ class RleRunToBitMapDecoder constexpr bool value() const { return value_pattern_ != 0; } private: - friend class internal_rle::RunToBitMapDecoderMixin; + friend class internal_rle::RunToBitmapDecoderMixin; /// The byte pattern for 8 values (full ones or full zeros). uint8_t value_pattern_ = {}; @@ -207,15 +207,15 @@ class RleRunToBitMapDecoder } }; -class BitPackedRunToBitMapDecoder - : public internal_rle::RunToBitMapDecoderMixin { +class BitPackedRunToBitmapDecoder + : public internal_rle::RunToBitmapDecoderMixin { public: /// The type of run that can be decoded. using RunType = BitPackedRun; - constexpr BitPackedRunToBitMapDecoder() noexcept = default; + constexpr BitPackedRunToBitmapDecoder() noexcept = default; - explicit BitPackedRunToBitMapDecoder(const RunType& run) noexcept { Reset(run); } + explicit BitPackedRunToBitmapDecoder(const RunType& run) noexcept { Reset(run); } void Reset(const RunType& run) noexcept { data_ = run.raw_data_ptr(); @@ -241,8 +241,8 @@ class BitPackedRunToBitMapDecoder } private: - using Base = internal_rle::RunToBitMapDecoderMixin; - friend class internal_rle::RunToBitMapDecoderMixin; + using Base = internal_rle::RunToBitmapDecoderMixin; + friend class internal_rle::RunToBitmapDecoderMixin; /// The pointer to the beginning of the run const uint8_t* data_ = nullptr; @@ -334,14 +334,14 @@ class BitPackedRunToBitMapDecoder /// no repetition and no nesting), we know values to be decoded will end up in an /// Arrow validity bitmap. In such cases, decoding values to a ``int16`` before /// encoding them again in overly wasteful. -class RleBitPackedToBitMapDecoder { +class RleBitPackedToBitmapDecoder { public: - RleBitPackedToBitMapDecoder() noexcept = default; + RleBitPackedToBitmapDecoder() noexcept = default; /// Create a decoder object. /// /// data and data_size are the raw bytes to decode. - RleBitPackedToBitMapDecoder(const uint8_t* data, rle_size_t data_size) noexcept { + RleBitPackedToBitmapDecoder(const uint8_t* data, rle_size_t data_size) noexcept { Reset(data, data_size); } @@ -364,17 +364,17 @@ class RleBitPackedToBitMapDecoder { struct get_decoder; template <> struct get_decoder { - using type = RleRunToBitMapDecoder; + using type = RleRunToBitmapDecoder; }; template <> struct get_decoder { - using type = BitPackedRunToBitMapDecoder; + using type = BitPackedRunToBitmapDecoder; }; template using get_decoder_t = get_decoder::type; RleBitPackedParser parser_ = {}; - std::variant decoder_ = {}; + std::variant decoder_ = {}; /// Return the number of values that are remaining in the current run. rle_size_t run_remaining() const { @@ -388,10 +388,10 @@ class RleBitPackedToBitMapDecoder { }; /************************************************ - * RleBitPackedToBitMapDecoder implementation * + * RleBitPackedToBitmapDecoder implementation * ************************************************/ -inline auto RleBitPackedToBitMapDecoder::GetBatch(BitmapSpanMut out, +inline auto RleBitPackedToBitmapDecoder::GetBatch(BitmapSpanMut out, rle_size_t batch_size) -> rle_size_t { using ControlFlow = RleBitPackedParser::ControlFlow; diff --git a/cpp/src/arrow/util/rle_bitmap_test.cc b/cpp/src/arrow/util/rle_bitmap_test.cc index 564c6c455f1b..7c97b3f6bce9 100644 --- a/cpp/src/arrow/util/rle_bitmap_test.cc +++ b/cpp/src/arrow/util/rle_bitmap_test.cc @@ -63,7 +63,7 @@ void CheckDecodedBits(const std::vector& out, const std::vector& /// /// `skip` shifts the decoder's read offset relative to the output offset. /// A non-zero `skip` makes the two differ, which exercises the bit-unaligned read -/// path of BitPackedRunToBitMapDecoder. With `skip == 0` they stay in sync and +/// path of BitPackedRunToBitmapDecoder. With `skip == 0` they stay in sync and /// only the aligned path runs. template void CheckChunkedDecode(const typename Decoder::RunType& run, @@ -103,7 +103,7 @@ void CheckChunkedDecode(const typename Decoder::RunType& run, /// /// `expected` is the full sequence of booleans the run should decode to. template -void CheckBitMapDecoder(const typename Decoder::RunType& run, +void CheckBitmapDecoder(const typename Decoder::RunType& run, const std::vector& expected) { const auto n_vals = static_cast(expected.size()); @@ -183,19 +183,19 @@ void CheckBitMapDecoder(const typename Decoder::RunType& run, } // namespace /*************************** - * RleRunToBitMapDecoder * + * RleRunToBitmapDecoder * ***************************/ -struct RleBitMapCase { +struct RleBitmapCase { // The repeated boolean value of the run. bool value; // The number of values in the run. rle_size_t count; }; -class RleRunToBitMapDecoderTest : public ::testing::TestWithParam {}; +class RleRunToBitmapDecoderTest : public ::testing::TestWithParam {}; -TEST_P(RleRunToBitMapDecoderTest, Decode) { +TEST_P(RleRunToBitmapDecoderTest, Decode) { const auto& param = GetParam(); // A boolean RLE run stores its value in a single (1-bit-wide) byte. @@ -204,40 +204,40 @@ TEST_P(RleRunToBitMapDecoderTest, Decode) { // value() reports the repeated boolean. { - RleRunToBitMapDecoder decoder(run); + RleRunToBitmapDecoder decoder(run); EXPECT_EQ(decoder.value(), param.value); } const std::vector expected(param.count, param.value); - CheckBitMapDecoder(run, expected); + CheckBitmapDecoder(run, expected); } INSTANTIATE_TEST_SUITE_P( // - RleBitmap, RleRunToBitMapDecoderTest, + RleBitmap, RleRunToBitmapDecoderTest, ::testing::Values( // - RleBitMapCase{.value = false, .count = 0}, - RleBitMapCase{.value = true, .count = 1}, - RleBitMapCase{.value = false, .count = 3}, - RleBitMapCase{.value = true, .count = 8}, - RleBitMapCase{.value = false, .count = 9}, - RleBitMapCase{.value = true, .count = 9}, - RleBitMapCase{.value = false, .count = 13}, - RleBitMapCase{.value = true, .count = 13}, - RleBitMapCase{.value = false, .count = 64}, - RleBitMapCase{.value = true, .count = 64}, - RleBitMapCase{.value = false, .count = 100}, - RleBitMapCase{.value = true, .count = 100}, - RleBitMapCase{.value = true, .count = 1000}), - [](const ::testing::TestParamInfo& info) { + RleBitmapCase{.value = false, .count = 0}, + RleBitmapCase{.value = true, .count = 1}, + RleBitmapCase{.value = false, .count = 3}, + RleBitmapCase{.value = true, .count = 8}, + RleBitmapCase{.value = false, .count = 9}, + RleBitmapCase{.value = true, .count = 9}, + RleBitmapCase{.value = false, .count = 13}, + RleBitmapCase{.value = true, .count = 13}, + RleBitmapCase{.value = false, .count = 64}, + RleBitmapCase{.value = true, .count = 64}, + RleBitmapCase{.value = false, .count = 100}, + RleBitmapCase{.value = true, .count = 100}, + RleBitmapCase{.value = true, .count = 1000}), + [](const ::testing::TestParamInfo& info) { return std::string(info.param.value ? "true_" : "false_") + std::to_string(info.param.count); }); /********************************* - * BitPackedRunToBitMapDecoder * + * BitPackedRunToBitmapDecoder * *********************************/ -struct BitPackedBitMapCase { +struct BitPackedBitmapCase { std::string name; // The raw bit-packed bytes (LSB first). Must hold at least `count` bits. std::vector bytes; @@ -245,10 +245,10 @@ struct BitPackedBitMapCase { rle_size_t count; }; -class BitPackedRunToBitMapDecoderTest - : public ::testing::TestWithParam {}; +class BitPackedRunToBitmapDecoderTest + : public ::testing::TestWithParam {}; -TEST_P(BitPackedRunToBitMapDecoderTest, Decode) { +TEST_P(BitPackedRunToBitmapDecoderTest, Decode) { const auto& param = GetParam(); ASSERT_GE(param.bytes.size(), static_cast(bit_util::BytesForBits(param.count))); @@ -256,35 +256,35 @@ TEST_P(BitPackedRunToBitMapDecoderTest, Decode) { /*max_read_bytes=*/-1); const std::vector expected = BitsFromBytes(param.bytes, param.count); - CheckBitMapDecoder(run, expected); + CheckBitmapDecoder(run, expected); } INSTANTIATE_TEST_SUITE_P( // - RleBitmap, BitPackedRunToBitMapDecoderTest, + RleBitmap, BitPackedRunToBitmapDecoderTest, ::testing::Values( // - BitPackedBitMapCase{.name = "empty", .bytes = {0b10110010}, .count = 0}, - BitPackedBitMapCase{.name = "single", .bytes = {0b00000001}, .count = 1}, - BitPackedBitMapCase{.name = "three", .bytes = {0b00000101}, .count = 3}, - BitPackedBitMapCase{.name = "eight", .bytes = {0b11010010}, .count = 8}, - BitPackedBitMapCase{ + BitPackedBitmapCase{.name = "empty", .bytes = {0b10110010}, .count = 0}, + BitPackedBitmapCase{.name = "single", .bytes = {0b00000001}, .count = 1}, + BitPackedBitmapCase{.name = "three", .bytes = {0b00000101}, .count = 3}, + BitPackedBitmapCase{.name = "eight", .bytes = {0b11010010}, .count = 8}, + BitPackedBitmapCase{ .name = "alternating", .bytes = {0b10101010, 0b10101010}, .count = 13}, - BitPackedBitMapCase{.name = "all_zeros", .bytes = {0x00, 0x00}, .count = 16}, - BitPackedBitMapCase{.name = "all_ones", .bytes = {0xFF, 0xFF}, .count = 16}, - BitPackedBitMapCase{ + BitPackedBitmapCase{.name = "all_zeros", .bytes = {0x00, 0x00}, .count = 16}, + BitPackedBitmapCase{.name = "all_ones", .bytes = {0xFF, 0xFF}, .count = 16}, + BitPackedBitmapCase{ .name = "mixed", .bytes = {0b11001010, 0b00001111, 0b10110001}, .count = 24}, - BitPackedBitMapCase{ + BitPackedBitmapCase{ .name = "unaligned_count", .bytes = {0b00110101, 0b11100100}, .count = 11}, - BitPackedBitMapCase{ + BitPackedBitmapCase{ .name = "large", .bytes = std::vector(16, 0b01101001), .count = 128, }), - [](const ::testing::TestParamInfo& info) { + [](const ::testing::TestParamInfo& info) { return info.param.name; }); /********************************* - * RleBitPackedToBitMapDecoder * + * RleBitPackedToBitmapDecoder * *********************************/ namespace { @@ -326,7 +326,7 @@ void CheckRleBitPackedDecode(const std::vector& bytes, ARROW_SCOPED_TRACE("chunk = ", chunk, ", out_offset = ", out_offset); const auto n_vals = static_cast(expected.size()); - RleBitPackedToBitMapDecoder decoder(bytes.data(), + RleBitPackedToBitmapDecoder decoder(bytes.data(), static_cast(bytes.size())); EXPECT_EQ(decoder.exhausted(), n_vals == 0); @@ -359,7 +359,7 @@ void CheckRleBitPackedDecode(const std::vector& bytes, } /// Run the decode check over a battery of chunk sizes and output offsets. -void CheckRleBitPackedToBitMap(const std::vector& bytes, +void CheckRleBitPackedToBitmap(const std::vector& bytes, const std::vector& expected) { const auto n_vals = static_cast(expected.size()); ASSERT_GT(n_vals, 0); @@ -376,9 +376,9 @@ void CheckRleBitPackedToBitMap(const std::vector& bytes, } // namespace -TEST(RleBitPackedToBitMapDecoder, Empty) { +TEST(RleBitPackedToBitmapDecoder, Empty) { // A default-constructed decoder is already exhausted. - RleBitPackedToBitMapDecoder decoder; + RleBitPackedToBitmapDecoder decoder; EXPECT_TRUE(decoder.exhausted()); uint8_t out = 0; auto got = decoder.GetBatch(BitmapSpanMut(&out), 8); @@ -391,28 +391,28 @@ TEST(RleBitPackedToBitMapDecoder, Empty) { EXPECT_EQ(got, 0); } -TEST(RleBitPackedToBitMapDecoder, SingleRleZeros) { +TEST(RleBitPackedToBitmapDecoder, SingleRleZeros) { std::vector bytes; std::vector expected; AppendRleRun(bytes, expected, /*value=*/false, /*count=*/100); - CheckRleBitPackedToBitMap(bytes, expected); + CheckRleBitPackedToBitmap(bytes, expected); } -TEST(RleBitPackedToBitMapDecoder, SingleRleOnes) { +TEST(RleBitPackedToBitmapDecoder, SingleRleOnes) { std::vector bytes; std::vector expected; AppendRleRun(bytes, expected, /*value=*/true, /*count=*/100); - CheckRleBitPackedToBitMap(bytes, expected); + CheckRleBitPackedToBitmap(bytes, expected); } -TEST(RleBitPackedToBitMapDecoder, SingleBitPacked) { +TEST(RleBitPackedToBitmapDecoder, SingleBitPacked) { std::vector bytes; std::vector expected; AppendBitPackedRun(bytes, expected, {0b10101010, 0b11001100, 0b11110000}); - CheckRleBitPackedToBitMap(bytes, expected); + CheckRleBitPackedToBitmap(bytes, expected); } -TEST(RleBitPackedToBitMapDecoder, MixedRunsAligned) { +TEST(RleBitPackedToBitmapDecoder, MixedRunsAligned) { // All runs end on a byte boundary, so each run starts byte-aligned in the // output. std::vector bytes; @@ -421,10 +421,10 @@ TEST(RleBitPackedToBitMapDecoder, MixedRunsAligned) { AppendBitPackedRun(bytes, expected, {0b10101010, 0b01010101}); AppendRleRun(bytes, expected, /*value=*/true, /*count=*/64); AppendBitPackedRun(bytes, expected, {0b00001111}); - CheckRleBitPackedToBitMap(bytes, expected); + CheckRleBitPackedToBitmap(bytes, expected); } -TEST(RleBitPackedToBitMapDecoder, MixedRunsUnaligned) { +TEST(RleBitPackedToBitmapDecoder, MixedRunsUnaligned) { // RLE runs with counts that are not multiples of 8 make each following run // start at a non-byte-aligned output position. std::vector bytes; @@ -436,17 +436,17 @@ TEST(RleBitPackedToBitMapDecoder, MixedRunsUnaligned) { AppendBitPackedRun(bytes, expected, {0b11110000}); AppendRleRun(bytes, expected, /*value=*/false, /*count=*/3); AppendBitPackedRun(bytes, expected, {0b10110001, 0b00011101}); - CheckRleBitPackedToBitMap(bytes, expected); + CheckRleBitPackedToBitmap(bytes, expected); } -TEST(RleBitPackedToBitMapDecoder, ReadPastEnd) { +TEST(RleBitPackedToBitmapDecoder, ReadPastEnd) { std::vector bytes; std::vector expected; AppendRleRun(bytes, expected, /*value=*/true, /*count=*/10); AppendBitPackedRun(bytes, expected, {0b10110010}); const auto n_vals = static_cast(expected.size()); - RleBitPackedToBitMapDecoder decoder(bytes.data(), + RleBitPackedToBitmapDecoder decoder(bytes.data(), static_cast(bytes.size())); std::vector out(static_cast(bit_util::BytesForBits(n_vals)) + 1, 0); // Requesting more values than available produces only the available ones. @@ -458,7 +458,7 @@ TEST(RleBitPackedToBitMapDecoder, ReadPastEnd) { CheckDecodedBits(out, expected, /*count=*/n_vals); } -TEST(RleBitPackedToBitMapDecoder, Reset) { +TEST(RleBitPackedToBitmapDecoder, Reset) { std::vector bytes; std::vector expected; AppendRleRun(bytes, expected, /*value=*/true, /*count=*/13); @@ -467,7 +467,7 @@ TEST(RleBitPackedToBitMapDecoder, Reset) { const auto n_vals = static_cast(expected.size()); const auto data_size = static_cast(bytes.size()); - RleBitPackedToBitMapDecoder decoder(bytes.data(), data_size); + RleBitPackedToBitmapDecoder decoder(bytes.data(), data_size); std::vector out_1(static_cast(bit_util::BytesForBits(n_vals)), 0); const auto got_1 = decoder.GetBatch(BitmapSpanMut(out_1.data()), n_vals); EXPECT_EQ(got_1, n_vals); @@ -483,7 +483,7 @@ TEST(RleBitPackedToBitMapDecoder, Reset) { CheckDecodedBits(out_2, expected, /*count=*/n_vals); } -TEST(RleBitPackedToBitMapDecoder, Truncated) { +TEST(RleBitPackedToBitmapDecoder, Truncated) { // Malformed input: a bit-packed run declares more values than the buffer // holds. The decoder should return the values it can read and report that it // is not exhausted, rather than crash or read out of bounds. @@ -495,7 +495,7 @@ TEST(RleBitPackedToBitMapDecoder, Truncated) { AppendLeb128(bytes, (4u << 1) | 1); bytes.push_back(0b10101010); - RleBitPackedToBitMapDecoder decoder(bytes.data(), + RleBitPackedToBitmapDecoder decoder(bytes.data(), static_cast(bytes.size())); std::vector out(16, 0); // The RLE run decodes fully; the truncated bit-packed run cannot be parsed. From d26dba095a40cf145cf355f33232dc3cbd13da1c Mon Sep 17 00:00:00 2001 From: AntoinePrv Date: Thu, 18 Jun 2026 12:03:24 +0200 Subject: [PATCH 12/21] Fix class specialization --- cpp/src/arrow/util/rle_bitmap_internal.h | 30 +++++++++---------- cpp/src/arrow/util/rle_encoding_internal.h | 34 +++++++++++----------- 2 files changed, 32 insertions(+), 32 deletions(-) diff --git a/cpp/src/arrow/util/rle_bitmap_internal.h b/cpp/src/arrow/util/rle_bitmap_internal.h index 9cb9502f2033..6aaca3117bb6 100644 --- a/cpp/src/arrow/util/rle_bitmap_internal.h +++ b/cpp/src/arrow/util/rle_bitmap_internal.h @@ -359,20 +359,6 @@ class RleBitPackedToBitmapDecoder { [[nodiscard]] rle_size_t GetBatch(BitmapSpanMut out, rle_size_t batch_size); private: - /// Utility to map a run type to the associate decoder. - template - struct get_decoder; - template <> - struct get_decoder { - using type = RleRunToBitmapDecoder; - }; - template <> - struct get_decoder { - using type = BitPackedRunToBitmapDecoder; - }; - template - using get_decoder_t = get_decoder::type; - RleBitPackedParser parser_ = {}; std::variant decoder_ = {}; @@ -391,6 +377,20 @@ class RleBitPackedToBitmapDecoder { * RleBitPackedToBitmapDecoder implementation * ************************************************/ +/// Utility to map a run type to the associate decoder. +template +struct RleBitPackedToBitmapDecoderGetDecoder; + +template <> +struct RleBitPackedToBitmapDecoderGetDecoder { + using type = RleRunToBitmapDecoder; +}; + +template <> +struct RleBitPackedToBitmapDecoderGetDecoder { + using type = BitPackedRunToBitmapDecoder; +}; + inline auto RleBitPackedToBitmapDecoder::GetBatch(BitmapSpanMut out, rle_size_t batch_size) -> rle_size_t { using ControlFlow = RleBitPackedParser::ControlFlow; @@ -410,7 +410,7 @@ inline auto RleBitPackedToBitmapDecoder::GetBatch(BitmapSpanMut out, } parser_.ParseWithCallable([&](auto run) { - using RunDecoder = get_decoder_t; + using RunDecoder = RleBitPackedToBitmapDecoderGetDecoder::type; ARROW_DCHECK_LT(values_read, batch_size); RunDecoder decoder(run); diff --git a/cpp/src/arrow/util/rle_encoding_internal.h b/cpp/src/arrow/util/rle_encoding_internal.h index 9a4d9b0aa8de..89ca416b1176 100644 --- a/cpp/src/arrow/util/rle_encoding_internal.h +++ b/cpp/src/arrow/util/rle_encoding_internal.h @@ -490,20 +490,6 @@ class RleBitPackedDecoder { rle_size_t null_count, const uint8_t* valid_bits, int64_t valid_bits_offset); private: - /// Utility to map a run type to the associate decoder. - template - struct get_decoder; - template <> - struct get_decoder { - using type = RleRunDecoder; - }; - template <> - struct get_decoder { - using type = BitPackedRunDecoder; - }; - template - using get_decoder_t = get_decoder::type; - RleBitPackedParser parser_ = {}; std::variant, BitPackedRunDecoder> decoder_ = {}; rle_size_t value_bit_width_; @@ -780,6 +766,20 @@ auto RleBitPackedParser::PeekImpl(Handler&& handler) const * RleBitPackedDecoder * *************************/ +/// Utility to map a run type to the associate decoder. +template +struct RleBitPackedDecoderGetRunDecoder; + +template +struct RleBitPackedDecoderGetRunDecoder { + using type = RleRunDecoder; +}; + +template +struct RleBitPackedDecoderGetRunDecoder { + using type = BitPackedRunDecoder; +}; + template bool RleBitPackedDecoder::Get(value_type* val) { return GetBatch(val, 1) == 1; @@ -806,7 +806,7 @@ auto RleBitPackedDecoder::GetBatch(value_type* out, } parser_.ParseWithCallable([&](auto run) { - using RunDecoder = get_decoder_t; + using RunDecoder = RleBitPackedDecoderGetRunDecoder::type; ARROW_DCHECK_LT(values_read, batch_size); RunDecoder decoder(run, value_bit_width_); @@ -1119,7 +1119,7 @@ auto RleBitPackedDecoder::GetSpaced(Converter converter, } parser_.ParseWithCallable([&](auto run) { - using RunDecoder = get_decoder_t; + using RunDecoder = RleBitPackedDecoderGetRunDecoder::type; RunDecoder decoder(run, value_bit_width_); @@ -1301,7 +1301,7 @@ auto RleBitPackedDecoder::GetBatchWithDict(const V* dictionary, } parser_.ParseWithCallable([&](auto run) { - using RunDecoder = get_decoder_t; + using RunDecoder = RleBitPackedDecoderGetRunDecoder::type; RunDecoder decoder(run, value_bit_width_); From 363c7095310041a12cce6c3eae899f29491fab34 Mon Sep 17 00:00:00 2001 From: AntoinePrv Date: Thu, 18 Jun 2026 14:02:41 +0200 Subject: [PATCH 13/21] Explicit little endian name --- cpp/src/arrow/util/rle_bitmap_internal.h | 2 +- cpp/src/arrow/util/rle_encoding_internal.h | 13 +++++++------ 2 files changed, 8 insertions(+), 7 deletions(-) diff --git a/cpp/src/arrow/util/rle_bitmap_internal.h b/cpp/src/arrow/util/rle_bitmap_internal.h index 6aaca3117bb6..03a3f0cbb9c9 100644 --- a/cpp/src/arrow/util/rle_bitmap_internal.h +++ b/cpp/src/arrow/util/rle_bitmap_internal.h @@ -168,7 +168,7 @@ class RleRunToBitmapDecoder void Reset(const RunType& run) noexcept { values_left_ = run.values_count(); - if (run.value() == 0) { + if (run.value_little_endian() == 0) { value_pattern_ = uint8_t{0}; } else { value_pattern_ = uint8_t{0xFF}; diff --git a/cpp/src/arrow/util/rle_encoding_internal.h b/cpp/src/arrow/util/rle_encoding_internal.h index 89ca416b1176..2457e57b5134 100644 --- a/cpp/src/arrow/util/rle_encoding_internal.h +++ b/cpp/src/arrow/util/rle_encoding_internal.h @@ -114,8 +114,11 @@ class RleRun { std::copy(data, data + raw_data_size(value_bit_width), data_.begin()); } - /// The repeated value in the run - uint64_t value() const noexcept { return SafeLoadAs(data_.data()); } + /// The repeated value in the run in little endian form (as stored in the buffer). + uint64_t value_little_endian() const noexcept { + // Underlying memcpy is required to avoid undefined behavior. + return SafeLoadAs(data_.data()); + } /// The number of repeated values in this run. constexpr rle_size_t values_count() const noexcept { return values_count_; } @@ -278,10 +281,8 @@ class RleRunDecoder { // if the bool value isn't 0 or 1. value_ = *run.raw_data_ptr() & 1; } else { - // Memcopy is required to avoid undefined behavior. - value_ = {}; - std::memcpy(&value_, run.raw_data_ptr(), run.raw_data_size(value_bit_width)); - value_ = ::arrow::bit_util::FromLittleEndian(value_); + value_ = static_cast( + ::arrow::bit_util::FromLittleEndian(run.value_little_endian())); } } From 116fc5b63f0c70dd1e92c660d7f0327464378c86 Mon Sep 17 00:00:00 2001 From: AntoinePrv Date: Thu, 18 Jun 2026 14:03:48 +0200 Subject: [PATCH 14/21] Safe decoder on zero batch_size --- cpp/src/arrow/util/rle_bitmap_internal.h | 4 ++++ cpp/src/arrow/util/rle_encoding_internal.h | 4 ++++ 2 files changed, 8 insertions(+) diff --git a/cpp/src/arrow/util/rle_bitmap_internal.h b/cpp/src/arrow/util/rle_bitmap_internal.h index 03a3f0cbb9c9..5375663f9630 100644 --- a/cpp/src/arrow/util/rle_bitmap_internal.h +++ b/cpp/src/arrow/util/rle_bitmap_internal.h @@ -395,6 +395,10 @@ inline auto RleBitPackedToBitmapDecoder::GetBatch(BitmapSpanMut out, rle_size_t batch_size) -> rle_size_t { using ControlFlow = RleBitPackedParser::ControlFlow; + if (ARROW_PREDICT_FALSE(batch_size == 0 || exhausted())) { + return 0; + } + rle_size_t values_read = 0; // Remaining from a previous call that would have left some unread data from a run. diff --git a/cpp/src/arrow/util/rle_encoding_internal.h b/cpp/src/arrow/util/rle_encoding_internal.h index 2457e57b5134..825cd253df9b 100644 --- a/cpp/src/arrow/util/rle_encoding_internal.h +++ b/cpp/src/arrow/util/rle_encoding_internal.h @@ -791,6 +791,10 @@ auto RleBitPackedDecoder::GetBatch(value_type* out, rle_size_t batch_size) -> rle_size_t { using ControlFlow = RleBitPackedParser::ControlFlow; + if (ARROW_PREDICT_FALSE(batch_size == 0 || exhausted())) { + return 0; + } + rle_size_t values_read = 0; // Remaining from a previous call that would have left some unread data from a run. From b5d742d2004a6e0ddd0ab5cc2c9e6c9044efd7b6 Mon Sep 17 00:00:00 2001 From: AntoinePrv Date: Fri, 19 Jun 2026 15:05:21 +0200 Subject: [PATCH 15/21] Aply some review comment --- cpp/src/arrow/util/rle_bitmap_internal.h | 27 ++++++++++++------------ 1 file changed, 13 insertions(+), 14 deletions(-) diff --git a/cpp/src/arrow/util/rle_bitmap_internal.h b/cpp/src/arrow/util/rle_bitmap_internal.h index 5375663f9630..ed429f1ecbd8 100644 --- a/cpp/src/arrow/util/rle_bitmap_internal.h +++ b/cpp/src/arrow/util/rle_bitmap_internal.h @@ -101,24 +101,22 @@ class RunToBitmapDecoderMixin { return 0; } + rle_size_t n_vals = 0; + // HEADER: Writing inside the first byte if caller gives a non-aligned input - if (ARROW_PREDICT_FALSE(out_bit_offset != 0)) { - const auto n_vals = derived()->GetBatchFirstByte(out, batch_size); - // If we exhausted the values in this decoder, or we filled what was required, - // then the following recursive call will return 0. - // If in the opposite case, then we will continue on a byte-aligned boundary. - return n_vals + GetBatch(out.NewStartingAt(n_vals), batch_size - n_vals); + if (out_bit_offset != 0) { + n_vals = derived()->GetBatchFirstByte(out, batch_size); } // Writing full bytes - const auto n_vals = derived()->GetBatchFast(out, batch_size); + n_vals += + derived()->GetBatchFullBytes(out.NewStartingAt(n_vals), batch_size - n_vals); // TRAILER: Writing inside the last byte if caller asked for non multiple of 8 values const auto n_last_vals = std::min(batch_size - n_vals, derived()->remaining()); if (ARROW_PREDICT_FALSE(n_last_vals > 0)) { ARROW_DCHECK_LT(n_last_vals, 8); - out = out.NewStartingAt(n_vals); - return n_vals + derived()->GetBatchFirstByte(out, n_last_vals); + n_vals += derived()->GetBatchFirstByte(out.NewStartingAt(n_vals), n_last_vals); } return n_vals; @@ -197,8 +195,8 @@ class RleRunToBitmapDecoder } /// Get batch in full bytes using memset. - [[nodiscard]] rle_size_t GetBatchFast(BitmapSpanMut out, rle_size_t batch_size) { - ARROW_DCHECK_EQ(out.bit_start(), 0); + [[nodiscard]] rle_size_t GetBatchFullBytes(BitmapSpanMut out, rle_size_t batch_size) { + ARROW_DCHECK(out.bit_start() == 0 || batch_size == 0); const auto n_bytes = std::min(batch_size, remaining()) / 8; std::memset(out.data(), value_pattern_, n_bytes); const auto n_vals = 8 * n_bytes; @@ -265,8 +263,9 @@ class BitPackedRunToBitmapDecoder } /// Get batch in full bytes using memcpy. - [[nodiscard]] rle_size_t GetBatchFast(BitmapSpanMut out, rle_size_t batch_size) { - ARROW_DCHECK_EQ(out.bit_start(), 0); + [[nodiscard]] rle_size_t GetBatchFullBytes(BitmapSpanMut out, rle_size_t batch_size) { + ARROW_DCHECK(out.bit_start() == 0 || batch_size == 0); + ARROW_DCHECK(unread_values_bit_offset() == 0 || batch_size == 0); const auto n_bytes = std::min(batch_size, remaining()) / 8; std::memcpy(out.data(), unread_values_ptr(), n_bytes); const auto n_vals = 8 * n_bytes; @@ -297,7 +296,7 @@ class BitPackedRunToBitmapDecoder const rle_size_t to_read = std::min(batch_size, remaining()); rle_size_t read = 0; - // HEADER: copy bits one by one unit the output is byte-aligned + // HEADER: copy bits one by one until the output is byte-aligned const rle_size_t to_read_until_aligned = (8 - out_bit_offset) % 8; const rle_size_t to_read_header = std::min(to_read, to_read_until_aligned); const auto read_header = GetBatchSlow(out, to_read_header); From ab16793d2b70a3c34ac936a2193ed57afbb2ff16 Mon Sep 17 00:00:00 2001 From: AntoinePrv Date: Fri, 19 Jun 2026 15:30:42 +0200 Subject: [PATCH 16/21] Use CopyBitmap --- cpp/src/arrow/util/bit_util.h | 16 -- cpp/src/arrow/util/bit_util_test.cc | 17 -- cpp/src/arrow/util/rle_bitmap_internal.h | 228 ++++++++--------------- 3 files changed, 79 insertions(+), 182 deletions(-) diff --git a/cpp/src/arrow/util/bit_util.h b/cpp/src/arrow/util/bit_util.h index 8f471a38eb92..1646e5633210 100644 --- a/cpp/src/arrow/util/bit_util.h +++ b/cpp/src/arrow/util/bit_util.h @@ -179,22 +179,6 @@ static constexpr bool GetBitFromByte(uint8_t byte, uint8_t i) { return byte & kBitmask[i]; } -/// Read 32 bits starting at bit `offset` into a uint32. -/// -/// The return value in in-memory byte layout matches the source bit-stream -/// identically on little- and big-endian platforms. -/// -/// The caller must guarantee that many bytes are readable. -static inline uint32_t Get32Bits(const uint8_t* bits, uint64_t offset) { - uint64_t buffer = {}; - std::memcpy(&buffer, bits + (offset / 8), BytesForBits(offset % 8 + 32)); - // Interpret the loaded bytes as little-endian, drop the low `offset % 8` bits - // to align LSB-first, then store back in little-endian byte order so the - // result's memory representation matches the bit-stream. - buffer = FromLittleEndian(buffer); - return ToLittleEndian(static_cast(buffer >> (offset % 8))); -} - template struct CopyBitsParams { Uint src = {}; diff --git a/cpp/src/arrow/util/bit_util_test.cc b/cpp/src/arrow/util/bit_util_test.cc index a04b006edbff..f8715333de19 100644 --- a/cpp/src/arrow/util/bit_util_test.cc +++ b/cpp/src/arrow/util/bit_util_test.cc @@ -726,23 +726,6 @@ TEST(BitUtil, ByteSwap) { EXPECT_EQ(bit_util::ByteSwap(srcd64), expectedd64); } -TEST(BitUtil, Get32Bits) { - const std::array src = {0x12, 0x34, 0x56, 0x78, 0x9a, 0xbc, - 0xde, 0xf0, 0x11, 0x22, 0x33, 0x44}; - - // The result's in-memory bytes must reproduce the source bit-stream slice - // so each result bit must match the corresponding source bit. - for (uint64_t offset = 0; offset <= 24; ++offset) { - ARROW_SCOPED_TRACE("offset = ", offset); - const uint32_t result = bit_util::Get32Bits(src.data(), offset); - for (uint64_t i = 0; i < 32; ++i) { - EXPECT_EQ(bit_util::GetBit(reinterpret_cast(&result), i), - bit_util::GetBit(src.data(), offset + i)) - << "bit " << i; - } - } -} - TEST(BitUtil, Log2) { EXPECT_EQ(bit_util::Log2(1), 0); EXPECT_EQ(bit_util::Log2(2), 1); diff --git a/cpp/src/arrow/util/rle_bitmap_internal.h b/cpp/src/arrow/util/rle_bitmap_internal.h index ed429f1ecbd8..b78f16304ed0 100644 --- a/cpp/src/arrow/util/rle_bitmap_internal.h +++ b/cpp/src/arrow/util/rle_bitmap_internal.h @@ -21,6 +21,7 @@ #include #include "arrow/util/bit_util.h" +#include "arrow/util/bitmap_ops.h" #include "arrow/util/logging.h" #include "arrow/util/macros.h" #include "arrow/util/rle_encoding_internal.h" @@ -70,23 +71,49 @@ class BitmapSpan { using BitmapSpanMut = BitmapSpan; using BitmapSpanConst = BitmapSpan; -namespace internal_rle { -template -class RunToBitmapDecoderMixin { +class RleRunToBitmapDecoder { public: + /// The type of run that can be decoded. + using RunType = RleRun; + + constexpr RleRunToBitmapDecoder() noexcept = default; + + explicit RleRunToBitmapDecoder(const RunType& run) noexcept { Reset(run); } + + void Reset(const RunType& run) noexcept { + values_left_ = run.values_count(); + if (run.value_little_endian() == 0) { + value_pattern_ = uint8_t{0}; + } else { + value_pattern_ = uint8_t{0xFF}; + } + } + + /// Return the number of values that can be advanced. + rle_size_t remaining() const { return values_left_; } + + /// Return the repeated value of this decoder. + constexpr bool value() const { return value_pattern_ != 0; } + + /// Return how much the decoder would advance if asked to. + /// + /// Does not modify input. + rle_size_t AdvanceCapacity(rle_size_t batch_size) const noexcept { + const auto n_vals = std::min(batch_size, remaining()); + return n_vals; + } + /// Advance by as many values as provided or until exhaustion of the decoder. /// Return the number of values skipped. [[nodiscard]] rle_size_t Advance(rle_size_t batch_size) { - const auto steps = std::min(batch_size, derived()->remaining()); - derived()->AdvanceUnsafe(steps); - ARROW_DCHECK_GE(derived()->remaining(), 0); - return steps; + const auto n_vals = AdvanceCapacity(batch_size); + values_left_ -= n_vals; + ARROW_DCHECK_GE(remaining(), 0); + return n_vals; } /// Get the next value and return false if there are no more. - [[nodiscard]] constexpr bool Get(BitmapSpanMut out) { - return derived()->GetBatch(out, 1) == 1; - } + [[nodiscard]] constexpr bool Get(BitmapSpanMut out) { return GetBatch(out, 1) == 1; } /// Get a batch of values return the number of decoded elements. /// @@ -97,7 +124,7 @@ class RunToBitmapDecoderMixin { ARROW_DCHECK_GE(out_bit_offset, 0); ARROW_DCHECK_LT(out_bit_offset, 8); - if (ARROW_PREDICT_FALSE(derived()->remaining() == 0 || batch_size == 0)) { + if (ARROW_PREDICT_FALSE(remaining() == 0 || batch_size == 0)) { return 0; } @@ -105,30 +132,36 @@ class RunToBitmapDecoderMixin { // HEADER: Writing inside the first byte if caller gives a non-aligned input if (out_bit_offset != 0) { - n_vals = derived()->GetBatchFirstByte(out, batch_size); + n_vals = GetBatchInByte(out, batch_size); } // Writing full bytes - n_vals += - derived()->GetBatchFullBytes(out.NewStartingAt(n_vals), batch_size - n_vals); + n_vals += GetBatchFullBytes(out.NewStartingAt(n_vals), batch_size - n_vals); // TRAILER: Writing inside the last byte if caller asked for non multiple of 8 values - const auto n_last_vals = std::min(batch_size - n_vals, derived()->remaining()); + const auto n_last_vals = std::min(batch_size - n_vals, remaining()); if (ARROW_PREDICT_FALSE(n_last_vals > 0)) { ARROW_DCHECK_LT(n_last_vals, 8); - n_vals += derived()->GetBatchFirstByte(out.NewStartingAt(n_vals), n_last_vals); + n_vals += GetBatchInByte(out.NewStartingAt(n_vals), n_last_vals); } return n_vals; } - protected: - [[nodiscard]] rle_size_t GetBatchFromByte(BitmapSpanMut out, rle_size_t batch_size, - uint8_t src) { + private: + /// The byte pattern for 8 values (full ones or full zeros). + uint8_t value_pattern_ = {}; + /// Number of values left to decode. + rle_size_t values_left_ = 0; + + void AdvanceUnsafe(rle_size_t batch_size) { values_left_ -= batch_size; } + + /// Get batch values to fill the first incomplete byte of the output. + [[nodiscard]] rle_size_t GetBatchInByte(BitmapSpanMut out, rle_size_t batch_size) { const auto out_bit_offset = out.bit_start(); ARROW_DCHECK_GE(out_bit_offset, 0); ARROW_DCHECK_LT(out_bit_offset, 8); - ARROW_DCHECK_GT(derived()->remaining(), 0); + ARROW_DCHECK_GT(remaining(), 0); ARROW_DCHECK_GE(batch_size, 0); // Empty bits in first byte that we can fill @@ -139,7 +172,7 @@ class RunToBitmapDecoderMixin { const auto n_bits = Advance(desired_bits); // Copy relevant bits from the value pattern to the output. *out.data() = bit_util::CopyBits({ - .src = src, + .src = value_pattern_, .dst = *out.data(), .start = static_cast(out_bit_offset), .end = static_cast(out_bit_offset + n_bits), @@ -148,52 +181,6 @@ class RunToBitmapDecoderMixin { return n_bits; } - private: - CRTP* derived() { return static_cast(this); } - CRTP const* derived() const { return static_cast(this); } -}; -} // namespace internal_rle - -class RleRunToBitmapDecoder - : public internal_rle::RunToBitmapDecoderMixin { - public: - /// The type of run that can be decoded. - using RunType = RleRun; - - constexpr RleRunToBitmapDecoder() noexcept = default; - - explicit RleRunToBitmapDecoder(const RunType& run) noexcept { Reset(run); } - - void Reset(const RunType& run) noexcept { - values_left_ = run.values_count(); - if (run.value_little_endian() == 0) { - value_pattern_ = uint8_t{0}; - } else { - value_pattern_ = uint8_t{0xFF}; - } - } - - /// Return the number of values that can be advanced. - rle_size_t remaining() const { return values_left_; } - - /// Return the repeated value of this decoder. - constexpr bool value() const { return value_pattern_ != 0; } - - private: - friend class internal_rle::RunToBitmapDecoderMixin; - - /// The byte pattern for 8 values (full ones or full zeros). - uint8_t value_pattern_ = {}; - /// Number of values left to decode. - rle_size_t values_left_ = 0; - - void AdvanceUnsafe(rle_size_t batch_size) { values_left_ -= batch_size; } - - /// Get batch values to fill the first incomplete byte of the output. - [[nodiscard]] rle_size_t GetBatchFirstByte(BitmapSpanMut out, rle_size_t batch_size) { - return GetBatchFromByte(out, batch_size, value_pattern_); - } - /// Get batch in full bytes using memset. [[nodiscard]] rle_size_t GetBatchFullBytes(BitmapSpanMut out, rle_size_t batch_size) { ARROW_DCHECK(out.bit_start() == 0 || batch_size == 0); @@ -205,8 +192,7 @@ class RleRunToBitmapDecoder } }; -class BitPackedRunToBitmapDecoder - : public internal_rle::RunToBitmapDecoderMixin { +class BitPackedRunToBitmapDecoder { public: /// The type of run that can be decoded. using RunType = BitPackedRun; @@ -226,22 +212,37 @@ class BitPackedRunToBitmapDecoder /// Return the number of values that can be advanced. constexpr rle_size_t remaining() const { return values_count_ - values_read_; } + /// Return how much the decoder would advance if asked to. + /// + /// Does not modify input. + rle_size_t AdvanceCapacity(rle_size_t batch_size) const noexcept { + const auto n_vals = std::min(batch_size, remaining()); + return n_vals; + } + + /// Advance by as many values as provided or until exhaustion of the decoder. + /// Return the number of values skipped. + [[nodiscard]] rle_size_t Advance(rle_size_t batch_size) { + const auto n_vals = AdvanceCapacity(batch_size); + values_read_ += n_vals; + ARROW_DCHECK_GE(remaining(), 0); + return n_vals; + } + + /// Get the next value and return false if there are no more. + [[nodiscard]] constexpr bool Get(BitmapSpanMut out) { return GetBatch(out, 1) == 1; } + /// Get a batch of values return the number of decoded elements. /// May write fewer elements to the output than requested if there are not enough values /// left. [[nodiscard]] rle_size_t GetBatch(BitmapSpanMut out, rle_size_t batch_size) { - // WARN: Slow case where bit offsets of input and output are not aligned. - // We loose the ability to do memcpy - if (ARROW_PREDICT_FALSE(out.bit_start() != unread_values_bit_offset())) { - return GetBatchMisaligned(out, batch_size); - } - return Base::GetBatch(out, batch_size); + auto n_vals = AdvanceCapacity(batch_size); + arrow::internal::CopyBitmap(unread_values_ptr(), unread_values_bit_offset(), n_vals, + out.data(), out.bit_start()); + return Advance(n_vals); } private: - using Base = internal_rle::RunToBitmapDecoderMixin; - friend class internal_rle::RunToBitmapDecoderMixin; - /// The pointer to the beginning of the run const uint8_t* data_ = nullptr; /// The total number of values in the run @@ -254,77 +255,6 @@ class BitPackedRunToBitmapDecoder /// Bit in @ref unread_values_ptr where the unread values start. rle_size_t unread_values_bit_offset() const noexcept { return values_read_ % 8; } - - void AdvanceUnsafe(rle_size_t batch_size) { values_read_ += batch_size; } - - /// Get batch values to fill the first incomplete byte of the output. - [[nodiscard]] rle_size_t GetBatchFirstByte(BitmapSpanMut out, rle_size_t batch_size) { - return GetBatchFromByte(out, batch_size, *unread_values_ptr()); - } - - /// Get batch in full bytes using memcpy. - [[nodiscard]] rle_size_t GetBatchFullBytes(BitmapSpanMut out, rle_size_t batch_size) { - ARROW_DCHECK(out.bit_start() == 0 || batch_size == 0); - ARROW_DCHECK(unread_values_bit_offset() == 0 || batch_size == 0); - const auto n_bytes = std::min(batch_size, remaining()) / 8; - std::memcpy(out.data(), unread_values_ptr(), n_bytes); - const auto n_vals = 8 * n_bytes; - AdvanceUnsafe(n_vals); - return n_vals; - } - - /// Correct and slow function for reading a batch one bit at a time. - [[nodiscard]] rle_size_t GetBatchSlow(BitmapSpanMut out, rle_size_t batch_size) { - const rle_size_t to_read = std::min(batch_size, remaining()); - for (rle_size_t i = 0; i < to_read; ++i) { - const bool bit = bit_util::GetBit(data_, values_read_ + i); - bit_util::SetBitTo(out.data(), out.bit_start() + i, bit); - } - AdvanceUnsafe(to_read); - return to_read; - } - - [[nodiscard]] rle_size_t GetBatchMisaligned(BitmapSpanMut out, rle_size_t batch_size) { - const auto out_bit_offset = out.bit_start(); - ARROW_DCHECK_LT(out_bit_offset, 8); - ARROW_DCHECK_GE(out_bit_offset, 0); - - if (ARROW_PREDICT_FALSE(remaining() == 0 || batch_size == 0)) { - return 0; - } - - const rle_size_t to_read = std::min(batch_size, remaining()); - rle_size_t read = 0; - - // HEADER: copy bits one by one until the output is byte-aligned - const rle_size_t to_read_until_aligned = (8 - out_bit_offset) % 8; - const rle_size_t to_read_header = std::min(to_read, to_read_until_aligned); - const auto read_header = GetBatchSlow(out, to_read_header); - // We ensured there was enough capacity - ARROW_DCHECK_EQ(read_header, to_read_header); - read += read_header; - out = out.NewStartingAt(read_header); - // Either we are done or we have aligned the output values on a byte. - ARROW_DCHECK(read == to_read || read == to_read_until_aligned); - - // Main loop, copy 32 bits at the time - const rle_size_t n_batches = (to_read - read) / 32; - for (rle_size_t i = 0; i < n_batches; ++i) { - const auto bits = bit_util::Get32Bits(data_, values_read_ + 32 * i); - std::memcpy(out.data(), &bits, sizeof(bits)); - out = out.NewStartingAt(8 * sizeof(bits)); - } - const rle_size_t read_main = n_batches * 32; - AdvanceUnsafe(read_main); - read += read_main; - - // TRAILER: copy remaining bits one by one - const auto read_trailer = GetBatchSlow(out, to_read - read); - read += read_trailer; - ARROW_DCHECK_EQ(read, to_read); - - return to_read; - } }; /// A specialized decoder class to extract RLE+bitpacked booleans. From ed641b6c3e1452b7a2e1d2f157cbda110671a669 Mon Sep 17 00:00:00 2001 From: AntoinePrv Date: Fri, 19 Jun 2026 16:07:28 +0200 Subject: [PATCH 17/21] Improve tests --- cpp/src/arrow/util/bit_util.h | 2 +- cpp/src/arrow/util/bit_util_test.cc | 42 ++++---- cpp/src/arrow/util/rle_bitmap_internal.h | 2 +- cpp/src/arrow/util/rle_bitmap_test.cc | 127 ++++++++++------------- 4 files changed, 78 insertions(+), 95 deletions(-) diff --git a/cpp/src/arrow/util/bit_util.h b/cpp/src/arrow/util/bit_util.h index 1646e5633210..c7ba7e4f4689 100644 --- a/cpp/src/arrow/util/bit_util.h +++ b/cpp/src/arrow/util/bit_util.h @@ -194,7 +194,7 @@ struct CopyBitsParams { /// Setting ``kAllowFullCopy`` to false is an optimization when the caller can /// guarantee that the range of bits to copy does not cover the whole range. template -[[nodiscard]] constexpr Uint CopyBits(const CopyBitsParams& params) { +[[nodiscard]] constexpr Uint CopyBitsInInteger(const CopyBitsParams& params) { constexpr auto kUintSizeBits = static_cast(sizeof(Uint) * 8); assert(params.start <= params.end); assert(params.start < kUintSizeBits); diff --git a/cpp/src/arrow/util/bit_util_test.cc b/cpp/src/arrow/util/bit_util_test.cc index f8715333de19..a6af004f7c5b 100644 --- a/cpp/src/arrow/util/bit_util_test.cc +++ b/cpp/src/arrow/util/bit_util_test.cc @@ -1067,35 +1067,35 @@ TEST(SpliceWord, SpliceWord) { TEST(BitUtil, CopyBits) { // Copy bits [start, end[ from src into dst, keeping dst's other bits. - using bit_util::CopyBits; + using bit_util::CopyBitsInInteger; // Empty range: result equals dst. - ASSERT_EQ( - CopyBits({.src = 0b11111111, .dst = 0b00010010, .start = 3, .end = 3}), - 0b00010010); + ASSERT_EQ(CopyBitsInInteger( + {.src = 0b11111111, .dst = 0b00010010, .start = 3, .end = 3}), + 0b00010010); // dst = 0101 0101, src = 1010 1010 -> 0101 1010 - ASSERT_EQ( - CopyBits({.src = 0b10101010, .dst = 0b01010101, .start = 0, .end = 4}), - 0b01011010); + ASSERT_EQ(CopyBitsInInteger( + {.src = 0b10101010, .dst = 0b01010101, .start = 0, .end = 4}), + 0b01011010); // Copy a middle span [2, 5[ of all-ones into an all-zeros dst. - ASSERT_EQ( - CopyBits({.src = 0b11111111, .dst = 0b00000000, .start = 2, .end = 5}), - 0b00011100); + ASSERT_EQ(CopyBitsInInteger( + {.src = 0b11111111, .dst = 0b00000000, .start = 2, .end = 5}), + 0b00011100); // Copy a middle span [2, 5[ of all-zeros into an all-ones dst. - ASSERT_EQ( - CopyBits({.src = 0b00000000, .dst = 0b11111111, .start = 2, .end = 5}), - 0b11100011); + ASSERT_EQ(CopyBitsInInteger( + {.src = 0b00000000, .dst = 0b11111111, .start = 2, .end = 5}), + 0b11100011); // Full-word copy returns src unchanged. - ASSERT_EQ( - CopyBits({.src = 0b10101011, .dst = 0b00010010, .start = 0, .end = 8}), - 0b10101011); + ASSERT_EQ(CopyBitsInInteger( + {.src = 0b10101011, .dst = 0b00010010, .start = 0, .end = 8}), + 0b10101011); // uint16_t partial range [4, 12[: dst keeps its bits outside, src fills inside. ASSERT_EQ( - CopyBits( + CopyBitsInInteger( {.src = 0b1010101010101010, .dst = 0b0101010101010101, .start = 4, .end = 12}), 0b0101101010100101); // uint64_t - ASSERT_EQ(CopyBits({ + ASSERT_EQ(CopyBitsInInteger({ .src = 0x0123456789abcdef, .dst = 0xfedcba9876543210, .start = 0, @@ -1103,9 +1103,9 @@ TEST(BitUtil, CopyBits) { }), 0x0123456789abcdef); // constexpr-evaluable. - static_assert( - CopyBits({.src = 0b10101010, .dst = 0b01010101, .start = 0, .end = 4}) == - 0b01011010); + static_assert(CopyBitsInInteger( + {.src = 0b10101010, .dst = 0b01010101, .start = 0, .end = 4}) == + 0b01011010); } } // namespace arrow diff --git a/cpp/src/arrow/util/rle_bitmap_internal.h b/cpp/src/arrow/util/rle_bitmap_internal.h index b78f16304ed0..e0b8e42ff63e 100644 --- a/cpp/src/arrow/util/rle_bitmap_internal.h +++ b/cpp/src/arrow/util/rle_bitmap_internal.h @@ -171,7 +171,7 @@ class RleRunToBitmapDecoder { // Try to advance, and get number of bits we had remaining const auto n_bits = Advance(desired_bits); // Copy relevant bits from the value pattern to the output. - *out.data() = bit_util::CopyBits({ + *out.data() = bit_util::CopyBitsInInteger({ .src = value_pattern_, .dst = *out.data(), .start = static_cast(out_bit_offset), diff --git a/cpp/src/arrow/util/rle_bitmap_test.cc b/cpp/src/arrow/util/rle_bitmap_test.cc index 7c97b3f6bce9..86e9f8c84cc1 100644 --- a/cpp/src/arrow/util/rle_bitmap_test.cc +++ b/cpp/src/arrow/util/rle_bitmap_test.cc @@ -41,42 +41,44 @@ std::vector BitsFromBytes(const std::vector& bytes, rle_size_t co } /// Check the decoded output in `out` against `expected`. -/// Bits `out[out_offset..out_offset + count]` must equal `expected[skip..skip + count]`. -/// The `out_offset` bits before them must still be zero. +/// Bits `out[out_offset..out_offset + count]` must equal +/// `expected[expected_skip..expected_skip + count]`. The `out_offset` bits before them +/// must still be zero. void CheckDecodedBits(const std::vector& out, const std::vector& expected, - rle_size_t count, rle_size_t out_offset = 0, rle_size_t skip = 0) { - ARROW_SCOPED_TRACE("out_offset = ", out_offset, ", skip = ", skip); + rle_size_t count, rle_size_t out_offset = 0, + rle_size_t expected_skip = 0) { + ARROW_SCOPED_TRACE("out_offset = ", out_offset, ", expected_skip = ", expected_skip); for (rle_size_t i = 0; i < out_offset; ++i) { EXPECT_FALSE(bit_util::GetBit(out.data(), i)) << "clobbered bit " << i; } for (rle_size_t i = 0; i < count; ++i) { - EXPECT_EQ(bit_util::GetBit(out.data(), out_offset + i), expected[skip + i]) + EXPECT_EQ(bit_util::GetBit(out.data(), out_offset + i), expected[expected_skip + i]) << "at bit " << i; } } -/// Skip the first `skip` values with Advance(), then decode the rest of the run -/// into one output bitmap, `chunk` values at a time. Compare against `expected`. +/// Skip the first `expected_skip` values with Advance(), then decode the rest of the run +/// into one output bitmap, `chunk_size` values at a time. Compare against `expected`. /// -/// `chunk` controls output bit alignment. When `chunk` is not a multiple of 8, +/// `chunk_size` controls output bit alignment. When `chunk_size` is not a multiple of 8, /// later calls start at a non-zero output bit offset. /// -/// `skip` shifts the decoder's read offset relative to the output offset. -/// A non-zero `skip` makes the two differ, which exercises the bit-unaligned read -/// path of BitPackedRunToBitmapDecoder. With `skip == 0` they stay in sync and -/// only the aligned path runs. +/// `expected_skip` shifts the decoder's read offset relative to the output offset. +/// A non-zero `expected_skip` makes the two differ, which exercises the bit-unaligned +/// read path of BitPackedRunToBitmapDecoder. With `expected_skip == 0` they stay in sync +/// and only the aligned path runs. template void CheckChunkedDecode(const typename Decoder::RunType& run, - const std::vector& expected, rle_size_t chunk = 1, - rle_size_t skip = 0) { - ARROW_SCOPED_TRACE("chunk = ", chunk, ", skip = ", skip); + const std::vector& expected, rle_size_t chunk_size = 1, + rle_size_t expected_skip = 0) { + ARROW_SCOPED_TRACE("chunk_size = ", chunk_size, ", expected_skip = ", expected_skip); const auto n_vals = static_cast(expected.size()); - ASSERT_LE(skip, n_vals); + ASSERT_LE(expected_skip, n_vals); Decoder decoder(run); - const auto advanced = decoder.Advance(skip); - ASSERT_EQ(advanced, skip); - const auto rest = n_vals - skip; + const auto advanced = decoder.Advance(expected_skip); + ASSERT_EQ(advanced, expected_skip); + const auto rest = n_vals - expected_skip; // Output buffer with one guard byte to catch out-of-bounds writes. std::vector out(static_cast(bit_util::BytesForBits(rest)) + 1, 0); @@ -85,7 +87,7 @@ void CheckChunkedDecode(const typename Decoder::RunType& run, rle_size_t read = 0; while (read < rest) { - const auto want = std::min(chunk, rest - read); + const auto want = std::min(chunk_size, rest - read); const auto got = decoder.GetBatch(BitmapSpanMut(out.data(), /*bit_start=*/read), want); EXPECT_EQ(got, want) << "at pos " << read; @@ -96,7 +98,7 @@ void CheckChunkedDecode(const typename Decoder::RunType& run, EXPECT_EQ(decoder.remaining(), 0); EXPECT_EQ(out.back(), guard) << "decoder wrote past the end of the output"; - CheckDecodedBits(out, expected, /*count=*/rest, /*out_offset=*/0, skip); + CheckDecodedBits(out, expected, /*count=*/rest, /*out_offset=*/0, expected_skip); } /// All the checks shared by both decoder types. @@ -123,17 +125,18 @@ void CheckBitmapDecoder(const typename Decoder::RunType& run, } // Decode the whole run in several chunks. - for (const rle_size_t chunk : {rle_size_t{1}, rle_size_t{3}, rle_size_t{7}, - rle_size_t{8}, rle_size_t{9}, n_vals}) { - CheckChunkedDecode(run, expected, chunk); + for (const rle_size_t chunk_size : {rle_size_t{1}, rle_size_t{3}, rle_size_t{7}, + rle_size_t{8}, rle_size_t{9}, n_vals, n_vals + 1}) { + CheckChunkedDecode(run, expected, chunk_size); } // Decode the whole run in several chunks, after an initial Advance that shifts // the run and output bit alignment. - for (const rle_size_t chunk : {rle_size_t{1}, rle_size_t{3}, rle_size_t{7}, - rle_size_t{8}, rle_size_t{9}, n_vals}) { - for (rle_size_t skip = 1; skip < 8 && skip < n_vals; ++skip) { - CheckChunkedDecode(run, expected, chunk, skip); + for (const rle_size_t chunk_size : {rle_size_t{1}, rle_size_t{3}, rle_size_t{7}, + rle_size_t{8}, rle_size_t{9}, n_vals, n_vals + 1}) { + for (rle_size_t expected_skip = 1; expected_skip < 8 && expected_skip < n_vals; + ++expected_skip) { + CheckChunkedDecode(run, expected, chunk_size, expected_skip); } } @@ -186,52 +189,31 @@ void CheckBitmapDecoder(const typename Decoder::RunType& run, * RleRunToBitmapDecoder * ***************************/ -struct RleBitmapCase { - // The repeated boolean value of the run. - bool value; - // The number of values in the run. - rle_size_t count; -}; - -class RleRunToBitmapDecoderTest : public ::testing::TestWithParam {}; +class RleRunToBitmapDecoderTest : public ::testing::TestWithParam {}; TEST_P(RleRunToBitmapDecoderTest, Decode) { - const auto& param = GetParam(); + const auto& count = GetParam(); - // A boolean RLE run stores its value in a single (1-bit-wide) byte. - const uint8_t data = param.value ? 1 : 0; - const auto run = RleRun(&data, param.count, /*value_bit_width=*/1); + // Only two possible repeated value + for (bool value : {true, false}) { + ARROW_SCOPED_TRACE("value = ", value); - // value() reports the repeated boolean. - { + // A boolean RLE run stores its value in a single (1-bit-wide) byte. + const uint8_t data = value ? 1 : 0; + const auto run = RleRun(&data, count, /*value_bit_width=*/1); + + // value() reports the repeated boolean. RleRunToBitmapDecoder decoder(run); - EXPECT_EQ(decoder.value(), param.value); - } + EXPECT_EQ(decoder.value(), value); - const std::vector expected(param.count, param.value); - CheckBitmapDecoder(run, expected); + const std::vector expected(count, value); + CheckBitmapDecoder(run, expected); + } } INSTANTIATE_TEST_SUITE_P( // RleBitmap, RleRunToBitmapDecoderTest, - ::testing::Values( // - RleBitmapCase{.value = false, .count = 0}, - RleBitmapCase{.value = true, .count = 1}, - RleBitmapCase{.value = false, .count = 3}, - RleBitmapCase{.value = true, .count = 8}, - RleBitmapCase{.value = false, .count = 9}, - RleBitmapCase{.value = true, .count = 9}, - RleBitmapCase{.value = false, .count = 13}, - RleBitmapCase{.value = true, .count = 13}, - RleBitmapCase{.value = false, .count = 64}, - RleBitmapCase{.value = true, .count = 64}, - RleBitmapCase{.value = false, .count = 100}, - RleBitmapCase{.value = true, .count = 100}, - RleBitmapCase{.value = true, .count = 1000}), - [](const ::testing::TestParamInfo& info) { - return std::string(info.param.value ? "true_" : "false_") + - std::to_string(info.param.count); - }); + ::testing::Values(0, 1, 3, 8, 9, 13, 64, 100, 100, 1000)); /********************************* * BitPackedRunToBitmapDecoder * @@ -317,13 +299,13 @@ void AppendBitPackedRun(std::vector& bytes, std::vector& expected /// Decode the whole `bytes` into a bitmap and check it against `expected`. /// -/// Decode `chunk` values per GetBatch call to check the decoder state between +/// Decode `chunk_size` values per GetBatch call to check the decoder state between /// calls. The output starts at bit offset `out_offset`. A non-zero offset makes /// the output and the encoded `bytes` use different bit alignment. void CheckRleBitPackedDecode(const std::vector& bytes, - const std::vector& expected, rle_size_t chunk, + const std::vector& expected, rle_size_t chunk_size, rle_size_t out_offset = 0) { - ARROW_SCOPED_TRACE("chunk = ", chunk, ", out_offset = ", out_offset); + ARROW_SCOPED_TRACE("chunk_size = ", chunk_size, ", out_offset = ", out_offset); const auto n_vals = static_cast(expected.size()); RleBitPackedToBitmapDecoder decoder(bytes.data(), @@ -338,7 +320,7 @@ void CheckRleBitPackedDecode(const std::vector& bytes, rle_size_t read = 0; while (read < n_vals) { - const auto want = std::min(chunk, n_vals - read); + const auto want = std::min(chunk_size, n_vals - read); const auto got = decoder.GetBatch( BitmapSpanMut(out.data(), /*bit_start=*/out_offset + read), want); EXPECT_EQ(got, want) << "at pos " << read; @@ -363,13 +345,14 @@ void CheckRleBitPackedToBitmap(const std::vector& bytes, const std::vector& expected) { const auto n_vals = static_cast(expected.size()); ASSERT_GT(n_vals, 0); - for (const rle_size_t chunk : {rle_size_t{1}, rle_size_t{3}, rle_size_t{7}, - rle_size_t{8}, rle_size_t{9}, rle_size_t{33}, n_vals}) { - CheckRleBitPackedDecode(bytes, expected, chunk); + for (const rle_size_t chunk_size : + {rle_size_t{1}, rle_size_t{3}, rle_size_t{7}, rle_size_t{8}, rle_size_t{9}, + rle_size_t{33}, n_vals}) { + CheckRleBitPackedDecode(bytes, expected, chunk_size); // A non-zero output offset forces the first run to start at a non-byte // aligned output position. for (rle_size_t out_offset = 1; out_offset < 8; ++out_offset) { - CheckRleBitPackedDecode(bytes, expected, chunk, out_offset); + CheckRleBitPackedDecode(bytes, expected, chunk_size, out_offset); } } } From 88ba25eeda3ac46a6417a6937415107f0fa58bad Mon Sep 17 00:00:00 2001 From: AntoinePrv Date: Mon, 22 Jun 2026 11:01:23 +0200 Subject: [PATCH 18/21] Strong clobber test --- cpp/src/arrow/util/rle_bitmap_internal.h | 4 +- cpp/src/arrow/util/rle_bitmap_test.cc | 183 ++++++++++++++++++----- 2 files changed, 147 insertions(+), 40 deletions(-) diff --git a/cpp/src/arrow/util/rle_bitmap_internal.h b/cpp/src/arrow/util/rle_bitmap_internal.h index e0b8e42ff63e..ac19123fe7b4 100644 --- a/cpp/src/arrow/util/rle_bitmap_internal.h +++ b/cpp/src/arrow/util/rle_bitmap_internal.h @@ -113,7 +113,7 @@ class RleRunToBitmapDecoder { } /// Get the next value and return false if there are no more. - [[nodiscard]] constexpr bool Get(BitmapSpanMut out) { return GetBatch(out, 1) == 1; } + [[nodiscard]] bool Get(BitmapSpanMut out) { return GetBatch(out, 1) == 1; } /// Get a batch of values return the number of decoded elements. /// @@ -230,7 +230,7 @@ class BitPackedRunToBitmapDecoder { } /// Get the next value and return false if there are no more. - [[nodiscard]] constexpr bool Get(BitmapSpanMut out) { return GetBatch(out, 1) == 1; } + [[nodiscard]] bool Get(BitmapSpanMut out) { return GetBatch(out, 1) == 1; } /// Get a batch of values return the number of decoded elements. /// May write fewer elements to the output than requested if there are not enough values diff --git a/cpp/src/arrow/util/rle_bitmap_test.cc b/cpp/src/arrow/util/rle_bitmap_test.cc index 86e9f8c84cc1..5f6e663f666e 100644 --- a/cpp/src/arrow/util/rle_bitmap_test.cc +++ b/cpp/src/arrow/util/rle_bitmap_test.cc @@ -17,6 +17,7 @@ #include #include +#include #include #include @@ -31,6 +32,17 @@ namespace arrow::util { namespace { +/// Make a vector of `size` pseudo-random bytes, deterministic for a given `seed`. +std::vector MakeRandomBytes(size_t size, uint32_t seed = 56) { + std::vector bytes(size); + std::minstd_rand gen(seed); + std::uniform_int_distribution dist(0, 255); + for (auto& byte : bytes) { + byte = dist(gen); + } + return bytes; +} + /// Read the first `count` bits of `bytes` (LSB first) into a vector of booleans. std::vector BitsFromBytes(const std::vector& bytes, rle_size_t count) { std::vector bits(count); @@ -40,20 +52,41 @@ std::vector BitsFromBytes(const std::vector& bytes, rle_size_t co return bits; } +struct CheckDecodedBitsParams { + const std::vector& actual; + const std::vector& expected; + rle_size_t count; + rle_size_t actual_start_bit = 0; + rle_size_t expected_start_idx = 0; +}; + /// Check the decoded output in `out` against `expected`. -/// Bits `out[out_offset..out_offset + count]` must equal -/// `expected[expected_skip..expected_skip + count]`. The `out_offset` bits before them -/// must still be zero. -void CheckDecodedBits(const std::vector& out, const std::vector& expected, - rle_size_t count, rle_size_t out_offset = 0, - rle_size_t expected_skip = 0) { - ARROW_SCOPED_TRACE("out_offset = ", out_offset, ", expected_skip = ", expected_skip); - for (rle_size_t i = 0; i < out_offset; ++i) { - EXPECT_FALSE(bit_util::GetBit(out.data(), i)) << "clobbered bit " << i; +void CheckDecodedBits(const CheckDecodedBitsParams& params) { + ARROW_SCOPED_TRACE("out_start_bit = ", params.actual_start_bit, + ", expected_start_idx = ", params.expected_start_idx); + for (rle_size_t i = 0; i < params.count; ++i) { + ASSERT_EQ(bit_util::GetBit(params.actual.data(), params.actual_start_bit + i), + params.expected[params.expected_start_idx + i]) + << "first difference at bit " << i; } - for (rle_size_t i = 0; i < count; ++i) { - EXPECT_EQ(bit_util::GetBit(out.data(), out_offset + i), expected[expected_skip + i]) - << "at bit " << i; +} + +struct CheckBitsEqualParams { + const std::vector& actual; + const std::vector& expected; + rle_size_t count; + rle_size_t actual_start_bit = 0; + rle_size_t expected_start_bit = 0; +}; + +/// Check that two bit ranges, stored in `actual` and `expected`, are equal. +void CheckBitsEqual(const CheckBitsEqualParams& params) { + ARROW_SCOPED_TRACE("actual_start_bit = ", params.actual_start_bit, + ", expected_start_bit = ", params.expected_start_bit); + for (rle_size_t i = 0; i < params.count; ++i) { + ASSERT_EQ(bit_util::GetBit(params.actual.data(), params.actual_start_bit + i), + bit_util::GetBit(params.expected.data(), params.expected_start_bit + i)) + << "first difference at bit " << i; } } @@ -68,37 +101,103 @@ void CheckDecodedBits(const std::vector& out, const std::vector& /// read path of BitPackedRunToBitmapDecoder. With `expected_skip == 0` they stay in sync /// and only the aligned path runs. template -void CheckChunkedDecode(const typename Decoder::RunType& run, - const std::vector& expected, rle_size_t chunk_size = 1, - rle_size_t expected_skip = 0) { +void CheckDecoderValuesChunked(const typename Decoder::RunType& run, + const std::vector& expected, + rle_size_t chunk_size = 1, rle_size_t expected_skip = 0) { ARROW_SCOPED_TRACE("chunk_size = ", chunk_size, ", expected_skip = ", expected_skip); + const auto n_vals = static_cast(expected.size()); ASSERT_LE(expected_skip, n_vals); Decoder decoder(run); const auto advanced = decoder.Advance(expected_skip); ASSERT_EQ(advanced, expected_skip); - const auto rest = n_vals - expected_skip; + const auto n_vals_to_decode = n_vals - expected_skip; - // Output buffer with one guard byte to catch out-of-bounds writes. - std::vector out(static_cast(bit_util::BytesForBits(rest)) + 1, 0); - const uint8_t guard = 0xA5; - out.back() = guard; + // Output buffer + const auto n_bytes = static_cast(bit_util::BytesForBits(n_vals_to_decode)); + std::vector out(n_bytes, 0); - rle_size_t read = 0; - while (read < rest) { - const auto want = std::min(chunk_size, rest - read); + rle_size_t n_val_read = 0; + while (n_val_read < n_vals_to_decode) { + const auto want = std::min(chunk_size, n_vals_to_decode - n_val_read); const auto got = - decoder.GetBatch(BitmapSpanMut(out.data(), /*bit_start=*/read), want); - EXPECT_EQ(got, want) << "at pos " << read; - ASSERT_GT(got, 0) << "at pos " << read; // break on failure - read += got; - EXPECT_EQ(decoder.remaining(), rest - read); + decoder.GetBatch(BitmapSpanMut(out.data(), /*bit_start=*/n_val_read), want); + EXPECT_EQ(got, want) << "at pos " << n_val_read; + ASSERT_GT(got, 0) << "at pos " << n_val_read; // break on failure + n_val_read += got; + EXPECT_EQ(decoder.remaining(), n_vals_to_decode - n_val_read); } EXPECT_EQ(decoder.remaining(), 0); - EXPECT_EQ(out.back(), guard) << "decoder wrote past the end of the output"; - CheckDecodedBits(out, expected, /*count=*/rest, /*out_offset=*/0, expected_skip); + CheckDecodedBits({ + .actual = out, + .expected = expected, + .count = n_vals_to_decode, + .actual_start_bit = 0, + .expected_start_idx = expected_skip, + }); +} + +/// Decode a chunk of data into a known output to check for out of bounds write. +/// +/// @see CheckDecoderValuesChunked +template +void CheckDecoderClobber(const typename Decoder::RunType& run, + const std::vector& expected, rle_size_t chunk_size = 1, + rle_size_t expected_skip = 0) { + ARROW_SCOPED_TRACE("chunk_size = ", chunk_size, ", expected_skip = ", expected_skip); + + const auto n_vals = static_cast(expected.size()); + ASSERT_LE(expected_skip, n_vals); + + Decoder decoder(run); + const auto advanced = decoder.Advance(expected_skip); + ASSERT_EQ(advanced, expected_skip); + const auto n_vals_to_decode = n_vals - expected_skip; + + // Output buffer with enough capacity to store a full chunk plus extra bytes as + // clobbers/guard to check for out of bounds write. + const auto n_bytes = static_cast(bit_util::BytesForBits(chunk_size) + + bit_util::CeilDiv(n_vals, chunk_size) + 2); + // This seed is arbitrary and of little importance. We are simply trying to avoid an + // unlikely case where guards have the same pattern in all invocations. + const auto out_pattern = + MakeRandomBytes(n_bytes, /* seed= */ (chunk_size << 16) ^ expected_skip); + auto out = out_pattern; + + rle_size_t n_val_read = 0; + rle_size_t out_bit_start = 0; + while (n_val_read < n_vals_to_decode) { + // Clean output buffer + out = out_pattern; + const auto want = std::min(chunk_size, n_vals_to_decode - n_val_read); + const auto got = decoder.GetBatch(BitmapSpanMut(out.data(), out_bit_start), want); + ASSERT_GT(got, 0) << "at pos " << n_val_read; // break on failure + EXPECT_EQ(got, want) << "at pos " << n_val_read; + // Check that the leading bits have not been modified + CheckBitsEqual({.actual = out, .expected = out_pattern, .count = out_bit_start}); + // Check that the trailing bits have not been modified + CheckBitsEqual({ + .actual = out, + .expected = out_pattern, + .count = static_cast(8 * n_bytes) - (out_bit_start + want), + .actual_start_bit = out_bit_start + want, + .expected_start_bit = out_bit_start + want, + }); + // Check decoded bits are also correct + CheckDecodedBits({ + .actual = out, + .expected = expected, + .count = want, + .actual_start_bit = out_bit_start, + .expected_start_idx = expected_skip + n_val_read, + }); + + n_val_read += got; + ++out_bit_start; + EXPECT_EQ(decoder.remaining(), n_vals_to_decode - n_val_read); + } } /// All the checks shared by both decoder types. @@ -127,7 +226,7 @@ void CheckBitmapDecoder(const typename Decoder::RunType& run, // Decode the whole run in several chunks. for (const rle_size_t chunk_size : {rle_size_t{1}, rle_size_t{3}, rle_size_t{7}, rle_size_t{8}, rle_size_t{9}, n_vals, n_vals + 1}) { - CheckChunkedDecode(run, expected, chunk_size); + CheckDecoderValuesChunked(run, expected, chunk_size); } // Decode the whole run in several chunks, after an initial Advance that shifts @@ -136,7 +235,10 @@ void CheckBitmapDecoder(const typename Decoder::RunType& run, rle_size_t{8}, rle_size_t{9}, n_vals, n_vals + 1}) { for (rle_size_t expected_skip = 1; expected_skip < 8 && expected_skip < n_vals; ++expected_skip) { - CheckChunkedDecode(run, expected, chunk_size, expected_skip); + // Check the decoding happens as expected + CheckDecoderValuesChunked(run, expected, chunk_size, expected_skip); + // Check the decoding does not write out of bounds + CheckDecoderClobber(run, expected, chunk_size, expected_skip); } } @@ -155,7 +257,7 @@ void CheckBitmapDecoder(const typename Decoder::RunType& run, const auto advanced = decoder.Advance(1); EXPECT_EQ(advanced, 0); EXPECT_EQ(decoder.remaining(), 0); - CheckDecodedBits(out, expected, /*count=*/n_vals); + CheckDecodedBits({.actual = out, .expected = expected, .count = n_vals}); } // Advancing more than available stops at the run boundary. @@ -179,7 +281,7 @@ void CheckBitmapDecoder(const typename Decoder::RunType& run, std::vector out_2(static_cast(bit_util::BytesForBits(n_vals)), 0); const auto got = decoder.GetBatch(BitmapSpanMut(out_2.data()), n_vals); EXPECT_EQ(got, n_vals); - CheckDecodedBits(out_2, expected, /*count=*/n_vals); + CheckDecodedBits({.actual = out_2, .expected = expected, .count = n_vals}); } } @@ -337,7 +439,12 @@ void CheckRleBitPackedDecode(const std::vector& bytes, EXPECT_TRUE(decoder.exhausted()); EXPECT_EQ(out.back(), guard) << "decoder wrote past the end of the output"; - CheckDecodedBits(out, expected, /*count=*/n_vals, out_offset); + CheckDecodedBits({ + .actual = out, + .expected = expected, + .count = n_vals, + .actual_start_bit = out_offset, + }); } /// Run the decode check over a battery of chunk sizes and output offsets. @@ -438,7 +545,7 @@ TEST(RleBitPackedToBitmapDecoder, ReadPastEnd) { EXPECT_TRUE(decoder.exhausted()); got = decoder.GetBatch(BitmapSpanMut(out.data()), 10); EXPECT_EQ(got, 0); - CheckDecodedBits(out, expected, /*count=*/n_vals); + CheckDecodedBits({.actual = out, .expected = expected, .count = n_vals}); } TEST(RleBitPackedToBitmapDecoder, Reset) { @@ -463,7 +570,7 @@ TEST(RleBitPackedToBitmapDecoder, Reset) { const auto got_2 = decoder.GetBatch(BitmapSpanMut(out_2.data()), n_vals); EXPECT_EQ(got_2, n_vals); EXPECT_TRUE(decoder.exhausted()); - CheckDecodedBits(out_2, expected, /*count=*/n_vals); + CheckDecodedBits({.actual = out_2, .expected = expected, .count = n_vals}); } TEST(RleBitPackedToBitmapDecoder, Truncated) { @@ -485,7 +592,7 @@ TEST(RleBitPackedToBitmapDecoder, Truncated) { const auto got = decoder.GetBatch(BitmapSpanMut(out.data()), 1000); EXPECT_EQ(got, 10); EXPECT_FALSE(decoder.exhausted()); - CheckDecodedBits(out, expected, /*count=*/10); + CheckDecodedBits({.actual = out, .expected = expected, .count = 10}); } } // namespace arrow::util From 41fc6c78676cf635e3d4559264b9289c55ebc855 Mon Sep 17 00:00:00 2001 From: AntoinePrv Date: Mon, 22 Jun 2026 11:57:23 +0200 Subject: [PATCH 19/21] Fix assertion --- cpp/src/arrow/util/rle_bitmap_internal.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/cpp/src/arrow/util/rle_bitmap_internal.h b/cpp/src/arrow/util/rle_bitmap_internal.h index ac19123fe7b4..4a91a8474f96 100644 --- a/cpp/src/arrow/util/rle_bitmap_internal.h +++ b/cpp/src/arrow/util/rle_bitmap_internal.h @@ -183,8 +183,8 @@ class RleRunToBitmapDecoder { /// Get batch in full bytes using memset. [[nodiscard]] rle_size_t GetBatchFullBytes(BitmapSpanMut out, rle_size_t batch_size) { - ARROW_DCHECK(out.bit_start() == 0 || batch_size == 0); const auto n_bytes = std::min(batch_size, remaining()) / 8; + ARROW_DCHECK(out.bit_start() == 0 || n_bytes == 0); std::memset(out.data(), value_pattern_, n_bytes); const auto n_vals = 8 * n_bytes; AdvanceUnsafe(n_vals); From 6fc9785559931f94a67bd807e985b6e38f5d75d0 Mon Sep 17 00:00:00 2001 From: AntoinePrv Date: Tue, 23 Jun 2026 13:56:57 +0200 Subject: [PATCH 20/21] Adjust for review --- cpp/src/arrow/util/bit_util.h | 11 ++++++----- cpp/src/arrow/util/rle_bitmap_internal.h | 16 ++++++++-------- cpp/src/arrow/util/rle_bitmap_test.cc | 4 ++-- 3 files changed, 16 insertions(+), 15 deletions(-) diff --git a/cpp/src/arrow/util/bit_util.h b/cpp/src/arrow/util/bit_util.h index c7ba7e4f4689..cd6d15f91341 100644 --- a/cpp/src/arrow/util/bit_util.h +++ b/cpp/src/arrow/util/bit_util.h @@ -183,8 +183,8 @@ template struct CopyBitsParams { Uint src = {}; Uint dst = {}; - Uint start = {}; - Uint end = {}; + int start = {}; + int end = {}; }; /// Copy a contiguous span of bits from src into dst. @@ -195,19 +195,20 @@ struct CopyBitsParams { /// guarantee that the range of bits to copy does not cover the whole range. template [[nodiscard]] constexpr Uint CopyBitsInInteger(const CopyBitsParams& params) { - constexpr auto kUintSizeBits = static_cast(sizeof(Uint) * 8); + constexpr auto kUintSizeBits = static_cast(sizeof(Uint) * 8); assert(params.start <= params.end); assert(params.start < kUintSizeBits); assert(params.end <= kUintSizeBits); - const auto length = static_cast(params.end - params.start); + const int length = params.end - params.start; if constexpr (kAllowFullCopy) { if (length == kUintSizeBits) { return params.src; } } assert(length < kUintSizeBits); - const Uint mask = LeastSignificantBitMask(length) << params.start; + const Uint mask = + static_cast(LeastSignificantBitMask(length) << params.start); return (~mask & params.dst) | (mask & params.src); } diff --git a/cpp/src/arrow/util/rle_bitmap_internal.h b/cpp/src/arrow/util/rle_bitmap_internal.h index 4a91a8474f96..0667a9f0a8ec 100644 --- a/cpp/src/arrow/util/rle_bitmap_internal.h +++ b/cpp/src/arrow/util/rle_bitmap_internal.h @@ -98,7 +98,7 @@ class RleRunToBitmapDecoder { /// Return how much the decoder would advance if asked to. /// /// Does not modify input. - rle_size_t AdvanceCapacity(rle_size_t batch_size) const noexcept { + rle_size_t GetAdvanceCapacity(rle_size_t batch_size) const noexcept { const auto n_vals = std::min(batch_size, remaining()); return n_vals; } @@ -106,16 +106,16 @@ class RleRunToBitmapDecoder { /// Advance by as many values as provided or until exhaustion of the decoder. /// Return the number of values skipped. [[nodiscard]] rle_size_t Advance(rle_size_t batch_size) { - const auto n_vals = AdvanceCapacity(batch_size); + const auto n_vals = GetAdvanceCapacity(batch_size); values_left_ -= n_vals; ARROW_DCHECK_GE(remaining(), 0); return n_vals; } - /// Get the next value and return false if there are no more. + /// Read the next value into `out` and return false if there are no more. [[nodiscard]] bool Get(BitmapSpanMut out) { return GetBatch(out, 1) == 1; } - /// Get a batch of values return the number of decoded elements. + /// Get a batch of values into `out` and return the number of decoded elements. /// /// May write fewer elements to the output than requested if there are not /// enough values left. @@ -140,7 +140,7 @@ class RleRunToBitmapDecoder { // TRAILER: Writing inside the last byte if caller asked for non multiple of 8 values const auto n_last_vals = std::min(batch_size - n_vals, remaining()); - if (ARROW_PREDICT_FALSE(n_last_vals > 0)) { + if (n_last_vals > 0) { ARROW_DCHECK_LT(n_last_vals, 8); n_vals += GetBatchInByte(out.NewStartingAt(n_vals), n_last_vals); } @@ -215,7 +215,7 @@ class BitPackedRunToBitmapDecoder { /// Return how much the decoder would advance if asked to. /// /// Does not modify input. - rle_size_t AdvanceCapacity(rle_size_t batch_size) const noexcept { + rle_size_t GetAdvanceCapacity(rle_size_t batch_size) const noexcept { const auto n_vals = std::min(batch_size, remaining()); return n_vals; } @@ -223,7 +223,7 @@ class BitPackedRunToBitmapDecoder { /// Advance by as many values as provided or until exhaustion of the decoder. /// Return the number of values skipped. [[nodiscard]] rle_size_t Advance(rle_size_t batch_size) { - const auto n_vals = AdvanceCapacity(batch_size); + const auto n_vals = GetAdvanceCapacity(batch_size); values_read_ += n_vals; ARROW_DCHECK_GE(remaining(), 0); return n_vals; @@ -236,7 +236,7 @@ class BitPackedRunToBitmapDecoder { /// May write fewer elements to the output than requested if there are not enough values /// left. [[nodiscard]] rle_size_t GetBatch(BitmapSpanMut out, rle_size_t batch_size) { - auto n_vals = AdvanceCapacity(batch_size); + auto n_vals = GetAdvanceCapacity(batch_size); arrow::internal::CopyBitmap(unread_values_ptr(), unread_values_bit_offset(), n_vals, out.data(), out.bit_start()); return Advance(n_vals); diff --git a/cpp/src/arrow/util/rle_bitmap_test.cc b/cpp/src/arrow/util/rle_bitmap_test.cc index 5f6e663f666e..4875de7dbbcf 100644 --- a/cpp/src/arrow/util/rle_bitmap_test.cc +++ b/cpp/src/arrow/util/rle_bitmap_test.cc @@ -315,7 +315,7 @@ TEST_P(RleRunToBitmapDecoderTest, Decode) { INSTANTIATE_TEST_SUITE_P( // RleBitmap, RleRunToBitmapDecoderTest, - ::testing::Values(0, 1, 3, 8, 9, 13, 64, 100, 100, 1000)); + ::testing::Values(0, 1, 3, 8, 9, 13, 64, 100, 177)); /********************************* * BitPackedRunToBitmapDecoder * @@ -454,7 +454,7 @@ void CheckRleBitPackedToBitmap(const std::vector& bytes, ASSERT_GT(n_vals, 0); for (const rle_size_t chunk_size : {rle_size_t{1}, rle_size_t{3}, rle_size_t{7}, rle_size_t{8}, rle_size_t{9}, - rle_size_t{33}, n_vals}) { + rle_size_t{33}, n_vals, n_vals + 1}) { CheckRleBitPackedDecode(bytes, expected, chunk_size); // A non-zero output offset forces the first run to start at a non-byte // aligned output position. From 441408df445b6fb6b62d79c203dd50801f6764ac Mon Sep 17 00:00:00 2001 From: AntoinePrv Date: Tue, 23 Jun 2026 16:34:02 +0200 Subject: [PATCH 21/21] Fix uniform int distribution type --- cpp/src/arrow/util/rle_bitmap_test.cc | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/cpp/src/arrow/util/rle_bitmap_test.cc b/cpp/src/arrow/util/rle_bitmap_test.cc index 4875de7dbbcf..ef7524407d32 100644 --- a/cpp/src/arrow/util/rle_bitmap_test.cc +++ b/cpp/src/arrow/util/rle_bitmap_test.cc @@ -36,9 +36,9 @@ namespace { std::vector MakeRandomBytes(size_t size, uint32_t seed = 56) { std::vector bytes(size); std::minstd_rand gen(seed); - std::uniform_int_distribution dist(0, 255); + std::uniform_int_distribution dist(0, 255); // no standard support for uint8_t for (auto& byte : bytes) { - byte = dist(gen); + byte = static_cast(dist(gen)); } return bytes; }