From 533ec516cd1edc8bcd4103a952d4a386ae82bf28 Mon Sep 17 00:00:00 2001 From: CocoRoF Date: Fri, 23 Jan 2026 18:27:53 +0900 Subject: [PATCH] feat: Enhance table splitting logic to respect force_chunking flag --- contextifier/chunking/protected_regions.py | 39 +++++++++++++--------- 1 file changed, 24 insertions(+), 15 deletions(-) diff --git a/contextifier/chunking/protected_regions.py b/contextifier/chunking/protected_regions.py index 69344d0..2a7170f 100644 --- a/contextifier/chunking/protected_regions.py +++ b/contextifier/chunking/protected_regions.py @@ -418,19 +418,27 @@ def split_with_protected_regions( # Table/block is larger than chunk_size table_content = text[t_start:t_end].strip() - # Check type and split efficiently - if block_type == 'html' or table_content.startswith(' single chunk (never split) + if table_content: + chunks.append(table_content) else: - # Charts, textboxes, etc. -> single chunk (never split) + # force_chunking=False: Keep entire block as single chunk + # Tables, charts, textboxes, etc. are protected and never split if table_content: chunks.append(table_content) @@ -525,8 +533,9 @@ def split_with_protected_regions( # Space before table too small -> handle table table_content = text[t_start:t_end].strip() - # Split table if larger than chunk_size - if table_size > chunk_size: + # CRITICAL: Only split tables when force_chunking=True + # When force_chunking=False, tables are protected and should NOT be split + if table_size > chunk_size and force_chunking: if block_type == 'html' or table_content.startswith('