diff --git a/contextifier/chunking/protected_regions.py b/contextifier/chunking/protected_regions.py index 69344d0..2a7170f 100644 --- a/contextifier/chunking/protected_regions.py +++ b/contextifier/chunking/protected_regions.py @@ -418,19 +418,27 @@ def split_with_protected_regions( # Table/block is larger than chunk_size table_content = text[t_start:t_end].strip() - # Check type and split efficiently - if block_type == 'html' or table_content.startswith(' single chunk (never split) + if table_content: + chunks.append(table_content) else: - # Charts, textboxes, etc. -> single chunk (never split) + # force_chunking=False: Keep entire block as single chunk + # Tables, charts, textboxes, etc. are protected and never split if table_content: chunks.append(table_content) @@ -525,8 +533,9 @@ def split_with_protected_regions( # Space before table too small -> handle table table_content = text[t_start:t_end].strip() - # Split table if larger than chunk_size - if table_size > chunk_size: + # CRITICAL: Only split tables when force_chunking=True + # When force_chunking=False, tables are protected and should NOT be split + if table_size > chunk_size and force_chunking: if block_type == 'html' or table_content.startswith('