From 9138b990e0ed9a49c2a9bb564a364e74935f7b56 Mon Sep 17 00:00:00 2001 From: Octopus Date: Thu, 2 Apr 2026 15:07:28 +0800 Subject: [PATCH] fix: add type checks to prevent AttributeError when LLM returns malformed JSON (fixes #199) --- pageindex/page_index.py | 9 ++++++--- 1 file changed, 6 insertions(+), 3 deletions(-) diff --git a/pageindex/page_index.py b/pageindex/page_index.py index 9004309fb..d337a0197 100644 --- a/pageindex/page_index.py +++ b/pageindex/page_index.py @@ -584,9 +584,12 @@ def process_no_toc(page_list, start_index=1, model=None, logger=None): logger.info(f'len(group_texts): {len(group_texts)}') toc_with_page_number= generate_toc_init(group_texts[0], model) + if not isinstance(toc_with_page_number, list): + toc_with_page_number = [] for group_text in group_texts[1:]: - toc_with_page_number_additional = generate_toc_continue(toc_with_page_number, group_text, model) - toc_with_page_number.extend(toc_with_page_number_additional) + toc_with_page_number_additional = generate_toc_continue(toc_with_page_number, group_text, model) + if isinstance(toc_with_page_number_additional, list): + toc_with_page_number.extend(toc_with_page_number_additional) logger.info(f'generate_toc: {toc_with_page_number}') toc_with_page_number = convert_physical_index_to_int(toc_with_page_number) @@ -967,7 +970,7 @@ async def meta_processor(page_list, mode=None, toc_content=None, toc_page_list=N else: toc_with_page_number = process_no_toc(page_list, start_index=start_index, model=opt.model, logger=logger) - toc_with_page_number = [item for item in toc_with_page_number if item.get('physical_index') is not None] + toc_with_page_number = [item for item in toc_with_page_number if isinstance(item, dict) and item.get('physical_index') is not None] toc_with_page_number = validate_and_truncate_physical_indices( toc_with_page_number,