diff --git a/doc-maker/config.json b/doc-maker/config.json index 68d73a2b..79a3da47 100644 --- a/doc-maker/config.json +++ b/doc-maker/config.json @@ -1,6 +1,6 @@ { "name": "Doc Maker", - "version": "2.0.0", + "version": "2.1.0", "description": "Word document automation integration using python-docx with markdown-first content creation. AI agents should use markdown syntax for most content creation - headings (#), paragraphs, lists (- or 1.), formatting (**bold**, *italic*), tables, blockquotes (>), and code blocks. Only use non-markdown actions for images, page breaks, or when you need direct table creation with structured data arrays.", "entry_point": "doc_maker.py", "actions": { @@ -10,6 +10,10 @@ "input_schema": { "type": "object", "properties": { + "title": { + "type": "string", + "description": "Optional title for the document. Added as a heading if no markdown content is provided" + }, "markdown_content": { "type": "string", "description": "Markdown content to convert to Word document format. Use standard markdown: # for headings, **bold**, *italic*, - for bullets, 1. for numbers, | tables |, > blockquotes, ```code blocks```" diff --git a/doc-maker/doc_maker.py b/doc-maker/doc_maker.py index f9fbe4ab..707fd164 100644 --- a/doc-maker/doc_maker.py +++ b/doc-maker/doc_maker.py @@ -10,6 +10,8 @@ from docx.shared import Inches from docx.enum.text import WD_BREAK from docx.document import Document as _Document +from docx.oxml import OxmlElement +from docx.oxml.ns import qn from docx.oxml.text.paragraph import CT_P from docx.oxml.table import CT_Tbl from docx.table import _Cell, Table @@ -554,12 +556,537 @@ def analyze_document_structure(doc: Document) -> dict: } +_PAREN_ITEM_RE = re.compile( + r"\((" + r"\d+" # (1), (2), … + r"|[a-z]" # (a), (b), … + r"|[mdclxvi]{2,}" # (ii), (iv), (xiii), (xlii), … + r")\)\s+", + re.IGNORECASE, +) + +_ROMAN_CHAR_VALS = {"m": 1000, "d": 500, "c": 100, "l": 50, "x": 10, "v": 5, "i": 1} +_AMBIGUOUS_ROMAN_LETTERS = frozenset(_ROMAN_CHAR_VALS) + + +def _parse_roman(s: str) -> int | None: + """Parse a roman numeral string and return its integer value, or None if invalid.""" + low = s.lower() + if not low or not all(c in _ROMAN_CHAR_VALS for c in low): + return None + total = 0 + for idx, ch in enumerate(low): + val = _ROMAN_CHAR_VALS[ch] + if idx + 1 < len(low) and _ROMAN_CHAR_VALS[low[idx + 1]] > val: + total -= val + else: + total += val + return total + + +def _detect_paren_type(marker: str) -> tuple[str, int]: + """Given the text between parens (e.g. 'a', 'ii', '3'), return (ol_type, start_val).""" + low = marker.lower() + roman_val = _parse_roman(low) + if roman_val is not None and (len(low) > 1 or low in _AMBIGUOUS_ROMAN_LETTERS): + roman_type = "I" if marker[0].isupper() else "i" + return roman_type, roman_val + if low.isalpha() and len(low) == 1: + ol_type = "A" if marker.isupper() else "a" + return ol_type, ord(low) - ord("a") + 1 + if low.isdigit(): + return "1", int(low) + return "1", 1 + + +def _reconcile_ambiguous_markers( + list_items: list[tuple[str, int, str, int]], +) -> list[tuple[str, int, str, int]]: + """Fix single-letter markers (i/v/x/l/c/d/m) misclassified as roman when + they belong to an alphabetic sequence. + + Walk the collected items and look at the preceding run. When a marker + was tagged as roman (``"i"`` or ``"I"``) but is a single ambiguous letter + whose alphabetic position is consistent with the prior alphabetic item, + reclassify it as lowercase (``"a"``) or uppercase (``"A"``) accordingly. + """ + if not list_items: + return list_items + + result = list(list_items) + for idx in range(len(result)): + ol_type, start_val, text, indent = result[idx] + if ol_type not in ("i", "I") or start_val not in _ROMAN_CHAR_VALS.values(): + continue + # Only single-letter ambiguous markers need reconciliation + letter = next((ch for ch, val in _ROMAN_CHAR_VALS.items() if val == start_val), None) + if letter is None: + continue + alpha_pos = ord(letter) - ord("a") + 1 + # Determine the target alphabetic type based on the roman marker's case + target_alpha = "A" if ol_type == "I" else "a" + # Check if the preceding item is alphabetic and this letter continues it + if idx > 0: + prev_type, prev_val, _, _ = result[idx - 1] + if prev_type == target_alpha and alpha_pos == prev_val + 1: + result[idx] = (target_alpha, alpha_pos, text, indent) + return result + + +def _set_li_text_with_breaks(soup, li_tag, text: str) -> None: + """Set the text of a ``
  • `` element, inserting ``
    `` tags for newlines.""" + from bs4 import NavigableString + + parts = text.split("\n") + li_tag.append(NavigableString(parts[0])) + for part in parts[1:]: + li_tag.append(soup.new_tag("br")) + li_tag.append(NavigableString(part)) + + +def _post_process_paren_lists(soup) -> None: + """Walk the soup and convert parenthesized numbering in text into nested
      elements. + + After the markdown parser runs, ``(a) text`` patterns appear as plain text + inside ``
    1. `` or ``

      `` elements. This function finds those patterns + and restructures the HTML so that ``_add_list_items`` sees proper nested + ``

        `` elements with ``type`` and ``data-paren`` attributes. + """ + from bs4 import NavigableString + + # Process
      1. elements that contain inline (a)/(1)/(i) patterns + for li in list(soup.find_all("li")): + # Get the raw text content of this li (may span multiple NavigableStrings) + full_text = li.get_text() + if not _PAREN_ITEM_RE.search(full_text): + continue + + # Split text into the leading part (before the first marker) and the list items + lines = full_text.split("\n") + leading_lines: list[str] = [] + list_items: list[tuple[str, int, str, int]] = [] # (type, start_val, text, indent_spaces) + + for line in lines: + stripped = line.strip() + if not stripped: + continue + m = _PAREN_ITEM_RE.match(stripped) + if m: + ol_type, start_val = _detect_paren_type(m.group(1)) + item_text = stripped[m.end() :] + indent_spaces = len(line) - len(line.lstrip()) + list_items.append((ol_type, start_val, item_text, indent_spaces)) + else: + if not list_items: + leading_lines.append(stripped) + else: + # Continuation text for the last list item + last = list_items[-1] + list_items[-1] = ( + last[0], + last[1], + last[2] + "\n" + stripped, + last[3], + ) + + if not list_items: + continue + + list_items = _reconcile_ambiguous_markers(list_items) + + # Rebuild the
      2. contents + li.clear() + if leading_lines: + li.append(NavigableString(" ".join(leading_lines))) + + # Group consecutive items by type and build
          elements + current_type = None + current_ol = None + for ol_type, start_val, item_text, indent_spaces in list_items: + if ol_type != current_type: + current_type = ol_type + indent_level = indent_spaces // 4 + current_ol = soup.new_tag( + "ol", + attrs={ + "type": ol_type, + "data-paren": "true", + "data-indent-level": str(indent_level), + }, + ) + if start_val != 1: + current_ol["start"] = str(start_val) + li.append(current_ol) + new_li = soup.new_tag("li") + _set_li_text_with_breaks(soup, new_li, item_text) + current_ol.append(new_li) + + # Also handle standalone

          elements with (a)/(1)/(i) patterns (not inside a list) + for p in list(soup.find_all("p", recursive=False)): + full_text = p.get_text() + if not _PAREN_ITEM_RE.search(full_text): + continue + + lines = full_text.split("\n") + leading_lines: list[str] = [] + list_items: list[tuple[str, int, str, int]] = [] + for line in lines: + stripped = line.strip() + if not stripped: + continue + m = _PAREN_ITEM_RE.match(stripped) + if m: + ol_type, start_val = _detect_paren_type(m.group(1)) + indent_spaces = len(line) - len(line.lstrip()) + list_items.append((ol_type, start_val, stripped[m.end() :], indent_spaces)) + elif not list_items: + leading_lines.append(stripped) + else: + last = list_items[-1] + list_items[-1] = ( + last[0], + last[1], + last[2] + "\n" + stripped, + last[3], + ) + if not list_items: + continue + + list_items = _reconcile_ambiguous_markers(list_items) + + # Preserve any non-list text that appeared before the first marker + if leading_lines: + new_p = soup.new_tag("p") + new_p.string = "\n".join(leading_lines) + p.insert_before(new_p) + + current_type = None + current_ol = None + for ol_type, start_val, item_text, indent_spaces in list_items: + if ol_type != current_type: + current_type = ol_type + indent_level = indent_spaces // 4 + current_ol = soup.new_tag( + "ol", + attrs={ + "type": ol_type, + "data-paren": "true", + "data-indent-level": str(indent_level), + }, + ) + if start_val != 1: + current_ol["start"] = str(start_val) + p.insert_before(current_ol) + new_li = soup.new_tag("li") + _set_li_text_with_breaks(soup, new_li, item_text) + current_ol.append(new_li) + p.decompose() + + +# --------------------------------------------------------------------------- +# Low-level OOXML numbering helpers +# --------------------------------------------------------------------------- + + +def _numbering_root(doc): + """Return the root element, creating the numbering part if needed.""" + try: + return doc.part.numbering_part._element + except Exception: + # No numbering part yet – force creation by adding and removing a list paragraph + dummy = doc.add_paragraph("", style="List Number") + dummy._element.getparent().remove(dummy._element) + return doc.part.numbering_part._element + + +def _next_abstract_num_id(numbering) -> int: + ids = [int(el.get(qn("w:abstractNumId"))) for el in numbering.findall(qn("w:abstractNum"))] + return max(ids, default=-1) + 1 + + +def _next_num_id(numbering) -> int: + ids = [int(el.get(qn("w:numId"))) for el in numbering.findall(qn("w:num"))] + return max(ids, default=0) + 1 + + +_LIST_HANGING_INDENT = 504 +"""Hanging indent in twips used for every numbered-list level. + +A single value is used for **all** numbering formats so that item text aligns +at the same column across lists that use different label styles (e.g. ``1.``, +``(a)``, ``(iii)``). 504 twips (≈ 0.35 in) is wide enough for the widest +common parenthesized roman label ``(viii)`` while keeping the indent compact. +""" + + +def _get_or_create_abstract_num(doc, num_fmt: str, lvl_text: str, nesting_levels: int = 3, start: int = 1) -> int: + """Create an abstract numbering definition for the given format. + + Always creates a new definition so that each independent list gets its own + ``abstractNumId``. Sharing an abstract num across multiple ```` + elements can cause Word to silently drop numbering on some lists. + + Creates a multilevel abstract numbering so nested lists at different ilvl + values share a single definition with increasing indentation. + + *start* sets the ```` value for the first level (ilvl 0). + This ensures renderers that ignore ``/`` + still produce the correct numbering. + """ + numbering = _numbering_root(doc) + abstract_num_id = _next_abstract_num_id(numbering) + + abstract_num = OxmlElement("w:abstractNum") + abstract_num.set(qn("w:abstractNumId"), str(abstract_num_id)) + + multi_level_type = OxmlElement("w:multiLevelType") + multi_level_type.set(qn("w:val"), "multilevel") + abstract_num.append(multi_level_type) + + hanging = _LIST_HANGING_INDENT + + for ilvl in range(nesting_levels): + lvl = OxmlElement("w:lvl") + lvl.set(qn("w:ilvl"), str(ilvl)) + + start_el = OxmlElement("w:start") + start_el.set(qn("w:val"), str(start if ilvl == 0 else 1)) + lvl.append(start_el) + + fmt_el = OxmlElement("w:numFmt") + fmt_el.set(qn("w:val"), num_fmt) + lvl.append(fmt_el) + + # Use the ilvl+1 placeholder for each level (e.g. %1, %2, %3) + actual_lvl_text = lvl_text.replace("%1", f"%{ilvl + 1}") + text_el = OxmlElement("w:lvlText") + text_el.set(qn("w:val"), actual_lvl_text) + lvl.append(text_el) + + jc = OxmlElement("w:lvlJc") + jc.set(qn("w:val"), "left") + lvl.append(jc) + + # Force a tab character after the label so text aligns at the + # left-indent position regardless of label width. + suff = OxmlElement("w:suff") + suff.set(qn("w:val"), "tab") + lvl.append(suff) + + left = hanging + (hanging * ilvl) + ppr = OxmlElement("w:pPr") + ind = OxmlElement("w:ind") + ind.set(qn("w:left"), str(left)) + ind.set(qn("w:hanging"), str(hanging)) + ppr.append(ind) + + # Explicit tab stop at the text position so the tab after the + # label lands exactly at the left indent. + tabs = OxmlElement("w:tabs") + tab = OxmlElement("w:tab") + tab.set(qn("w:val"), "num") + tab.set(qn("w:pos"), str(left)) + tabs.append(tab) + ppr.append(tabs) + + lvl.append(ppr) + + abstract_num.append(lvl) + + # OOXML requires all elements before any . + # Insert before the first so Word doesn't silently ignore it. + first_num = numbering.find(qn("w:num")) + if first_num is not None: + first_num.addprevious(abstract_num) + else: + numbering.append(abstract_num) + return abstract_num_id + + +def _create_num(doc, abstract_num_id: int, start_override: int | None = None, level: int = 0) -> int: + """Create a new referencing the given abstract numbering. + + If *start_override* is provided, a ```` element is added so + that numbering starts at the given value rather than continuing. + """ + numbering = _numbering_root(doc) + num_id = _next_num_id(numbering) + + num = OxmlElement("w:num") + num.set(qn("w:numId"), str(num_id)) + + abstract_ref = OxmlElement("w:abstractNumId") + abstract_ref.set(qn("w:val"), str(abstract_num_id)) + num.append(abstract_ref) + + if start_override is not None: + lvl_override = OxmlElement("w:lvlOverride") + lvl_override.set(qn("w:ilvl"), str(level)) + + start_el = OxmlElement("w:startOverride") + start_el.set(qn("w:val"), str(start_override)) + lvl_override.append(start_el) + + num.append(lvl_override) + + numbering.append(num) + return num_id + + +def _apply_numbering(paragraph, num_id: int, level: int = 0) -> None: + """Apply numbering properties to a paragraph at the given nesting level.""" + p_pr = paragraph._p.get_or_add_pPr() + + num_pr = p_pr.find(qn("w:numPr")) + if num_pr is None: + num_pr = OxmlElement("w:numPr") + p_pr.append(num_pr) + + ilvl = num_pr.find(qn("w:ilvl")) + if ilvl is None: + ilvl = OxmlElement("w:ilvl") + num_pr.append(ilvl) + ilvl.set(qn("w:val"), str(level)) + + num_id_el = num_pr.find(qn("w:numId")) + if num_id_el is None: + num_id_el = OxmlElement("w:numId") + num_pr.append(num_id_el) + num_id_el.set(qn("w:val"), str(num_id)) + + +def _patch_abstract_num_level(doc, num_id: int, level: int, num_fmt: str, lvl_text: str) -> None: + """Patch the abstractNum referenced by *num_id* so that *level* uses the given format. + + When a child list (e.g. ``(a)``) is nested under a parent list (e.g. ``1.``), + both must share the same ``numId``. This function updates the parent's + abstract numbering definition so that the child's ``ilvl`` has the correct + ``numFmt`` and ``lvlText``. + """ + numbering = _numbering_root(doc) + + # Find the for this numId and get its abstractNumId + abstract_num_id = None + for num_el in numbering.findall(qn("w:num")): + if int(num_el.get(qn("w:numId"))) == num_id: + abstract_num_id = int(num_el.find(qn("w:abstractNumId")).get(qn("w:val"))) + break + if abstract_num_id is None: + return + + # Find the abstractNum + abstract_num = None + for an in numbering.findall(qn("w:abstractNum")): + if int(an.get(qn("w:abstractNumId"))) == abstract_num_id: + abstract_num = an + break + if abstract_num is None: + return + + # Find or create the for this ilvl + target_lvl = None + for lvl in abstract_num.findall(qn("w:lvl")): + if int(lvl.get(qn("w:ilvl"))) == level: + target_lvl = lvl + break + + if target_lvl is None: + # Create a new level + target_lvl = OxmlElement("w:lvl") + target_lvl.set(qn("w:ilvl"), str(level)) + start_el = OxmlElement("w:start") + start_el.set(qn("w:val"), "1") + target_lvl.append(start_el) + abstract_num.append(target_lvl) + + # Update numFmt + fmt_el = target_lvl.find(qn("w:numFmt")) + if fmt_el is None: + fmt_el = OxmlElement("w:numFmt") + target_lvl.append(fmt_el) + fmt_el.set(qn("w:val"), num_fmt) + + # Update lvlText + actual_lvl_text = lvl_text.replace("%1", f"%{level + 1}") + txt_el = target_lvl.find(qn("w:lvlText")) + if txt_el is None: + txt_el = OxmlElement("w:lvlText") + target_lvl.append(txt_el) + txt_el.set(qn("w:val"), actual_lvl_text) + + # Ensure lvlJc exists + jc = target_lvl.find(qn("w:lvlJc")) + if jc is None: + jc = OxmlElement("w:lvlJc") + jc.set(qn("w:val"), "left") + target_lvl.append(jc) + + # Ensure suffix is tab-based for consistent text alignment + suff = target_lvl.find(qn("w:suff")) + if suff is None: + suff = OxmlElement("w:suff") + target_lvl.append(suff) + suff.set(qn("w:val"), "tab") + + # Ensure indentation + hanging = _LIST_HANGING_INDENT + left = hanging + (hanging * level) + ppr = target_lvl.find(qn("w:pPr")) + if ppr is None: + ppr = OxmlElement("w:pPr") + target_lvl.append(ppr) + ind = ppr.find(qn("w:ind")) + if ind is None: + ind = OxmlElement("w:ind") + ppr.append(ind) + ind.set(qn("w:left"), str(left)) + ind.set(qn("w:hanging"), str(hanging)) + + # Explicit tab stop at the text position + tabs = ppr.find(qn("w:tabs")) + if tabs is None: + tabs = OxmlElement("w:tabs") + ppr.append(tabs) + tab = OxmlElement("w:tab") + tab.set(qn("w:val"), "num") + tab.set(qn("w:pos"), str(left)) + tabs.append(tab) + + +def _ol_type_to_numfmt(type_attr: str | None, paren: bool = False) -> tuple[str, str]: + """Map HTML

            to (OOXML numFmt, lvlText). + + When *paren* is True the level text uses parenthesized form ``(%1)`` + for all types. Otherwise decimal uses ``%1.`` (standard ``1. 2. 3.``). + """ + type_attr = type_attr or "1" + fmt_map = { + "1": "decimal", + "a": "lowerLetter", + "A": "upperLetter", + "i": "lowerRoman", + "I": "upperRoman", + } + num_fmt = fmt_map.get(type_attr, "decimal") + if paren or type_attr.lower() in ("a", "i"): + lvl_text = "(%1)" + else: + lvl_text = "%1." + return num_fmt, lvl_text + + def parse_markdown_to_docx(doc: Document, markdown_text: str) -> None: """Parse markdown text and add elements to Word document""" # Convert markdown to HTML - html = markdown.markdown(markdown_text, extensions=["tables", "fenced_code"]) + html = markdown.markdown(markdown_text, extensions=["tables", "fenced_code", "sane_lists"]) soup = BeautifulSoup(html, "html.parser") + # Post-process: convert (a), (1), (i) text patterns into nested
              elements + _post_process_paren_lists(soup) + + # Track ordered list numbering state for restart/continue semantics + list_state: dict[str, Any] = {"ordered": {}} + # Process each HTML element in order for element in soup.find_all( [ @@ -575,10 +1102,13 @@ def parse_markdown_to_docx(doc: Document, markdown_text: str) -> None: "blockquote", "table", "pre", - ] + ], + recursive=False, ): if element.name.startswith("h"): - # Handle headings + # Handle headings – reset list continuation state so lists + # after a heading start fresh + list_state["ordered"] = {} level = int(element.name[1]) # Extract number from h1, h2, etc. text = element.get_text().strip() if text: @@ -590,15 +1120,8 @@ def parse_markdown_to_docx(doc: Document, markdown_text: str) -> None: _add_formatted_text_to_paragraph(paragraph, element) elif element.name in ["ul", "ol"]: - # Handle lists - is_numbered = element.name == "ol" - for li in element.find_all("li", recursive=False): - text = li.get_text().strip() - if text: - if is_numbered: - doc.add_paragraph(text, style="List Number") - else: - doc.add_paragraph(text, style="List Bullet") + # Handle lists (including nested) + _add_list_items(doc, element, level=0, list_state=list_state) elif element.name == "blockquote": # Handle blockquotes @@ -620,13 +1143,116 @@ def parse_markdown_to_docx(doc: Document, markdown_text: str) -> None: run.font.name = "Courier New" -def _add_formatted_text_to_paragraph(paragraph, html_element): +def _add_list_items( + doc: Document, + list_element, + level: int, + list_state: dict, + parent_num_id: int | None = None, +) -> None: + """Recursively add list items to Word document with proper nesting. + + For bullet lists, uses Word's built-in 'List Bullet' styles. + For ordered lists, creates low-level OOXML numbering definitions that + support custom formats (decimal, lowerLetter, lowerRoman) and proper + restart/continuation semantics. + + *parent_num_id* is passed when a child ordered list should share the + parent's numbering instance so that Word renders all nesting levels + under one coherent list. + """ + is_numbered = list_element.name == "ol" + + # Respect original markdown indentation via data-indent-level attribute. + # Every 4 leading spaces in the markdown source maps to one indent level. + # When data-indent-level is set it already encodes the absolute nesting + # depth relative to the top-level list, so we must NOT add `level` (which + # the recursive call already incremented) on top of it — that would + # double-count the nesting. + indent_level = int(list_element.get("data-indent-level", 0)) + effective_level = indent_level if indent_level > 0 else level + + num_id = None + if is_numbered: + start = int(list_element.get("start", 1)) + type_attr = list_element.get("type") or "1" + + paren = list_element.get("data-paren") == "true" + num_fmt, lvl_text = _ol_type_to_numfmt(type_attr, paren=paren) + + if parent_num_id is not None and level > 0: + # Child list: reuse parent numId but patch the abstractNum to + # have the correct format at this ilvl. + num_id = parent_num_id + _patch_abstract_num_level(doc, num_id, effective_level, num_fmt, lvl_text) + else: + abstract_num_id = _get_or_create_abstract_num(doc, num_fmt, lvl_text, start=start) + + # Key for tracking continuation: lists at the same nesting level + # with the same format can continue numbering across boundaries + key = (effective_level, num_fmt, lvl_text) + + if start == 1: + num_id = _create_num(doc, abstract_num_id, start_override=1, level=effective_level) + else: + num_id = list_state["ordered"].get(key) + if num_id is None: + num_id = _create_num( + doc, + abstract_num_id, + start_override=start, + level=effective_level, + ) + + list_state["ordered"][key] = num_id + + else: + clamped_level = min(effective_level, 2) + bullet_style = "List Bullet" if clamped_level == 0 else f"List Bullet {clamped_level + 1}" + + for li in list_element.find_all("li", recursive=False): + # Collect direct text of this
            1. , ignoring nested
                /
                  + text_parts = [] + for child in li.children: + if hasattr(child, "name") and child.name in ("ul", "ol"): + continue + text_parts.append(child.get_text() if hasattr(child, "get_text") else str(child)) + text = "".join(text_parts).strip() + + if text: + if is_numbered: + p = doc.add_paragraph() + p.style = doc.styles["List Paragraph"] + _apply_numbering(p, num_id=num_id, level=effective_level) + _add_formatted_text_to_paragraph(p, li, skip_nested_lists=True) + else: + doc.add_paragraph(text, style=bullet_style) + + # Recurse into nested
                    or
                      (direct children of this
                    1. ) + # Nested ordered lists inherit the parent numId so Word keeps them + # under one coherent multilevel numbering instance. + effective_parent = num_id if is_numbered else parent_num_id + for nested_list in li.find_all(["ul", "ol"], recursive=False): + _add_list_items( + doc, + nested_list, + level + 1, + list_state=list_state, + parent_num_id=effective_parent, + ) + + +def _add_formatted_text_to_paragraph(paragraph, html_element, skip_nested_lists: bool = False): """Add formatted text from HTML element to Word paragraph""" # Handle direct text and formatting for content in html_element.contents: + if skip_nested_lists and hasattr(content, "name") and content.name in ("ul", "ol"): + continue if hasattr(content, "name") and content.name: # This is an HTML tag - if content.name == "strong" or content.name == "b": + if content.name == "br": + paragraph.add_run().add_break() + elif content.name == "strong" or content.name == "b": run = paragraph.add_run(content.get_text()) run.bold = True elif content.name == "em" or content.name == "i": @@ -641,7 +1267,7 @@ def _add_formatted_text_to_paragraph(paragraph, html_element): else: # Nested elements - recursively process only if it has contents if hasattr(content, "contents"): - _add_formatted_text_to_paragraph(paragraph, content) + _add_formatted_text_to_paragraph(paragraph, content, skip_nested_lists=skip_nested_lists) else: # Just add the text content text = content.get_text() @@ -842,7 +1468,7 @@ async def execute(self, inputs: Dict[str, Any], context: ExecutionContext): document_id = inputs["document_id"] width = inputs.get("width") # in inches height = inputs.get("height") # in inches - files = inputs.get("files", []) + files = inputs["files"] try: load_document_from_files(document_id, files) diff --git a/doc-maker/tests/test_doc_maker_unit.py b/doc-maker/tests/test_doc_maker_unit.py index f7dd0187..e617c85e 100644 --- a/doc-maker/tests/test_doc_maker_unit.py +++ b/doc-maker/tests/test_doc_maker_unit.py @@ -28,6 +28,7 @@ is_likely_placeholder_context = _mod.is_likely_placeholder_context analyze_replacement_safety = _mod.analyze_replacement_safety _save_document_to_dict = _mod._save_document_to_dict +parse_markdown_to_docx = _mod.parse_markdown_to_docx documents = _mod.documents pytestmark = pytest.mark.unit @@ -583,3 +584,1273 @@ def test_missing_document_returns_error_dict(self): assert result["saved"] is False assert "nonexistent-id" in result["error"] assert result["file"]["content"] == "" + + +class TestParenthesizedListNumbering: + """Verify that (1), (a), (i) style lists produce correct Word numbering.""" + + MARKDOWN = ( + "1. Elephant\n" + " (a) Elephants are the largest land animals on Earth, " + "with African elephants weighing up to 14,000 lbs.\n" + " (b) They have an exceptional memory and can recognize " + "themselves in mirrors, indicating self-awareness.\n" + "2. Axolotl\n" + " (a) Axolotls can regenerate entire limbs, including " + "parts of their heart and brain.\n" + " (b) Unlike most amphibians, axolotls retain their larval " + "features throughout their entire lives, a trait called neoteny." + ) + + @staticmethod + def _get_numpr(paragraph): + """Return (numId, ilvl) from a paragraph's w:numPr, or None.""" + from docx.oxml.ns import qn + + pPr = paragraph._p.find(qn("w:pPr")) + if pPr is None: + return None + numPr = pPr.find(qn("w:numPr")) + if numPr is None: + return None + numId_el = numPr.find(qn("w:numId")) + ilvl_el = numPr.find(qn("w:ilvl")) + if numId_el is None or ilvl_el is None: + return None + return int(numId_el.get(qn("w:val"))), int(ilvl_el.get(qn("w:val"))) + + @staticmethod + def _get_abstract_num_for(doc, num_id): + """Return the abstractNum element referenced by a given numId.""" + from docx.oxml.ns import qn + + numbering = doc.part.numbering_part._element + for num_el in numbering.findall(qn("w:num")): + if int(num_el.get(qn("w:numId"))) == num_id: + abstract_ref = num_el.find(qn("w:abstractNumId")) + abstract_id = int(abstract_ref.get(qn("w:val"))) + for an in numbering.findall(qn("w:abstractNum")): + if int(an.get(qn("w:abstractNumId"))) == abstract_id: + return an + return None + + def test_produces_six_numbered_paragraphs(self): + from docx import Document + + doc = Document() + parse_markdown_to_docx(doc, self.MARKDOWN) + + numbered = [(p.text.strip(), self._get_numpr(p)) for p in doc.paragraphs if self._get_numpr(p)] + assert len(numbered) == 6, f"Expected 6 numbered paragraphs, got {len(numbered)}: {numbered}" + + def test_top_level_items_are_at_ilvl_zero(self): + from docx import Document + + doc = Document() + parse_markdown_to_docx(doc, self.MARKDOWN) + + numbered = [(p.text.strip(), self._get_numpr(p)) for p in doc.paragraphs if self._get_numpr(p)] + top_items = [(text, numpr) for text, numpr in numbered if "Elephant" == text or "Axolotl" == text] + assert len(top_items) == 2, f"Expected 2 top-level items, got {top_items}" + for text, (num_id, ilvl) in top_items: + assert ilvl == 0, f"'{text}' should be at ilvl 0, got {ilvl}" + + def test_sub_items_are_indented(self): + from docx import Document + + doc = Document() + parse_markdown_to_docx(doc, self.MARKDOWN) + + numbered = [(p.text.strip(), self._get_numpr(p)) for p in doc.paragraphs if self._get_numpr(p)] + sub_items = [(text, numpr) for text, numpr in numbered if text not in ("Elephant", "Axolotl")] + assert len(sub_items) == 4, f"Expected 4 sub-items, got {len(sub_items)}" + for text, (num_id, ilvl) in sub_items: + assert ilvl >= 1, f"Sub-item should be indented (ilvl >= 1), got {ilvl}: {text}" + + def test_top_level_uses_decimal_numbering(self): + from docx import Document + from docx.oxml.ns import qn + + doc = Document() + parse_markdown_to_docx(doc, self.MARKDOWN) + + numbered = [(p.text.strip(), self._get_numpr(p)) for p in doc.paragraphs if self._get_numpr(p)] + elephant = next((text, numpr) for text, numpr in numbered if text == "Elephant") + num_id = elephant[1][0] + abstract = self._get_abstract_num_for(doc, num_id) + assert abstract is not None + lvl0 = abstract.find(qn("w:lvl")) + fmt = lvl0.find(qn("w:numFmt")).get(qn("w:val")) + assert fmt == "decimal", f"Top-level should be decimal, got {fmt}" + + def test_sub_items_use_lower_letter_parenthesized(self): + from docx import Document + from docx.oxml.ns import qn + + doc = Document() + parse_markdown_to_docx(doc, self.MARKDOWN) + + numbered = [(p.text.strip(), self._get_numpr(p)) for p in doc.paragraphs if self._get_numpr(p)] + first_sub = next((text, numpr) for text, numpr in numbered if "Elephants are" in text) + num_id, ilvl = first_sub[1] + abstract = self._get_abstract_num_for(doc, num_id) + assert abstract is not None + + # Find the lvl element matching the ilvl used + target_lvl = None + for lvl in abstract.findall(qn("w:lvl")): + if int(lvl.get(qn("w:ilvl"))) == ilvl: + target_lvl = lvl + break + assert target_lvl is not None + + fmt = target_lvl.find(qn("w:numFmt")).get(qn("w:val")) + assert fmt == "lowerLetter", f"Sub-items should be lowerLetter, got {fmt}" + lvl_text = target_lvl.find(qn("w:lvlText")).get(qn("w:val")) + assert "(" in lvl_text, f"Sub-items should have parenthesized format, got '{lvl_text}'" + + def test_elephant_text_on_same_line_as_number(self): + """The parent item text must appear in the same paragraph as the numbering.""" + from docx import Document + + doc = Document() + parse_markdown_to_docx(doc, self.MARKDOWN) + + numbered = [(p.text.strip(), self._get_numpr(p)) for p in doc.paragraphs if self._get_numpr(p)] + elephant_paras = [(t, n) for t, n in numbered if "Elephant" in t and n[1] == 0] + assert len(elephant_paras) >= 1 + assert elephant_paras[0][0] == "Elephant", ( + f"Top-level text should be exactly 'Elephant', got '{elephant_paras[0][0]}'" + ) + + def test_parent_and_children_share_same_numid(self): + """Word requires nested lists to share the same numId to render correctly.""" + from docx import Document + + doc = Document() + parse_markdown_to_docx(doc, self.MARKDOWN) + + numbered = [(p.text.strip(), self._get_numpr(p)) for p in doc.paragraphs if self._get_numpr(p)] + num_ids = set(numpr[0] for _, numpr in numbered) + assert len(num_ids) == 1, f"All items should share one numId for coherent multilevel numbering, got {num_ids}" + + +class TestAlphabeticListDoesNotSwitchAtI: + """Regression: an (a)…(z) alphabetic list must not switch numbering + format at ambiguous roman-numeral letters like ``(i)``, ``(v)``, + ``(x)``, ``(l)``, ``(c)``, ``(d)`` or ``(m)``. + + ``_detect_paren_type`` classifies these single letters as lowerRoman + before considering them as alphabetic. The reconciliation pass must + correct that when the letter continues an existing alphabetic run. + """ + + MARKDOWN = "\n".join(f"({chr(ord('a') + n)}) item {n + 1}" for n in range(26)) + + @staticmethod + def _get_numpr(paragraph): + from docx.oxml.ns import qn + + pPr = paragraph._p.find(qn("w:pPr")) + if pPr is None: + return None + numPr = pPr.find(qn("w:numPr")) + if numPr is None: + return None + numId_el = numPr.find(qn("w:numId")) + ilvl_el = numPr.find(qn("w:ilvl")) + if numId_el is None or ilvl_el is None: + return None + return int(numId_el.get(qn("w:val"))), int(ilvl_el.get(qn("w:val"))) + + def test_all_items_share_same_numid(self): + """All (a)–(z) items must share a single numId, confirming they + form one continuous list and no letter is misclassified as roman.""" + from docx import Document + + doc = Document() + parse_markdown_to_docx(doc, self.MARKDOWN) + + numbered = [(p.text.strip(), self._get_numpr(p)) for p in doc.paragraphs if self._get_numpr(p)] + + assert len(numbered) == 26, f"Expected 26 numbered paragraphs (a)–(z), got {len(numbered)}: {numbered}" + + num_ids = set(numpr[0] for _, numpr in numbered) + assert len(num_ids) == 1, ( + f"All (a)–(z) items should share one numId for a continuous " + f"list, but got {len(num_ids)} distinct numIds: {num_ids}" + ) + + +class TestRomanNumeralListUpTo100: + """Verify that a parenthesized roman-numeral list (i)–(c) covering all + 100 items is recognised and rendered as a single continuous Word list. + + The integration currently only supports roman numerals up to xii (12), + so this test is expected to **fail** until that support is extended. + """ + + _ROMAN_MAP = [ + (1000, "m"), + (900, "cm"), + (500, "d"), + (400, "cd"), + (100, "c"), + (90, "xc"), + (50, "l"), + (40, "xl"), + (10, "x"), + (9, "ix"), + (5, "v"), + (4, "iv"), + (1, "i"), + ] + + @classmethod + def _to_roman(cls, n: int) -> str: + result = [] + for value, numeral in cls._ROMAN_MAP: + while n >= value: + result.append(numeral) + n -= value + return "".join(result) + + @classmethod + def _build_markdown(cls) -> str: + return "\n".join(f"({cls._to_roman(n)}) item {n}" for n in range(1, 101)) + + MARKDOWN = None # built lazily via _build_markdown + + @pytest.fixture(autouse=True) + def _setup_markdown(self): + if TestRomanNumeralListUpTo100.MARKDOWN is None: + TestRomanNumeralListUpTo100.MARKDOWN = self._build_markdown() + + @staticmethod + def _get_numpr(paragraph): + from docx.oxml.ns import qn + + pPr = paragraph._p.find(qn("w:pPr")) + if pPr is None: + return None + numPr = pPr.find(qn("w:numPr")) + if numPr is None: + return None + numId_el = numPr.find(qn("w:numId")) + ilvl_el = numPr.find(qn("w:ilvl")) + if numId_el is None or ilvl_el is None: + return None + return int(numId_el.get(qn("w:val"))), int(ilvl_el.get(qn("w:val"))) + + def test_produces_100_numbered_paragraphs(self): + """All 100 roman-numeral items must appear as numbered paragraphs.""" + from docx import Document + + doc = Document() + parse_markdown_to_docx(doc, self.MARKDOWN) + + numbered = [(p.text.strip(), self._get_numpr(p)) for p in doc.paragraphs if self._get_numpr(p)] + + assert len(numbered) == 100, f"Expected 100 numbered paragraphs, got {len(numbered)}" + + def test_all_items_share_same_numid(self): + """All (i)–(c) items must share a single numId, confirming they + form one continuous list.""" + from docx import Document + + doc = Document() + parse_markdown_to_docx(doc, self.MARKDOWN) + + numbered = [(p.text.strip(), self._get_numpr(p)) for p in doc.paragraphs if self._get_numpr(p)] + + num_ids = set(numpr[0] for _, numpr in numbered) + assert len(num_ids) == 1, ( + f"All (i)–(c) items should share one numId for a continuous " + f"list, but got {len(num_ids)} distinct numIds: {num_ids}" + ) + + +class TestMultipleParenListsAfterHeadings: + """Verify that multiple (1)-style lists separated by headings all display numbering + and are left-aligned when the markdown has no leading spaces.""" + + MARKDOWN = "# Animals\n(1) Elephant\n(2) Tiger\n# Fish\n(1) squid\n(2) Whale" + + @staticmethod + def _get_numpr(paragraph): + from docx.oxml.ns import qn + + pPr = paragraph._p.find(qn("w:pPr")) + if pPr is None: + return None + numPr = pPr.find(qn("w:numPr")) + if numPr is None: + return None + numId_el = numPr.find(qn("w:numId")) + ilvl_el = numPr.find(qn("w:ilvl")) + if numId_el is None or ilvl_el is None: + return None + return int(numId_el.get(qn("w:val"))), int(ilvl_el.get(qn("w:val"))) + + @staticmethod + def _get_abstract_num_for(doc, num_id): + from docx.oxml.ns import qn + + numbering = doc.part.numbering_part._element + for num_el in numbering.findall(qn("w:num")): + if int(num_el.get(qn("w:numId"))) == num_id: + abstract_ref = num_el.find(qn("w:abstractNumId")) + abstract_id = int(abstract_ref.get(qn("w:val"))) + for an in numbering.findall(qn("w:abstractNum")): + if int(an.get(qn("w:abstractNumId"))) == abstract_id: + return an + return None + + def test_both_lists_produce_numbered_paragraphs(self): + from docx import Document + + doc = Document() + parse_markdown_to_docx(doc, self.MARKDOWN) + + numbered = [(p.text.strip(), self._get_numpr(p)) for p in doc.paragraphs if self._get_numpr(p)] + assert len(numbered) == 4, f"Expected 4 numbered paragraphs, got {len(numbered)}: {numbered}" + + def test_each_list_has_its_own_abstract_num(self): + """Each independent list must get its own abstractNum to avoid Word dropping numbers.""" + from docx import Document + from docx.oxml.ns import qn + + doc = Document() + parse_markdown_to_docx(doc, self.MARKDOWN) + + numbered = [(p.text.strip(), self._get_numpr(p)) for p in doc.paragraphs if self._get_numpr(p)] + animals_num_id = numbered[0][1][0] + fish_num_id = numbered[2][1][0] + + animals_abstract = self._get_abstract_num_for(doc, animals_num_id) + fish_abstract = self._get_abstract_num_for(doc, fish_num_id) + + assert animals_abstract is not None + assert fish_abstract is not None + + animals_abstract_id = int(animals_abstract.get(qn("w:abstractNumId"))) + fish_abstract_id = int(fish_abstract.get(qn("w:abstractNumId"))) + assert animals_abstract_id != fish_abstract_id, ( + "Each list should reference a different abstractNum to prevent Word from dropping numbers" + ) + + def test_all_items_at_ilvl_zero(self): + from docx import Document + + doc = Document() + parse_markdown_to_docx(doc, self.MARKDOWN) + + numbered = [(p.text.strip(), self._get_numpr(p)) for p in doc.paragraphs if self._get_numpr(p)] + for text, (num_id, ilvl) in numbered: + assert ilvl == 0, f"'{text}' should be at ilvl 0, got {ilvl}" + + def test_level_zero_is_left_aligned(self): + """Level-0 paren lists with no leading spaces should be left-aligned (left=hanging, hanging=504).""" + from docx import Document + from docx.oxml.ns import qn + + doc = Document() + parse_markdown_to_docx(doc, self.MARKDOWN) + + numbered = [(p.text.strip(), self._get_numpr(p)) for p in doc.paragraphs if self._get_numpr(p)] + first_num_id = numbered[0][1][0] + abstract = self._get_abstract_num_for(doc, first_num_id) + assert abstract is not None + + lvl0 = None + for lvl in abstract.findall(qn("w:lvl")): + if int(lvl.get(qn("w:ilvl"))) == 0: + lvl0 = lvl + break + assert lvl0 is not None + + pPr = lvl0.find(qn("w:pPr")) + assert pPr is not None + ind = pPr.find(qn("w:ind")) + assert ind is not None + left = ind.get(qn("w:left")) + hanging = ind.get(qn("w:hanging")) + assert left == hanging, ( + f"Level 0 left indent should equal hanging (left-aligned), got left={left}, hanging={hanging}" + ) + + def test_all_use_decimal_parenthesized_format(self): + from docx import Document + from docx.oxml.ns import qn + + doc = Document() + parse_markdown_to_docx(doc, self.MARKDOWN) + + numbered = [(p.text.strip(), self._get_numpr(p)) for p in doc.paragraphs if self._get_numpr(p)] + for text, (num_id, ilvl) in numbered: + abstract = self._get_abstract_num_for(doc, num_id) + assert abstract is not None + for lvl in abstract.findall(qn("w:lvl")): + if int(lvl.get(qn("w:ilvl"))) == ilvl: + fmt = lvl.find(qn("w:numFmt")).get(qn("w:val")) + assert fmt == "decimal", f"'{text}' should use decimal, got {fmt}" + lvl_text = lvl.find(qn("w:lvlText")).get(qn("w:val")) + assert "(" in lvl_text, f"'{text}' should have paren format, got '{lvl_text}'" + + +class TestMixedNumberedListFormats: + """Verify that five different numbered-list styles all render correctly: + 1. standard ``1. 2. 3.`` decimal + 2. parenthesized decimal ``(1) (2) (3)`` + 3. parenthesized lower letter ``(a) (b) (c)`` + 4. parenthesized upper letter ``(A) (B) (C)`` + 5. parenthesized lower roman ``(i) (ii) (iii)`` + + Each list must: + - be a real numbered (not bullet) list in the OOXML + - use the correct numFmt + - use left-aligned justification + - display the correct item text ("one", "two", "three") + - have a consistent hanging indent so text is aligned across items + whose numbering labels differ in width (e.g. ``(i)`` vs ``(iii)``). + """ + + MARKDOWN = ( + "# numbers\n" + "1. one\n" + "2. two\n" + "3. three\n" + "# numbers in brackets\n" + "(1) one\n" + "(2) two\n" + "(3) three\n" + "# letters in brackets\n" + "(a) one\n" + "(b) two\n" + "(c) three\n" + "# capital letters in brackets\n" + "(A) one\n" + "(B) two\n" + "(C) three\n" + "# roman numerals\n" + "(i) one\n" + "(ii) two\n" + "(iii) three" + ) + + @staticmethod + def _get_numpr(paragraph): + from docx.oxml.ns import qn + + pPr = paragraph._p.find(qn("w:pPr")) + if pPr is None: + return None + numPr = pPr.find(qn("w:numPr")) + if numPr is None: + return None + numId_el = numPr.find(qn("w:numId")) + ilvl_el = numPr.find(qn("w:ilvl")) + if numId_el is None or ilvl_el is None: + return None + return int(numId_el.get(qn("w:val"))), int(ilvl_el.get(qn("w:val"))) + + @staticmethod + def _get_abstract_num_for(doc, num_id): + from docx.oxml.ns import qn + + numbering = doc.part.numbering_part._element + for num_el in numbering.findall(qn("w:num")): + if int(num_el.get(qn("w:numId"))) == num_id: + abstract_ref = num_el.find(qn("w:abstractNumId")) + abstract_id = int(abstract_ref.get(qn("w:val"))) + for an in numbering.findall(qn("w:abstractNum")): + if int(an.get(qn("w:abstractNumId"))) == abstract_id: + return an + return None + + @staticmethod + def _get_lvl(abstract, ilvl): + from docx.oxml.ns import qn + + for lvl in abstract.findall(qn("w:lvl")): + if int(lvl.get(qn("w:ilvl"))) == ilvl: + return lvl + return None + + def _build_doc(self): + from docx import Document + + doc = Document() + parse_markdown_to_docx(doc, self.MARKDOWN) + return doc + + def _numbered_paragraphs(self, doc): + return [(p.text.strip(), self._get_numpr(p)) for p in doc.paragraphs if self._get_numpr(p)] + + # ---- 1. All 15 items are numbered paragraphs ---- + + def test_produces_fifteen_numbered_paragraphs(self): + doc = self._build_doc() + numbered = self._numbered_paragraphs(doc) + assert len(numbered) == 15, ( + f"Expected 15 numbered paragraphs (5 lists × 3 items), got {len(numbered)}: {[t for t, _ in numbered]}" + ) + + # ---- 2. Item text is correct ---- + + def test_item_text_is_correct(self): + doc = self._build_doc() + numbered = self._numbered_paragraphs(doc) + texts = [t for t, _ in numbered] + for i in range(5): + group = texts[i * 3 : i * 3 + 3] + assert group == ["one", "two", "three"], ( + f"List group {i} text should be ['one', 'two', 'three'], got {group}" + ) + + # ---- 3. Each list uses the correct numFmt ---- + + def test_standard_decimal_uses_decimal_format(self): + from docx.oxml.ns import qn + + doc = self._build_doc() + numbered = self._numbered_paragraphs(doc) + num_id = numbered[0][1][0] + ilvl = numbered[0][1][1] + abstract = self._get_abstract_num_for(doc, num_id) + lvl = self._get_lvl(abstract, ilvl) + fmt = lvl.find(qn("w:numFmt")).get(qn("w:val")) + assert fmt == "decimal", f"Standard numbered list should be decimal, got {fmt}" + + def test_paren_decimal_uses_decimal_format(self): + from docx.oxml.ns import qn + + doc = self._build_doc() + numbered = self._numbered_paragraphs(doc) + num_id = numbered[3][1][0] + ilvl = numbered[3][1][1] + abstract = self._get_abstract_num_for(doc, num_id) + lvl = self._get_lvl(abstract, ilvl) + fmt = lvl.find(qn("w:numFmt")).get(qn("w:val")) + assert fmt == "decimal", f"Paren decimal list should be decimal, got {fmt}" + + def test_paren_lower_letter_uses_lower_letter_format(self): + from docx.oxml.ns import qn + + doc = self._build_doc() + numbered = self._numbered_paragraphs(doc) + num_id = numbered[6][1][0] + ilvl = numbered[6][1][1] + abstract = self._get_abstract_num_for(doc, num_id) + lvl = self._get_lvl(abstract, ilvl) + fmt = lvl.find(qn("w:numFmt")).get(qn("w:val")) + assert fmt == "lowerLetter", f"Lower letter list should be lowerLetter, got {fmt}" + + def test_paren_upper_letter_uses_upper_letter_format(self): + from docx.oxml.ns import qn + + doc = self._build_doc() + numbered = self._numbered_paragraphs(doc) + num_id = numbered[9][1][0] + ilvl = numbered[9][1][1] + abstract = self._get_abstract_num_for(doc, num_id) + lvl = self._get_lvl(abstract, ilvl) + fmt = lvl.find(qn("w:numFmt")).get(qn("w:val")) + assert fmt == "upperLetter", f"Upper letter list should be upperLetter, got {fmt}" + + def test_paren_roman_uses_lower_roman_format(self): + from docx.oxml.ns import qn + + doc = self._build_doc() + numbered = self._numbered_paragraphs(doc) + num_id = numbered[12][1][0] + ilvl = numbered[12][1][1] + abstract = self._get_abstract_num_for(doc, num_id) + lvl = self._get_lvl(abstract, ilvl) + fmt = lvl.find(qn("w:numFmt")).get(qn("w:val")) + assert fmt == "lowerRoman", f"Roman numeral list should be lowerRoman, got {fmt}" + + # ---- 4. Parenthesized lvlText for bracket lists ---- + + def test_paren_lists_use_parenthesized_lvl_text(self): + from docx.oxml.ns import qn + + doc = self._build_doc() + numbered = self._numbered_paragraphs(doc) + # Lists at indices 3, 6, 9, 12 are the paren lists + for start_idx in (3, 6, 9, 12): + num_id = numbered[start_idx][1][0] + ilvl = numbered[start_idx][1][1] + abstract = self._get_abstract_num_for(doc, num_id) + lvl = self._get_lvl(abstract, ilvl) + lvl_text = lvl.find(qn("w:lvlText")).get(qn("w:val")) + assert "(" in lvl_text and ")" in lvl_text, ( + f"Item '{numbered[start_idx][0]}' (idx {start_idx}) should have parenthesized lvlText, got '{lvl_text}'" + ) + + def test_standard_decimal_uses_dot_lvl_text(self): + from docx.oxml.ns import qn + + doc = self._build_doc() + numbered = self._numbered_paragraphs(doc) + num_id = numbered[0][1][0] + ilvl = numbered[0][1][1] + abstract = self._get_abstract_num_for(doc, num_id) + lvl = self._get_lvl(abstract, ilvl) + lvl_text = lvl.find(qn("w:lvlText")).get(qn("w:val")) + assert "." in lvl_text, f"Standard decimal should use dot format, got '{lvl_text}'" + + # ---- 5. Left-aligned justification ---- + + def test_all_lists_are_left_aligned(self): + from docx.oxml.ns import qn + + doc = self._build_doc() + numbered = self._numbered_paragraphs(doc) + checked = set() + for text, (num_id, ilvl) in numbered: + key = (num_id, ilvl) + if key in checked: + continue + checked.add(key) + abstract = self._get_abstract_num_for(doc, num_id) + lvl = self._get_lvl(abstract, ilvl) + jc = lvl.find(qn("w:lvlJc")) + assert jc is not None, f"'{text}' level should have lvlJc element" + assert jc.get(qn("w:val")) == "left", f"'{text}' should be left-aligned, got '{jc.get(qn('w:val'))}'" + + # ---- 6. Text alignment consistency (hanging indent) ---- + + def test_items_within_each_list_share_same_hanging_indent(self): + """All items in a single list must use the same hanging indent so that + the text column is aligned even when numbering labels vary in width + (e.g. ``(i)`` vs ``(iii)``).""" + from docx.oxml.ns import qn + + doc = self._build_doc() + numbered = self._numbered_paragraphs(doc) + + for start_idx in range(0, 15, 3): + group = numbered[start_idx : start_idx + 3] + # All items in a group share the same numId + ilvl, so they share + # the same abstractNum level definition → same indent. + num_id = group[0][1][0] + ilvl = group[0][1][1] + abstract = self._get_abstract_num_for(doc, num_id) + lvl = self._get_lvl(abstract, ilvl) + pPr = lvl.find(qn("w:pPr")) + assert pPr is not None, f"Level {ilvl} should have pPr" + ind = pPr.find(qn("w:ind")) + assert ind is not None, f"Level {ilvl} should have indent" + hanging = ind.get(qn("w:hanging")) + left = ind.get(qn("w:left")) + assert hanging is not None, f"Hanging indent should be set for list starting at idx {start_idx}" + assert left is not None, f"Left indent should be set for list starting at idx {start_idx}" + + def test_all_lists_share_same_hanging_indent(self): + """All lists at the same indentation level must use the same left and + hanging indent so that item text aligns at the same column regardless + of whether the list uses ``1.``, ``(a)``, ``(A)``, or ``(iii)`` labels.""" + from docx.oxml.ns import qn + + doc = self._build_doc() + numbered = self._numbered_paragraphs(doc) + + indent_values = [] + for start_idx in range(0, 15, 3): + num_id = numbered[start_idx][1][0] + ilvl = numbered[start_idx][1][1] + abstract = self._get_abstract_num_for(doc, num_id) + lvl = self._get_lvl(abstract, ilvl) + ind = lvl.find(qn("w:pPr")).find(qn("w:ind")) + hanging = ind.get(qn("w:hanging")) + left = ind.get(qn("w:left")) + indent_values.append((left, hanging)) + + first = indent_values[0] + for i, val in enumerate(indent_values): + assert val == first, ( + f"List group {i} indent {val} differs from group 0 indent {first}; " + f"all lists must share the same indent for consistent text alignment" + ) + + def test_all_levels_use_tab_suffix(self): + """Each numbering level must use ```` so Word + inserts a tab (not a space) after the label. This ensures text aligns + at the left-indent position regardless of label width.""" + from docx.oxml.ns import qn + + doc = self._build_doc() + numbered = self._numbered_paragraphs(doc) + + checked = set() + for text, (num_id, ilvl) in numbered: + key = (num_id, ilvl) + if key in checked: + continue + checked.add(key) + abstract = self._get_abstract_num_for(doc, num_id) + lvl = self._get_lvl(abstract, ilvl) + suff = lvl.find(qn("w:suff")) + assert suff is not None, f"'{text}' level should have a element" + assert suff.get(qn("w:val")) == "tab", f"'{text}' suffix should be 'tab', got '{suff.get(qn('w:val'))}'" + + +class TestOrderedListStartOverride: + """Verify that ordered lists respect the start number from the markdown. + + Markdown input: + # numbers + 1. one + 2. two + 3. three + # continuation of numbers + 4. four + 5. five + 6. six + + Expected: two separate lists, each with 3 items. + The first list numbers 1, 2, 3; the second list numbers 4, 5, 6. + """ + + MARKDOWN = "# numbers\n1. one\n2. two\n3. three\n# continuation of numbers\n4. four\n5. five\n6. six\n" + + EXPECTED_ITEMS = [ + ("one", 1), + ("two", 2), + ("three", 3), + ("four", 4), + ("five", 5), + ("six", 6), + ] + + @staticmethod + def _get_numpr(paragraph): + from docx.oxml.ns import qn + + pPr = paragraph._p.find(qn("w:pPr")) + if pPr is None: + return None + numPr = pPr.find(qn("w:numPr")) + if numPr is None: + return None + numId_el = numPr.find(qn("w:numId")) + ilvl_el = numPr.find(qn("w:ilvl")) + if numId_el is None or ilvl_el is None: + return None + return int(numId_el.get(qn("w:val"))), int(ilvl_el.get(qn("w:val"))) + + @staticmethod + def _get_start_val(doc, num_id, ilvl): + """Return the effective start value for a numbering instance. + + Checks / first, then falls back to + the value in the abstract numbering level definition. + """ + from docx.oxml.ns import qn + + numbering = doc.part.numbering_part._element + for num_el in numbering.findall(qn("w:num")): + if int(num_el.get(qn("w:numId"))) != num_id: + continue + # Check for startOverride + for ovr in num_el.findall(qn("w:lvlOverride")): + if int(ovr.get(qn("w:ilvl"))) == ilvl: + start_ovr = ovr.find(qn("w:startOverride")) + if start_ovr is not None: + return int(start_ovr.get(qn("w:val"))) + # Fall back to abstract num + abs_ref = num_el.find(qn("w:abstractNumId")) + abs_id = int(abs_ref.get(qn("w:val"))) + for an in numbering.findall(qn("w:abstractNum")): + if int(an.get(qn("w:abstractNumId"))) == abs_id: + for lvl_el in an.findall(qn("w:lvl")): + if int(lvl_el.get(qn("w:ilvl"))) == ilvl: + start_el = lvl_el.find(qn("w:start")) + if start_el is not None: + return int(start_el.get(qn("w:val"))) + return None + + def _build_doc(self): + from docx import Document + + doc = Document() + parse_markdown_to_docx(doc, self.MARKDOWN) + return doc + + def _numbered_paragraphs(self, doc): + return [(p.text.strip(), self._get_numpr(p)) for p in doc.paragraphs if self._get_numpr(p)] + + def test_produces_six_numbered_paragraphs(self): + doc = self._build_doc() + numbered = self._numbered_paragraphs(doc) + assert len(numbered) == 6, f"Expected 6 numbered paragraphs, got {len(numbered)}: {[t for t, _ in numbered]}" + + def test_two_distinct_lists(self): + doc = self._build_doc() + numbered = self._numbered_paragraphs(doc) + num_ids = [numpr[0] for _, numpr in numbered] + distinct = list(dict.fromkeys(num_ids)) + assert len(distinct) == 2, f"Expected 2 distinct numIds (two lists), got {len(distinct)}: {distinct}" + + def test_each_list_has_three_items(self): + doc = self._build_doc() + numbered = self._numbered_paragraphs(doc) + num_ids = [numpr[0] for _, numpr in numbered] + distinct = list(dict.fromkeys(num_ids)) + first_count = sum(1 for n in num_ids if n == distinct[0]) + second_count = sum(1 for n in num_ids if n == distinct[1]) + assert first_count == 3, f"First list should have 3 items, got {first_count}" + assert second_count == 3, f"Second list should have 3 items, got {second_count}" + + def test_item_text_matches(self): + doc = self._build_doc() + numbered = self._numbered_paragraphs(doc) + for idx, (text, _) in enumerate(numbered): + expected_text = self.EXPECTED_ITEMS[idx][0] + assert text == expected_text, f"Item {idx}: expected text {expected_text!r}, got {text!r}" + + def test_first_list_starts_at_one(self): + doc = self._build_doc() + numbered = self._numbered_paragraphs(doc) + num_id, ilvl = numbered[0][1] + start = self._get_start_val(doc, num_id, ilvl) + assert start == 1, f"First list should start at 1, got {start}" + + def test_second_list_starts_at_four(self): + doc = self._build_doc() + numbered = self._numbered_paragraphs(doc) + num_id, ilvl = numbered[3][1] + start = self._get_start_val(doc, num_id, ilvl) + assert start == 4, f"Second list should start at 4, got {start}" + + def test_effective_numbers_are_correct(self): + """Verify that the effective number for each item is correct by + checking the start value of its list and its position within the list.""" + doc = self._build_doc() + numbered = self._numbered_paragraphs(doc) + + for idx, (text, (num_id, ilvl)) in enumerate(numbered): + list_start = self._get_start_val(doc, num_id, ilvl) + position_in_list = sum(1 for i in range(idx) if numbered[i][1][0] == num_id) + effective_number = list_start + position_in_list + expected_number = self.EXPECTED_ITEMS[idx][1] + assert effective_number == expected_number, ( + f"Item {idx} ({text!r}): expected number {expected_number}, " + f"got {effective_number} (list_start={list_start}, pos={position_in_list})" + ) + + def test_abstract_num_start_matches_override(self): + """The abstract numbering value must match the startOverride + so that renderers which ignore lvlOverride still produce correct + numbering.""" + from docx.oxml.ns import qn + + doc = self._build_doc() + numbered = self._numbered_paragraphs(doc) + numbering = doc.part.numbering_part._element + + # Second list (items 3-5) should have abstract start = 4 + num_id, ilvl = numbered[3][1] + for num_el in numbering.findall(qn("w:num")): + if int(num_el.get(qn("w:numId"))) != num_id: + continue + abs_ref = num_el.find(qn("w:abstractNumId")) + abs_id = int(abs_ref.get(qn("w:val"))) + for an in numbering.findall(qn("w:abstractNum")): + if int(an.get(qn("w:abstractNumId"))) == abs_id: + for lvl_el in an.findall(qn("w:lvl")): + if int(lvl_el.get(qn("w:ilvl"))) == ilvl: + start_el = lvl_el.find(qn("w:start")) + assert start_el is not None + assert int(start_el.get(qn("w:val"))) == 4, ( + f"Abstract numbering start should be 4, got {start_el.get(qn('w:val'))}" + ) + + +class TestNestedNumberedListIndentation: + """Verify deeply nested numbered lists with mixed parenthesized formats. + + Markdown input: + 1. one + (1) one + (a) one + (A) one + (i) one + (ii) two + (B) two + (i) one + (ii) two + (b) two + (A) one + (B) two + (2) two + (a) one + (b) two + 2. two + (1) one + (2) two + + Expected nesting levels (ilvl): + 0 → 1. / 2. + 1 → (1) / (2) + 2 → (a) / (b) + 3 → (A) / (B) + 4 → (i) / (ii) + + Each ilvl must have left indent = hanging * (ilvl + 1) where + hanging = 504 twips. + """ + + MARKDOWN = ( + "# nested numbered lists\n" + "1. one\n" + " (1) one\n" + "\t (a) one\n" + "\t\t (A) one\n" + "\t\t\t (i) one\n" + "\t\t\t\t(ii) two\n" + "\t\t\t(B) two\n" + "\t\t\t (i) one\n" + "\t\t\t\t(ii) two\n" + "\t\t(b) two\n" + "\t\t (A) one\n" + "\t\t\t(B) two\n" + "\t(2) two\n" + "\t (a) one\n" + "\t\t(b) two\n" + "2. two\n" + " (1) one\n" + "\t(2) two\n" + ) + + # (expected_text, expected_ilvl, expected_num_fmt) + EXPECTED_ITEMS = [ + ("one", 0, "decimal"), # 1. + ("one", 1, "decimal"), # (1) + ("one", 2, "lowerLetter"), # (a) + ("one", 3, "upperLetter"), # (A) + ("one", 4, "lowerRoman"), # (i) + ("two", 4, "lowerRoman"), # (ii) + ("two", 3, "upperLetter"), # (B) + ("one", 4, "lowerRoman"), # (i) + ("two", 4, "lowerRoman"), # (ii) + ("two", 2, "lowerLetter"), # (b) + ("one", 3, "upperLetter"), # (A) + ("two", 3, "upperLetter"), # (B) + ("two", 1, "decimal"), # (2) + ("one", 2, "lowerLetter"), # (a) + ("two", 2, "lowerLetter"), # (b) + ("two", 0, "decimal"), # 2. + ("one", 1, "decimal"), # (1) + ("two", 1, "decimal"), # (2) + ] + + HANGING = 504 # _LIST_HANGING_INDENT + + @staticmethod + def _get_numpr(paragraph): + from docx.oxml.ns import qn + + pPr = paragraph._p.find(qn("w:pPr")) + if pPr is None: + return None + numPr = pPr.find(qn("w:numPr")) + if numPr is None: + return None + numId_el = numPr.find(qn("w:numId")) + ilvl_el = numPr.find(qn("w:ilvl")) + if numId_el is None or ilvl_el is None: + return None + return int(numId_el.get(qn("w:val"))), int(ilvl_el.get(qn("w:val"))) + + @staticmethod + def _get_abstract_num_for(doc, num_id): + from docx.oxml.ns import qn + + numbering = doc.part.numbering_part._element + for num_el in numbering.findall(qn("w:num")): + if int(num_el.get(qn("w:numId"))) == num_id: + abstract_ref = num_el.find(qn("w:abstractNumId")) + abstract_id = int(abstract_ref.get(qn("w:val"))) + for an in numbering.findall(qn("w:abstractNum")): + if int(an.get(qn("w:abstractNumId"))) == abstract_id: + return an + return None + + @staticmethod + def _get_lvl(abstract, ilvl): + from docx.oxml.ns import qn + + for lvl in abstract.findall(qn("w:lvl")): + if int(lvl.get(qn("w:ilvl"))) == ilvl: + return lvl + return None + + def _build_doc(self): + from docx import Document + + doc = Document() + parse_markdown_to_docx(doc, self.MARKDOWN) + return doc + + def _numbered_paragraphs(self, doc): + return [(p.text.strip(), self._get_numpr(p)) for p in doc.paragraphs if self._get_numpr(p)] + + def test_produces_eighteen_numbered_paragraphs(self): + doc = self._build_doc() + numbered = self._numbered_paragraphs(doc) + assert len(numbered) == 18, f"Expected 18 numbered paragraphs, got {len(numbered)}: {[t for t, _ in numbered]}" + + def test_item_text_matches_expected(self): + doc = self._build_doc() + numbered = self._numbered_paragraphs(doc) + for idx, (text, _) in enumerate(numbered): + expected_text = self.EXPECTED_ITEMS[idx][0] + assert text == expected_text, f"Item {idx}: expected text {expected_text!r}, got {text!r}" + + def test_ilvl_matches_expected(self): + doc = self._build_doc() + numbered = self._numbered_paragraphs(doc) + for idx, (text, (num_id, ilvl)) in enumerate(numbered): + expected_ilvl = self.EXPECTED_ITEMS[idx][1] + assert ilvl == expected_ilvl, f"Item {idx} ({text!r}): expected ilvl={expected_ilvl}, got ilvl={ilvl}" + + def test_num_fmt_matches_expected(self): + from docx.oxml.ns import qn + + doc = self._build_doc() + numbered = self._numbered_paragraphs(doc) + for idx, (text, (num_id, ilvl)) in enumerate(numbered): + expected_fmt = self.EXPECTED_ITEMS[idx][2] + abstract = self._get_abstract_num_for(doc, num_id) + lvl = self._get_lvl(abstract, ilvl) + fmt = lvl.find(qn("w:numFmt")).get(qn("w:val")) + assert fmt == expected_fmt, f"Item {idx} ({text!r}): expected numFmt={expected_fmt!r}, got {fmt!r}" + + def test_left_indent_matches_ilvl(self): + from docx.oxml.ns import qn + + doc = self._build_doc() + numbered = self._numbered_paragraphs(doc) + for idx, (text, (num_id, ilvl)) in enumerate(numbered): + expected_left = self.HANGING * (ilvl + 1) + abstract = self._get_abstract_num_for(doc, num_id) + lvl = self._get_lvl(abstract, ilvl) + pPr = lvl.find(qn("w:pPr")) + assert pPr is not None, f"Item {idx} ({text!r}): missing pPr" + ind = pPr.find(qn("w:ind")) + assert ind is not None, f"Item {idx} ({text!r}): missing ind" + left = int(ind.get(qn("w:left"))) + assert left == expected_left, ( + f"Item {idx} ({text!r}, ilvl={ilvl}): expected left indent={expected_left}, got {left}" + ) + + def test_hanging_indent_is_consistent(self): + from docx.oxml.ns import qn + + doc = self._build_doc() + numbered = self._numbered_paragraphs(doc) + for idx, (text, (num_id, ilvl)) in enumerate(numbered): + abstract = self._get_abstract_num_for(doc, num_id) + lvl = self._get_lvl(abstract, ilvl) + ind = lvl.find(qn("w:pPr")).find(qn("w:ind")) + hanging = int(ind.get(qn("w:hanging"))) + assert hanging == self.HANGING, f"Item {idx} ({text!r}): expected hanging={self.HANGING}, got {hanging}" + + +class TestMixedParagraphPreservesNonListText: + """Non-list text in a paragraph containing parenthesized markers must be preserved. + + When markdown produces a soft-broken block like: + + Intro text here + (1) first item + (2) second item + + the leading "Intro text here" line must survive as a paragraph in the + generated document rather than being silently dropped when the ``

                      `` + element is decomposed during paren-list post-processing. + """ + + MARKDOWN = "Intro text here\n(1) first item\n(2) second item" + + def test_intro_text_preserved_in_output(self): + from docx import Document + + doc = Document() + parse_markdown_to_docx(doc, self.MARKDOWN) + + all_texts = [p.text.strip() for p in doc.paragraphs if p.text.strip()] + + assert any("Intro text here" in t for t in all_texts), ( + f"Expected 'Intro text here' to appear in the document paragraphs, but got: {all_texts}" + ) + + +class TestUppercaseAlphabeticListAtoZ: + """Verify that a parenthesized uppercase alphabetic list (A)–(Z) is + recognised and rendered as a single continuous Word list with the correct + uppercase-letter numbering format and correct item text.""" + + MARKDOWN = "\n".join(f"({chr(ord('A') + n)}) item {n + 1}" for n in range(26)) + + @staticmethod + def _get_numpr(paragraph): + from docx.oxml.ns import qn + + pPr = paragraph._p.find(qn("w:pPr")) + if pPr is None: + return None + numPr = pPr.find(qn("w:numPr")) + if numPr is None: + return None + numId_el = numPr.find(qn("w:numId")) + ilvl_el = numPr.find(qn("w:ilvl")) + if numId_el is None or ilvl_el is None: + return None + return int(numId_el.get(qn("w:val"))), int(ilvl_el.get(qn("w:val"))) + + def test_all_items_share_same_numid(self): + """All (A)–(Z) items must share a single numId, confirming they + form one continuous list and no letter is misclassified.""" + from docx import Document + + doc = Document() + parse_markdown_to_docx(doc, self.MARKDOWN) + + numbered = [(p.text.strip(), self._get_numpr(p)) for p in doc.paragraphs if self._get_numpr(p)] + + assert len(numbered) == 26, f"Expected 26 numbered paragraphs (A)–(Z), got {len(numbered)}: {numbered}" + + num_ids = set(numpr[0] for _, numpr in numbered) + assert len(num_ids) == 1, ( + f"All (A)–(Z) items should share one numId for a continuous " + f"list, but got {len(num_ids)} distinct numIds: {num_ids}" + ) + + def test_numbering_format_is_upper_letter(self): + """The numbering format must be upperLetter.""" + from docx import Document + from docx.oxml.ns import qn + + doc = Document() + parse_markdown_to_docx(doc, self.MARKDOWN) + + numbered = [(p.text.strip(), self._get_numpr(p)) for p in doc.paragraphs if self._get_numpr(p)] + num_id = numbered[0][1][0] + ilvl = numbered[0][1][1] + + numbering = doc.part.numbering_part.numbering_definitions._numbering + num_el = None + for n in numbering.findall(qn("w:num")): + if int(n.get(qn("w:numId"))) == num_id: + num_el = n + break + abstract_num_id = int(num_el.find(qn("w:abstractNumId")).get(qn("w:val"))) + abstract = None + for a in numbering.findall(qn("w:abstractNum")): + if int(a.get(qn("w:abstractNumId"))) == abstract_num_id: + abstract = a + break + + target_lvl = None + for lvl in abstract.findall(qn("w:lvl")): + if int(lvl.get(qn("w:ilvl"))) == ilvl: + target_lvl = lvl + break + + fmt = target_lvl.find(qn("w:numFmt")).get(qn("w:val")) + assert fmt == "upperLetter", f"Expected upperLetter numbering format, got {fmt}" + + def test_lvl_text_is_parenthesized(self): + """The lvlText must contain both '(' and ')' for bracket formatting.""" + from docx import Document + from docx.oxml.ns import qn + + doc = Document() + parse_markdown_to_docx(doc, self.MARKDOWN) + + numbered = [(p.text.strip(), self._get_numpr(p)) for p in doc.paragraphs if self._get_numpr(p)] + num_id = numbered[0][1][0] + ilvl = numbered[0][1][1] + + numbering = doc.part.numbering_part.numbering_definitions._numbering + num_el = None + for n in numbering.findall(qn("w:num")): + if int(n.get(qn("w:numId"))) == num_id: + num_el = n + break + abstract_num_id = int(num_el.find(qn("w:abstractNumId")).get(qn("w:val"))) + abstract = None + for a in numbering.findall(qn("w:abstractNum")): + if int(a.get(qn("w:abstractNumId"))) == abstract_num_id: + abstract = a + break + + target_lvl = None + for lvl in abstract.findall(qn("w:lvl")): + if int(lvl.get(qn("w:ilvl"))) == ilvl: + target_lvl = lvl + break + + lvl_text = target_lvl.find(qn("w:lvlText")).get(qn("w:val")) + assert "(" in lvl_text and ")" in lvl_text, f"Expected parenthesized lvlText like '(%1)', got '{lvl_text}'" + + def test_each_item_has_correct_text(self): + """Each paragraph text must match 'item N' for N=1..26.""" + from docx import Document + + doc = Document() + parse_markdown_to_docx(doc, self.MARKDOWN) + + numbered = [(p.text.strip(), self._get_numpr(p)) for p in doc.paragraphs if self._get_numpr(p)] + + assert len(numbered) == 26, f"Expected 26 items, got {len(numbered)}" + + for i, (text, _) in enumerate(numbered): + expected = f"item {i + 1}" + assert text == expected, f"Item {i} (letter {chr(ord('A') + i)}): expected text '{expected}', got '{text}'" + + +class TestMidListNonMarkerTextInParagraph: + """Verify that non-marker text between parenthesized list items inside a +

                      element is not silently discarded.""" + + def test_no_blank_line_indented_paragraph_belongs_to_item(self): + """When there is no blank line between a list item and a following + indented paragraph, the paragraph text should be preserved as part of + the preceding list item on a new line (continuation text).""" + from docx import Document + + md = "(1) first item\nsome trailing note\n(2) second item" + doc = Document() + parse_markdown_to_docx(doc, md) + + texts = [p.text.strip() for p in doc.paragraphs if p.text.strip()] + # The continuation text should be part of the first list item paragraph + assert any("some trailing note" in t for t in texts), ( + f"Expected 'some trailing note' to be preserved as continuation text " + f"of the first list item, but it was lost. Paragraphs: {texts}" + ) + # It should be on a new line within the same paragraph, not concatenated + first_item_text = texts[0] + assert "first item\nsome trailing note" == first_item_text, ( + f"Expected continuation text on a new line within the list item, got: {first_item_text!r}" + ) + + def test_blank_line_paragraph_is_standalone(self): + """When there is a blank line separating a non-marker paragraph from + the surrounding list items, the paragraph should appear as its own + standalone paragraph between the two list items, and the second item + should continue numbering from 2.""" + from docx import Document + + md = "(1) first item\n\nsome standalone paragraph\n\n(2) second item" + doc = Document() + parse_markdown_to_docx(doc, md) + + texts = [p.text.strip() for p in doc.paragraphs if p.text.strip()] + assert "some standalone paragraph" in texts, ( + f"Expected 'some standalone paragraph' as its own paragraph, but it was not found. Paragraphs: {texts}" + ) diff --git a/pyproject.toml b/pyproject.toml index bf93580f..ab5f70c7 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -18,6 +18,9 @@ addopts = "--import-mode=importlib -m unit --tb=short" norecursedirs = ["dependencies", "__pycache__", ".git", ".ruff_cache"] +[tool.ruff] +line-length = 120 + # Environment variables for integration tests are loaded from .env # by the root conftest.py (stdlib-only, no plugin dependency).