elements
+ current_type = None
+ current_ol = None
+ for ol_type, start_val, item_text, indent_spaces in list_items:
+ if ol_type != current_type:
+ current_type = ol_type
+ indent_level = indent_spaces // 4
+ current_ol = soup.new_tag(
+ "ol",
+ attrs={
+ "type": ol_type,
+ "data-paren": "true",
+ "data-indent-level": str(indent_level),
+ },
+ )
+ if start_val != 1:
+ current_ol["start"] = str(start_val)
+ li.append(current_ol)
+ new_li = soup.new_tag("li")
+ _set_li_text_with_breaks(soup, new_li, item_text)
+ current_ol.append(new_li)
+
+ # Also handle standalone <p> elements with (a)/(1)/(i) patterns (not inside a list)
+ for p in list(soup.find_all("p", recursive=False)):
+ full_text = p.get_text()
+ if not _PAREN_ITEM_RE.search(full_text):
+ continue
+
+ lines = full_text.split("\n")
+ leading_lines: list[str] = []
+ list_items: list[tuple[str, int, str, int]] = []
+ for line in lines:
+ stripped = line.strip()
+ if not stripped:
+ continue
+ m = _PAREN_ITEM_RE.match(stripped)
+ if m:
+ ol_type, start_val = _detect_paren_type(m.group(1))
+ indent_spaces = len(line) - len(line.lstrip())
+ list_items.append((ol_type, start_val, stripped[m.end() :], indent_spaces))
+ elif not list_items:
+ leading_lines.append(stripped)
+ else:
+ last = list_items[-1]
+ list_items[-1] = (
+ last[0],
+ last[1],
+ last[2] + "\n" + stripped,
+ last[3],
+ )
+ if not list_items:
+ continue
+
+ list_items = _reconcile_ambiguous_markers(list_items)
+
+ # Preserve any non-list text that appeared before the first marker
+ if leading_lines:
+ new_p = soup.new_tag("p")
+ new_p.string = "\n".join(leading_lines)
+ p.insert_before(new_p)
+
+ current_type = None
+ current_ol = None
+ for ol_type, start_val, item_text, indent_spaces in list_items:
+ if ol_type != current_type:
+ current_type = ol_type
+ indent_level = indent_spaces // 4
+ current_ol = soup.new_tag(
+ "ol",
+ attrs={
+ "type": ol_type,
+ "data-paren": "true",
+ "data-indent-level": str(indent_level),
+ },
+ )
+ if start_val != 1:
+ current_ol["start"] = str(start_val)
+ p.insert_before(current_ol)
+ new_li = soup.new_tag("li")
+ _set_li_text_with_breaks(soup, new_li, item_text)
+ current_ol.append(new_li)
+ p.decompose()
+
+
+# ---------------------------------------------------------------------------
+# Low-level OOXML numbering helpers
+# ---------------------------------------------------------------------------
+
+
+def _numbering_root(doc):
+    """Return the root XML element of the document's numbering part.
+
+    The element is read from ``doc.part.numbering_part._element``.  If that
+    access raises, a throw-away ``List Number`` paragraph is added to the
+    document and detached again, after which the element is read a second
+    time (an exception from that second read propagates to the caller).
+    """
+    try:
+        root = doc.part.numbering_part._element
+    except Exception:
+        # Best-effort fallback kept from the original author: adding and then
+        # removing a list paragraph is presumably meant to make python-docx
+        # materialise the numbering part.
+        # NOTE(review): confirm this actually creates the part for templates
+        # that ship without one.
+        placeholder = doc.add_paragraph("", style="List Number")
+        placeholder_xml = placeholder._element
+        placeholder_xml.getparent().remove(placeholder_xml)
+        root = doc.part.numbering_part._element
+    return root
+
+
+def _next_abstract_num_id(numbering) -> int:
+    """Return an unused ``w:abstractNumId``: the largest existing id plus one, or 0 when none exist."""
+    highest = -1
+    for definition in numbering.findall(qn("w:abstractNum")):
+        highest = max(highest, int(definition.get(qn("w:abstractNumId"))))
+    return highest + 1
+
+
+def _next_num_id(numbering) -> int:
+    """Return an unused ``w:numId``: the largest existing id plus one, or 1 when none exist."""
+    highest = 0
+    for instance in numbering.findall(qn("w:num")):
+        highest = max(highest, int(instance.get(qn("w:numId"))))
+    return highest + 1
+
+
+# A twip is 1/20 of a point, i.e. 1440 twips per inch, so 504 twips = 0.35 in.
+_LIST_HANGING_INDENT = 504
+"""Hanging indent in twips used for every numbered-list level.
+
+A single value is used for **all** numbering formats so that item text aligns
+at the same column across lists that use different label styles (e.g. ``1.``,
+``(a)``, ``(iii)``). 504 twips (≈ 0.35 in) is wide enough for the widest
+common parenthesized roman label ``(viii)`` while keeping the indent compact.
+"""
+
+
+def _get_or_create_abstract_num(doc, num_fmt: str, lvl_text: str, nesting_levels: int = 3, start: int = 1) -> int:
+    """Create a multilevel abstract numbering definition and return its id.
+
+    Despite the name, a new ``<w:abstractNum>`` is always created so that each
+    independent list gets its own ``abstractNumId``.  Sharing an abstract num
+    across multiple ``<w:num>`` elements can cause Word to silently drop
+    numbering on some lists.
+
+    The definition is multilevel: it contains *nesting_levels* ``<w:lvl>``
+    entries (ilvl 0, 1, ...) that all use *num_fmt*, with the left indent
+    growing by one hanging-indent step per level.
+
+    Args:
+        doc: The python-docx document whose numbering part is extended.
+        num_fmt: OOXML ``w:numFmt`` value, e.g. ``"decimal"``.
+        lvl_text: Label template written for ilvl 0, e.g. ``"(%1)"``.  For
+            deeper levels the ``%1`` placeholder is replaced by ``%2``,
+            ``%3``, ... so each level shows its own counter.
+        nesting_levels: Number of ``<w:lvl>`` entries to generate.
+        start: ``<w:start>`` value for ilvl 0; deeper levels start at 1.
+            Setting it here means renderers that ignore
+            ``<w:lvlOverride>/<w:startOverride>`` still start at the right
+            number.
+
+    Returns:
+        The ``abstractNumId`` of the newly inserted definition.
+    """
+    numbering = _numbering_root(doc)
+    abstract_num_id = _next_abstract_num_id(numbering)
+
+    def _valued(tag: str, value):
+        """Build ``<tag w:val="value"/>``."""
+        element = OxmlElement(tag)
+        element.set(qn("w:val"), str(value))
+        return element
+
+    abstract_num = OxmlElement("w:abstractNum")
+    abstract_num.set(qn("w:abstractNumId"), str(abstract_num_id))
+    abstract_num.append(_valued("w:multiLevelType", "multilevel"))
+
+    hanging = _LIST_HANGING_INDENT
+
+    for ilvl in range(nesting_levels):
+        left = hanging * (ilvl + 1)
+
+        lvl = OxmlElement("w:lvl")
+        lvl.set(qn("w:ilvl"), str(ilvl))
+
+        # Children are appended in the order the WordprocessingML schema
+        # (CT_Lvl) prescribes: start, numFmt, suff, lvlText, lvlJc, pPr.
+        lvl.append(_valued("w:start", start if ilvl == 0 else 1))
+        lvl.append(_valued("w:numFmt", num_fmt))
+        # Force a tab character after the label so text aligns at the
+        # left-indent position regardless of label width.
+        lvl.append(_valued("w:suff", "tab"))
+        # Use the ilvl+1 placeholder for each level (e.g. %1, %2, %3).
+        lvl.append(_valued("w:lvlText", lvl_text.replace("%1", f"%{ilvl + 1}")))
+        lvl.append(_valued("w:lvlJc", "left"))
+
+        ppr = OxmlElement("w:pPr")
+
+        # Explicit tab stop at the text position so the tab after the label
+        # lands exactly at the left indent.  The schema places <w:tabs>
+        # before <w:ind> inside <w:pPr>.
+        tabs = OxmlElement("w:tabs")
+        tab = _valued("w:tab", "num")
+        tab.set(qn("w:pos"), str(left))
+        tabs.append(tab)
+        ppr.append(tabs)
+
+        ind = OxmlElement("w:ind")
+        ind.set(qn("w:left"), str(left))
+        ind.set(qn("w:hanging"), str(hanging))
+        ppr.append(ind)
+
+        lvl.append(ppr)
+        abstract_num.append(lvl)
+
+    # OOXML requires all <w:abstractNum> elements before any <w:num>.
+    # Insert before the first <w:num> so Word doesn't silently ignore it.
+    first_num = numbering.find(qn("w:num"))
+    if first_num is not None:
+        first_num.addprevious(abstract_num)
+    else:
+        numbering.append(abstract_num)
+    return abstract_num_id
+
+
+def _create_num(doc, abstract_num_id: int, start_override: int | None = None, level: int = 0) -> int:
+    """Append a new ``<w:num>`` that references *abstract_num_id* and return its ``numId``.
+
+    When *start_override* is given, the ``<w:num>`` also receives a
+    ``<w:lvlOverride>`` for *level* containing a ``<w:startOverride>``, so
+    numbering at that level begins at *start_override* rather than continuing.
+    """
+    numbering = _numbering_root(doc)
+    num_id = _next_num_id(numbering)
+    val_attr = qn("w:val")
+
+    abstract_ref = OxmlElement("w:abstractNumId")
+    abstract_ref.set(val_attr, str(abstract_num_id))
+
+    num = OxmlElement("w:num")
+    num.set(qn("w:numId"), str(num_id))
+    num.append(abstract_ref)
+
+    if start_override is not None:
+        start_el = OxmlElement("w:startOverride")
+        start_el.set(val_attr, str(start_override))
+
+        lvl_override = OxmlElement("w:lvlOverride")
+        lvl_override.set(qn("w:ilvl"), str(level))
+        lvl_override.append(start_el)
+        num.append(lvl_override)
+
+    numbering.append(num)
+    return num_id
+
+
+def _apply_numbering(paragraph, num_id: int, level: int = 0) -> None:
+    """Attach *paragraph* to numbering instance *num_id* at nesting level *level*.
+
+    Ensures the paragraph properties contain a ``<w:numPr>`` with ``<w:ilvl>``
+    and ``<w:numId>`` children and sets their ``w:val`` attributes; existing
+    elements are reused and overwritten.
+
+    The elements are obtained through python-docx's ``get_or_add_*`` helpers,
+    which insert a missing child at its schema-defined position (``numPr``
+    ahead of indentation/justification/run properties, ``ilvl`` ahead of
+    ``numId``) instead of blindly appending it at the end.
+    """
+    num_pr = paragraph._p.get_or_add_pPr().get_or_add_numPr()
+    num_pr.get_or_add_ilvl().set(qn("w:val"), str(level))
+    num_pr.get_or_add_numId().set(qn("w:val"), str(num_id))
+
+
+def _patch_abstract_num_level(doc, num_id: int, level: int, num_fmt: str, lvl_text: str) -> None:
+    """Patch the abstractNum referenced by *num_id* so that *level* uses the given format.
+
+    When a child list (e.g. ``(a)``) is nested under a parent list (e.g. ``1.``),
+    both must share the same ``numId``.  This function updates the parent's
+    abstract numbering definition so that the child's ``ilvl`` has the correct
+    ``numFmt`` and ``lvlText``, a tab suffix, and the standard indent/tab stop.
+
+    Nothing happens when *num_id* has no ``<w:num>`` or that ``<w:num>``
+    points at a missing ``<w:abstractNum>``.  A missing ``<w:lvl>`` for
+    *level* is created (starting at 1).  Calling the function repeatedly for
+    the same level is safe: the existing list tab stop is updated in place
+    rather than a new one being added on every call.
+
+    Args:
+        doc: The python-docx document whose numbering part is modified.
+        num_id: ``numId`` of the numbering instance shared with the parent.
+        level: Zero-based ``ilvl`` to patch.
+        num_fmt: OOXML ``w:numFmt`` value for that level.
+        lvl_text: Label template using ``%1``; the placeholder is rewritten
+            to ``%{level + 1}`` so the label shows this level's counter.
+    """
+    numbering = _numbering_root(doc)
+    val_attr = qn("w:val")
+
+    # Find the <w:num> for this numId and get its abstractNumId.
+    abstract_num_id = None
+    for num_el in numbering.findall(qn("w:num")):
+        if int(num_el.get(qn("w:numId"))) == num_id:
+            abstract_num_id = int(num_el.find(qn("w:abstractNumId")).get(val_attr))
+            break
+    if abstract_num_id is None:
+        return
+
+    # Find the <w:abstractNum> it refers to.
+    abstract_num = None
+    for candidate in numbering.findall(qn("w:abstractNum")):
+        if int(candidate.get(qn("w:abstractNumId"))) == abstract_num_id:
+            abstract_num = candidate
+            break
+    if abstract_num is None:
+        return
+
+    def _ensure(parent, tag: str):
+        """Return the first *tag* child of *parent*, appending a new one if absent."""
+        child = parent.find(qn(tag))
+        if child is None:
+            child = OxmlElement(tag)
+            parent.append(child)
+        return child
+
+    # Find or create the <w:lvl> for this ilvl.
+    target_lvl = None
+    for lvl in abstract_num.findall(qn("w:lvl")):
+        if int(lvl.get(qn("w:ilvl"))) == level:
+            target_lvl = lvl
+            break
+    if target_lvl is None:
+        target_lvl = OxmlElement("w:lvl")
+        target_lvl.set(qn("w:ilvl"), str(level))
+        _ensure(target_lvl, "w:start").set(val_attr, "1")
+        abstract_num.append(target_lvl)
+
+    _ensure(target_lvl, "w:numFmt").set(val_attr, num_fmt)
+    _ensure(target_lvl, "w:lvlText").set(val_attr, lvl_text.replace("%1", f"%{level + 1}"))
+
+    # Only supply a justification when none exists; an existing one is kept.
+    if target_lvl.find(qn("w:lvlJc")) is None:
+        _ensure(target_lvl, "w:lvlJc").set(val_attr, "left")
+
+    # Tab-based suffix for consistent text alignment.
+    _ensure(target_lvl, "w:suff").set(val_attr, "tab")
+
+    # Indentation: one hanging-indent step per nesting level.
+    hanging = _LIST_HANGING_INDENT
+    left = hanging + (hanging * level)
+    ppr = _ensure(target_lvl, "w:pPr")
+    ind = _ensure(ppr, "w:ind")
+    ind.set(qn("w:left"), str(left))
+    ind.set(qn("w:hanging"), str(hanging))
+
+    # Explicit tab stop at the text position.  Reuse the level's existing
+    # list ("num") tab if it has one so repeated patching does not pile up
+    # duplicate tab stops.
+    tabs = _ensure(ppr, "w:tabs")
+    tab = next((t for t in tabs.findall(qn("w:tab")) if t.get(val_attr) == "num"), None)
+    if tab is None:
+        tab = OxmlElement("w:tab")
+        tab.set(val_attr, "num")
+        tabs.append(tab)
+    tab.set(qn("w:pos"), str(left))
+
+    # The schema puts <w:tabs> before <w:ind>; move it if it ended up after.
+    if ppr.index(tabs) > ppr.index(ind):
+        ind.addprevious(tabs)
+
+    # Re-sequence the level's children into the order required by the
+    # WordprocessingML schema (CT_Lvl).  Re-appending an element that is
+    # already a child moves it, and sorted() is stable, so unknown children
+    # keep their relative order at the end.
+    lvl_sequence = (
+        "w:start",
+        "w:numFmt",
+        "w:lvlRestart",
+        "w:pStyle",
+        "w:isLgl",
+        "w:suff",
+        "w:lvlText",
+        "w:lvlPicBulletId",
+        "w:legacy",
+        "w:lvlJc",
+        "w:pPr",
+        "w:rPr",
+    )
+    rank = {qn(tag): position for position, tag in enumerate(lvl_sequence)}
+    for child in sorted(target_lvl, key=lambda el: rank.get(el.tag, len(rank))):
+        target_lvl.append(child)
+
+
+def _ol_type_to_numfmt(type_attr: str | None, paren: bool = False) -> tuple[str, str]:
+    """Translate an HTML ``<ol type>`` value into ``(OOXML numFmt, lvlText)``.
+
+    ``"1"``, ``"a"``, ``"A"``, ``"i"`` and ``"I"`` select ``decimal``,
+    ``lowerLetter``, ``upperLetter``, ``lowerRoman`` and ``upperRoman``.  A
+    missing or empty value is treated as ``"1"``; any other value falls back
+    to ``decimal``.
+
+    The level text is the parenthesized form ``(%1)`` when *paren* is true or
+    when the type is a letter or roman type (either case).  Otherwise it is
+    ``%1.`` (standard ``1. 2. 3.``).
+    """
+    kind = type_attr if type_attr else "1"
+
+    if kind == "a":
+        num_fmt = "lowerLetter"
+    elif kind == "A":
+        num_fmt = "upperLetter"
+    elif kind == "i":
+        num_fmt = "lowerRoman"
+    elif kind == "I":
+        num_fmt = "upperRoman"
+    else:
+        # Covers "1" as well as unrecognised values.
+        num_fmt = "decimal"
+
+    folded = kind.lower()
+    parenthesized = paren or folded == "a" or folded == "i"
+    return num_fmt, ("(%1)" if parenthesized else "%1.")
+
+
def parse_markdown_to_docx(doc: Document, markdown_text: str) -> None:
"""Parse markdown text and add elements to Word document"""
# Convert markdown to HTML
- html = markdown.markdown(markdown_text, extensions=["tables", "fenced_code"])
+ html = markdown.markdown(markdown_text, extensions=["tables", "fenced_code", "sane_lists"])
soup = BeautifulSoup(html, "html.parser")
+ # Post-process: convert (a), (1), (i) text patterns into nested <ol> elements
+ _post_process_paren_lists(soup)
+
+ # Track ordered list numbering state for restart/continue semantics
+ list_state: dict[str, Any] = {"ordered": {}}
+
# Process each HTML element in order
for element in soup.find_all(
[
@@ -575,10 +1102,13 @@ def parse_markdown_to_docx(doc: Document, markdown_text: str) -> None:
"blockquote",
"table",
"pre",
- ]
+ ],
+ recursive=False,
):
if element.name.startswith("h"):
- # Handle headings
+ # Handle headings – reset list continuation state so lists
+ # after a heading start fresh
+ list_state["ordered"] = {}
level = int(element.name[1]) # Extract number from h1, h2, etc.
text = element.get_text().strip()
if text:
@@ -590,15 +1120,8 @@ def parse_markdown_to_docx(doc: Document, markdown_text: str) -> None:
_add_formatted_text_to_paragraph(paragraph, element)
elif element.name in ["ul", "ol"]:
- # Handle lists
- is_numbered = element.name == "ol"
- for li in element.find_all("li", recursive=False):
- text = li.get_text().strip()
- if text:
- if is_numbered:
- doc.add_paragraph(text, style="List Number")
- else:
- doc.add_paragraph(text, style="List Bullet")
+ # Handle lists (including nested)
+ _add_list_items(doc, element, level=0, list_state=list_state)
elif element.name == "blockquote":
# Handle blockquotes
@@ -620,13 +1143,116 @@ def parse_markdown_to_docx(doc: Document, markdown_text: str) -> None:
run.font.name = "Courier New"
-def _add_formatted_text_to_paragraph(paragraph, html_element):
+def _add_list_items(
+    doc: Document,
+    list_element,
+    level: int,
+    list_state: dict,
+    parent_num_id: int | None = None,
+) -> None:
+    """Recursively add list items to Word document with proper nesting.
+
+    For bullet lists, uses Word's built-in 'List Bullet' styles.
+    For ordered lists, creates low-level OOXML numbering definitions that
+    support custom formats (decimal, lowerLetter, lowerRoman) and proper
+    restart/continuation semantics.
+
+    Args:
+        doc: Target python-docx document.
+        list_element: The ``<ul>`` or ``<ol>`` tag to render.
+        level: Zero-based recursion depth of *list_element*.
+        list_state: Mutable state shared across calls.  ``list_state["ordered"]``
+            maps ``(level, numFmt, lvlText)`` to the last ``numId`` used, so
+            a later list whose ``start`` is not 1 can continue that numbering.
+        parent_num_id: ``numId`` of the enclosing ordered list, if any.  A
+            nested ordered list reuses it so that Word renders all nesting
+            levels under one coherent list.
+    """
+    is_numbered = list_element.name == "ol"
+
+    # Respect original markdown indentation via data-indent-level attribute.
+    # Every 4 leading spaces in the markdown source maps to one indent level.
+    # When data-indent-level is set it already encodes the absolute nesting
+    # depth relative to the top-level list, so we must NOT add `level` (which
+    # the recursive call already incremented) on top of it — that would
+    # double-count the nesting.
+    indent_level = int(list_element.get("data-indent-level", 0))
+    effective_level = indent_level if indent_level > 0 else level
+    # A numbering definition has at most nine levels (ilvl 0-8).
+    max_ilvl = 8
+    effective_level = min(effective_level, max_ilvl)
+
+    num_id = None
+    bullet_style = None
+    if is_numbered:
+        start = int(list_element.get("start", 1))
+        type_attr = list_element.get("type") or "1"
+        paren = list_element.get("data-paren") == "true"
+        num_fmt, lvl_text = _ol_type_to_numfmt(type_attr, paren=paren)
+
+        if parent_num_id is not None and level > 0:
+            # Child list: reuse parent numId but patch the abstractNum to
+            # have the correct format at this ilvl.
+            num_id = parent_num_id
+            _patch_abstract_num_level(doc, num_id, effective_level, num_fmt, lvl_text)
+        else:
+            # Key for tracking continuation: lists at the same nesting level
+            # with the same format can continue numbering across boundaries.
+            key = (effective_level, num_fmt, lvl_text)
+
+            # A list that starts at 1 always restarts; any other start value
+            # continues the previous matching list when there is one.
+            num_id = None if start == 1 else list_state["ordered"].get(key)
+            if num_id is None:
+                # Create the definition only when it is actually needed, so a
+                # continued list does not leave an unused abstractNum behind.
+                # Generate enough levels to cover this list's own ilvl (an
+                # ordered list nested under several bullet levels may sit
+                # deeper than the default three).
+                abstract_num_id = _get_or_create_abstract_num(
+                    doc,
+                    num_fmt,
+                    lvl_text,
+                    nesting_levels=max(3, effective_level + 1),
+                    start=start,
+                )
+                num_id = _create_num(
+                    doc,
+                    abstract_num_id,
+                    start_override=start,
+                    level=effective_level,
+                )
+
+            list_state["ordered"][key] = num_id
+    else:
+        # Only 'List Bullet', 'List Bullet 2' and 'List Bullet 3' are used.
+        clamped_level = min(effective_level, 2)
+        bullet_style = "List Bullet" if clamped_level == 0 else f"List Bullet {clamped_level + 1}"
+
+    for li in list_element.find_all("li", recursive=False):
+        # Collect direct text of this <li>, ignoring nested <ul>/<ol>.
+        text = "".join(
+            child.get_text() if hasattr(child, "get_text") else str(child)
+            for child in li.children
+            if not (hasattr(child, "name") and child.name in ("ul", "ol"))
+        ).strip()
+
+        if text:
+            if is_numbered:
+                p = doc.add_paragraph()
+                p.style = doc.styles["List Paragraph"]
+                _apply_numbering(p, num_id=num_id, level=effective_level)
+                _add_formatted_text_to_paragraph(p, li, skip_nested_lists=True)
+            else:
+                doc.add_paragraph(text, style=bullet_style)
+
+        # Recurse into nested <ul> or <ol> (direct children of this <li>).
+        # Nested ordered lists inherit the parent numId so Word keeps them
+        # under one coherent multilevel numbering instance.
+        effective_parent = num_id if is_numbered else parent_num_id
+        for nested_list in li.find_all(["ul", "ol"], recursive=False):
+            _add_list_items(
+                doc,
+                nested_list,
+                level + 1,
+                list_state=list_state,
+                parent_num_id=effective_parent,
+            )
+
+
+def _add_formatted_text_to_paragraph(paragraph, html_element, skip_nested_lists: bool = False):
"""Add formatted text from HTML element to Word paragraph"""
# Handle direct text and formatting
for content in html_element.contents:
+ if skip_nested_lists and hasattr(content, "name") and content.name in ("ul", "ol"):
+ continue
if hasattr(content, "name") and content.name:
# This is an HTML tag
- if content.name == "strong" or content.name == "b":
+ if content.name == "br":
+ paragraph.add_run().add_break()
+ elif content.name == "strong" or content.name == "b":
run = paragraph.add_run(content.get_text())
run.bold = True
elif content.name == "em" or content.name == "i":
@@ -641,7 +1267,7 @@ def _add_formatted_text_to_paragraph(paragraph, html_element):
else:
# Nested elements - recursively process only if it has contents
if hasattr(content, "contents"):
- _add_formatted_text_to_paragraph(paragraph, content)
+ _add_formatted_text_to_paragraph(paragraph, content, skip_nested_lists=skip_nested_lists)
else:
# Just add the text content
text = content.get_text()
@@ -842,7 +1468,7 @@ async def execute(self, inputs: Dict[str, Any], context: ExecutionContext):
document_id = inputs["document_id"]
width = inputs.get("width") # in inches
height = inputs.get("height") # in inches
- files = inputs.get("files", [])
+ files = inputs["files"]
try:
load_document_from_files(document_id, files)
diff --git a/doc-maker/tests/test_doc_maker_unit.py b/doc-maker/tests/test_doc_maker_unit.py
index f7dd0187..e617c85e 100644
--- a/doc-maker/tests/test_doc_maker_unit.py
+++ b/doc-maker/tests/test_doc_maker_unit.py
@@ -28,6 +28,7 @@
is_likely_placeholder_context = _mod.is_likely_placeholder_context
analyze_replacement_safety = _mod.analyze_replacement_safety
_save_document_to_dict = _mod._save_document_to_dict
+parse_markdown_to_docx = _mod.parse_markdown_to_docx
documents = _mod.documents
pytestmark = pytest.mark.unit
@@ -583,3 +584,1273 @@ def test_missing_document_returns_error_dict(self):
assert result["saved"] is False
assert "nonexistent-id" in result["error"]
assert result["file"]["content"] == ""
+
+
+class TestParenthesizedListNumbering:
+    """Check the Word numbering produced for a ``1.`` list whose items carry ``(a)``/``(b)`` sub-items."""
+
+    MARKDOWN = (
+        "1. Elephant\n"
+        " (a) Elephants are the largest land animals on Earth, "
+        "with African elephants weighing up to 14,000 lbs.\n"
+        " (b) They have an exceptional memory and can recognize "
+        "themselves in mirrors, indicating self-awareness.\n"
+        "2. Axolotl\n"
+        " (a) Axolotls can regenerate entire limbs, including "
+        "parts of their heart and brain.\n"
+        " (b) Unlike most amphibians, axolotls retain their larval "
+        "features throughout their entire lives, a trait called neoteny."
+    )
+
+    @staticmethod
+    def _get_numpr(paragraph):
+        """Return ``(numId, ilvl)`` from the paragraph's ``w:numPr``, or ``None`` if any part is missing."""
+        from docx.oxml.ns import qn
+
+        node = paragraph._p
+        for tag in ("w:pPr", "w:numPr"):
+            node = node.find(qn(tag))
+            if node is None:
+                return None
+        num_id_el = node.find(qn("w:numId"))
+        ilvl_el = node.find(qn("w:ilvl"))
+        if num_id_el is None or ilvl_el is None:
+            return None
+        val = qn("w:val")
+        return int(num_id_el.get(val)), int(ilvl_el.get(val))
+
+    @staticmethod
+    def _get_abstract_num_for(doc, num_id):
+        """Return the ``w:abstractNum`` element that the given numId points at, or ``None``."""
+        from docx.oxml.ns import qn
+
+        numbering = doc.part.numbering_part._element
+        abstracts = numbering.findall(qn("w:abstractNum"))
+        for num_el in numbering.findall(qn("w:num")):
+            if int(num_el.get(qn("w:numId"))) != num_id:
+                continue
+            wanted = int(num_el.find(qn("w:abstractNumId")).get(qn("w:val")))
+            for candidate in abstracts:
+                if int(candidate.get(qn("w:abstractNumId"))) == wanted:
+                    return candidate
+        return None
+
+    def _render(self):
+        """Convert ``MARKDOWN`` into a fresh document; return it with its ``(text, (numId, ilvl))`` rows."""
+        from docx import Document
+
+        doc = Document()
+        parse_markdown_to_docx(doc, self.MARKDOWN)
+        rows = []
+        for para in doc.paragraphs:
+            numpr = self._get_numpr(para)
+            if numpr:
+                rows.append((para.text.strip(), numpr))
+        return doc, rows
+
+    def test_produces_six_numbered_paragraphs(self):
+        _, numbered = self._render()
+        assert len(numbered) == 6, f"Expected 6 numbered paragraphs, got {len(numbered)}: {numbered}"
+
+    def test_top_level_items_are_at_ilvl_zero(self):
+        _, numbered = self._render()
+        top_items = [row for row in numbered if row[0] == "Elephant" or row[0] == "Axolotl"]
+        assert len(top_items) == 2, f"Expected 2 top-level items, got {top_items}"
+        for text, (num_id, ilvl) in top_items:
+            assert ilvl == 0, f"'{text}' should be at ilvl 0, got {ilvl}"
+
+    def test_sub_items_are_indented(self):
+        _, numbered = self._render()
+        sub_items = [row for row in numbered if row[0] not in ("Elephant", "Axolotl")]
+        assert len(sub_items) == 4, f"Expected 4 sub-items, got {len(sub_items)}"
+        for text, (num_id, ilvl) in sub_items:
+            assert ilvl >= 1, f"Sub-item should be indented (ilvl >= 1), got {ilvl}: {text}"
+
+    def test_top_level_uses_decimal_numbering(self):
+        from docx.oxml.ns import qn
+
+        doc, numbered = self._render()
+        num_id = next(numpr for text, numpr in numbered if text == "Elephant")[0]
+        abstract = self._get_abstract_num_for(doc, num_id)
+        assert abstract is not None
+        first_lvl = abstract.find(qn("w:lvl"))
+        fmt = first_lvl.find(qn("w:numFmt")).get(qn("w:val"))
+        assert fmt == "decimal", f"Top-level should be decimal, got {fmt}"
+
+    def test_sub_items_use_lower_letter_parenthesized(self):
+        from docx.oxml.ns import qn
+
+        doc, numbered = self._render()
+        num_id, ilvl = next(numpr for text, numpr in numbered if "Elephants are" in text)
+        abstract = self._get_abstract_num_for(doc, num_id)
+        assert abstract is not None
+
+        # Pick the level definition that matches the ilvl the paragraph uses.
+        target_lvl = next(
+            (lvl for lvl in abstract.findall(qn("w:lvl")) if int(lvl.get(qn("w:ilvl"))) == ilvl),
+            None,
+        )
+        assert target_lvl is not None
+
+        fmt = target_lvl.find(qn("w:numFmt")).get(qn("w:val"))
+        assert fmt == "lowerLetter", f"Sub-items should be lowerLetter, got {fmt}"
+        lvl_text = target_lvl.find(qn("w:lvlText")).get(qn("w:val"))
+        assert "(" in lvl_text, f"Sub-items should have parenthesized format, got '{lvl_text}'"
+
+    def test_elephant_text_on_same_line_as_number(self):
+        """The parent item text must appear in the same paragraph as the numbering."""
+        _, numbered = self._render()
+        elephant_paras = [(t, n) for t, n in numbered if "Elephant" in t and n[1] == 0]
+        assert len(elephant_paras) >= 1
+        assert elephant_paras[0][0] == "Elephant", (
+            f"Top-level text should be exactly 'Elephant', got '{elephant_paras[0][0]}'"
+        )
+
+    def test_parent_and_children_share_same_numid(self):
+        """Word requires nested lists to share the same numId to render correctly."""
+        _, numbered = self._render()
+        num_ids = {numpr[0] for _, numpr in numbered}
+        assert len(num_ids) == 1, f"All items should share one numId for coherent multilevel numbering, got {num_ids}"
+
+
+class TestAlphabeticListDoesNotSwitchAtI:
+    """Regression: an (a)…(z) alphabetic list must stay one list.
+
+    Single letters that are also roman numerals — ``(i)``, ``(v)``, ``(x)``,
+    ``(l)``, ``(c)``, ``(d)``, ``(m)`` — must not switch the numbering format
+    part-way through.  Per the original author's note, ``_detect_paren_type``
+    classifies these letters as lowerRoman first, and the reconciliation pass
+    has to correct that when the letter continues an alphabetic run.
+    """
+
+    MARKDOWN = "\n".join(f"({chr(ord('a') + n)}) item {n + 1}" for n in range(26))
+
+    @staticmethod
+    def _get_numpr(paragraph):
+        """Return ``(numId, ilvl)`` from the paragraph's ``w:numPr``, or ``None`` if any part is missing."""
+        from docx.oxml.ns import qn
+
+        node = paragraph._p
+        for tag in ("w:pPr", "w:numPr"):
+            node = node.find(qn(tag))
+            if node is None:
+                return None
+        num_id_el = node.find(qn("w:numId"))
+        ilvl_el = node.find(qn("w:ilvl"))
+        if num_id_el is None or ilvl_el is None:
+            return None
+        val = qn("w:val")
+        return int(num_id_el.get(val)), int(ilvl_el.get(val))
+
+    def test_all_items_share_same_numid(self):
+        """All (a)–(z) items must share a single numId, confirming they
+        form one continuous list and no letter is misclassified as roman."""
+        from docx import Document
+
+        doc = Document()
+        parse_markdown_to_docx(doc, self.MARKDOWN)
+
+        numbered = []
+        for para in doc.paragraphs:
+            numpr = self._get_numpr(para)
+            if numpr:
+                numbered.append((para.text.strip(), numpr))
+
+        assert len(numbered) == 26, f"Expected 26 numbered paragraphs (a)–(z), got {len(numbered)}: {numbered}"
+
+        num_ids = {numpr[0] for _, numpr in numbered}
+        assert len(num_ids) == 1, (
+            f"All (a)–(z) items should share one numId for a continuous "
+            f"list, but got {len(num_ids)} distinct numIds: {num_ids}"
+        )
+
+
+class TestRomanNumeralListUpTo100:
+    """Check that a parenthesized roman-numeral list (i)–(c) of 100 items
+    is recognised and rendered as a single continuous Word list.
+
+    Per the original author's note, the integration only supported roman
+    numerals up to xii (12) when this test was written, so it is expected to
+    **fail** until that support is extended.
+    """
+
+    # (value, lowercase numeral) pairs in descending order of value.
+    _ROMAN_MAP = [
+        (1000, "m"),
+        (900, "cm"),
+        (500, "d"),
+        (400, "cd"),
+        (100, "c"),
+        (90, "xc"),
+        (50, "l"),
+        (40, "xl"),
+        (10, "x"),
+        (9, "ix"),
+        (5, "v"),
+        (4, "iv"),
+        (1, "i"),
+    ]
+
+    @classmethod
+    def _to_roman(cls, n: int) -> str:
+        """Return *n* as a lowercase roman numeral by greedy subtraction over ``_ROMAN_MAP``."""
+        out = ""
+        for value, numeral in cls._ROMAN_MAP:
+            while value <= n:
+                out += numeral
+                n -= value
+        return out
+
+    @classmethod
+    def _build_markdown(cls) -> str:
+        """Return 100 lines of the form ``(<roman>) item <n>`` for n = 1..100."""
+        lines = [f"({cls._to_roman(n)}) item {n}" for n in range(1, 101)]
+        return "\n".join(lines)
+
+    MARKDOWN = None  # filled in on first use by the autouse fixture below
+
+    @pytest.fixture(autouse=True)
+    def _setup_markdown(self):
+        """Build the shared ``MARKDOWN`` class attribute once, before the first test that needs it."""
+        owner = TestRomanNumeralListUpTo100
+        if owner.MARKDOWN is None:
+            owner.MARKDOWN = self._build_markdown()
+
+    @staticmethod
+    def _get_numpr(paragraph):
+        """Return ``(numId, ilvl)`` from the paragraph's ``w:numPr``, or ``None`` if any part is missing."""
+        from docx.oxml.ns import qn
+
+        node = paragraph._p
+        for tag in ("w:pPr", "w:numPr"):
+            node = node.find(qn(tag))
+            if node is None:
+                return None
+        num_id_el = node.find(qn("w:numId"))
+        ilvl_el = node.find(qn("w:ilvl"))
+        if num_id_el is None or ilvl_el is None:
+            return None
+        val = qn("w:val")
+        return int(num_id_el.get(val)), int(ilvl_el.get(val))
+
+    def _numbered(self):
+        """Render ``MARKDOWN`` into a fresh document and return its ``(text, (numId, ilvl))`` rows."""
+        from docx import Document
+
+        doc = Document()
+        parse_markdown_to_docx(doc, self.MARKDOWN)
+        rows = []
+        for para in doc.paragraphs:
+            numpr = self._get_numpr(para)
+            if numpr:
+                rows.append((para.text.strip(), numpr))
+        return rows
+
+    def test_produces_100_numbered_paragraphs(self):
+        """All 100 roman-numeral items must appear as numbered paragraphs."""
+        numbered = self._numbered()
+
+        assert len(numbered) == 100, f"Expected 100 numbered paragraphs, got {len(numbered)}"
+
+    def test_all_items_share_same_numid(self):
+        """All (i)–(c) items must share a single numId, confirming they
+        form one continuous list."""
+        numbered = self._numbered()
+
+        num_ids = {numpr[0] for _, numpr in numbered}
+        assert len(num_ids) == 1, (
+            f"All (i)–(c) items should share one numId for a continuous "
+            f"list, but got {len(num_ids)} distinct numIds: {num_ids}"
+        )
+
+
+class TestMultipleParenListsAfterHeadings:
+    """Check that several (1)-style lists separated by headings all show numbering
+    and are left-aligned when the markdown has no leading spaces."""
+
+    MARKDOWN = "# Animals\n(1) Elephant\n(2) Tiger\n# Fish\n(1) squid\n(2) Whale"
+
+    @staticmethod
+    def _get_numpr(paragraph):
+        """Return ``(numId, ilvl)`` from the paragraph's ``w:numPr``, or ``None`` if any part is missing."""
+        from docx.oxml.ns import qn
+
+        node = paragraph._p
+        for tag in ("w:pPr", "w:numPr"):
+            node = node.find(qn(tag))
+            if node is None:
+                return None
+        num_id_el = node.find(qn("w:numId"))
+        ilvl_el = node.find(qn("w:ilvl"))
+        if num_id_el is None or ilvl_el is None:
+            return None
+        val = qn("w:val")
+        return int(num_id_el.get(val)), int(ilvl_el.get(val))
+
+    @staticmethod
+    def _get_abstract_num_for(doc, num_id):
+        """Return the ``w:abstractNum`` element that the given numId points at, or ``None``."""
+        from docx.oxml.ns import qn
+
+        numbering = doc.part.numbering_part._element
+        abstracts = numbering.findall(qn("w:abstractNum"))
+        for num_el in numbering.findall(qn("w:num")):
+            if int(num_el.get(qn("w:numId"))) != num_id:
+                continue
+            wanted = int(num_el.find(qn("w:abstractNumId")).get(qn("w:val")))
+            for candidate in abstracts:
+                if int(candidate.get(qn("w:abstractNumId"))) == wanted:
+                    return candidate
+        return None
+
+    def _render(self):
+        """Convert ``MARKDOWN`` into a fresh document; return it with its ``(text, (numId, ilvl))`` rows."""
+        from docx import Document
+
+        doc = Document()
+        parse_markdown_to_docx(doc, self.MARKDOWN)
+        rows = []
+        for para in doc.paragraphs:
+            numpr = self._get_numpr(para)
+            if numpr:
+                rows.append((para.text.strip(), numpr))
+        return doc, rows
+
+    def test_both_lists_produce_numbered_paragraphs(self):
+        _, numbered = self._render()
+        assert len(numbered) == 4, f"Expected 4 numbered paragraphs, got {len(numbered)}: {numbered}"
+
+    def test_each_list_has_its_own_abstract_num(self):
+        """Each independent list must get its own abstractNum to avoid Word dropping numbers."""
+        from docx.oxml.ns import qn
+
+        doc, numbered = self._render()
+        # Rows 0-1 belong to the "Animals" list, rows 2-3 to the "Fish" list.
+        animals_abstract = self._get_abstract_num_for(doc, numbered[0][1][0])
+        fish_abstract = self._get_abstract_num_for(doc, numbered[2][1][0])
+
+        assert animals_abstract is not None
+        assert fish_abstract is not None
+
+        id_attr = qn("w:abstractNumId")
+        animals_abstract_id = int(animals_abstract.get(id_attr))
+        fish_abstract_id = int(fish_abstract.get(id_attr))
+        assert animals_abstract_id != fish_abstract_id, (
+            "Each list should reference a different abstractNum to prevent Word from dropping numbers"
+        )
+
+    def test_all_items_at_ilvl_zero(self):
+        _, numbered = self._render()
+        for text, (num_id, ilvl) in numbered:
+            assert ilvl == 0, f"'{text}' should be at ilvl 0, got {ilvl}"
+
+    def test_level_zero_is_left_aligned(self):
+        """Level-0 paren lists with no leading spaces should be left-aligned (left=hanging, hanging=504)."""
+        from docx.oxml.ns import qn
+
+        doc, numbered = self._render()
+        abstract = self._get_abstract_num_for(doc, numbered[0][1][0])
+        assert abstract is not None
+
+        lvl0 = next(
+            (lvl for lvl in abstract.findall(qn("w:lvl")) if int(lvl.get(qn("w:ilvl"))) == 0),
+            None,
+        )
+        assert lvl0 is not None
+
+        pPr = lvl0.find(qn("w:pPr"))
+        assert pPr is not None
+        ind = pPr.find(qn("w:ind"))
+        assert ind is not None
+        left = ind.get(qn("w:left"))
+        hanging = ind.get(qn("w:hanging"))
+        assert left == hanging, (
+            f"Level 0 left indent should equal hanging (left-aligned), got left={left}, hanging={hanging}"
+        )
+
+    def test_all_use_decimal_parenthesized_format(self):
+        from docx.oxml.ns import qn
+
+        doc, numbered = self._render()
+        for text, (num_id, ilvl) in numbered:
+            abstract = self._get_abstract_num_for(doc, num_id)
+            assert abstract is not None
+            for lvl in abstract.findall(qn("w:lvl")):
+                if int(lvl.get(qn("w:ilvl"))) != ilvl:
+                    continue
+                fmt = lvl.find(qn("w:numFmt")).get(qn("w:val"))
+                assert fmt == "decimal", f"'{text}' should use decimal, got {fmt}"
+                lvl_text = lvl.find(qn("w:lvlText")).get(qn("w:val"))
+                assert "(" in lvl_text, f"'{text}' should have paren format, got '{lvl_text}'"
+
+
+class TestMixedNumberedListFormats:
+ """Verify that five different numbered-list styles all render correctly:
+ 1. standard ``1. 2. 3.`` decimal
+ 2. parenthesized decimal ``(1) (2) (3)``
+ 3. parenthesized lower letter ``(a) (b) (c)``
+ 4. parenthesized upper letter ``(A) (B) (C)``
+ 5. parenthesized lower roman ``(i) (ii) (iii)``
+
+ Each list must:
+ - be a real numbered (not bullet) list in the OOXML
+ - use the correct numFmt
+ - use left-aligned justification
+ - display the correct item text ("one", "two", "three")
+ - have a consistent hanging indent so text is aligned across items
+ whose numbering labels differ in width (e.g. ``(i)`` vs ``(iii)``).
+ """
+
+ MARKDOWN = (
+ "# numbers\n"
+ "1. one\n"
+ "2. two\n"
+ "3. three\n"
+ "# numbers in brackets\n"
+ "(1) one\n"
+ "(2) two\n"
+ "(3) three\n"
+ "# letters in brackets\n"
+ "(a) one\n"
+ "(b) two\n"
+ "(c) three\n"
+ "# capital letters in brackets\n"
+ "(A) one\n"
+ "(B) two\n"
+ "(C) three\n"
+ "# roman numerals\n"
+ "(i) one\n"
+ "(ii) two\n"
+ "(iii) three"
+ )
+
+ @staticmethod
+ def _get_numpr(paragraph):
+ from docx.oxml.ns import qn
+
+ pPr = paragraph._p.find(qn("w:pPr"))
+ if pPr is None:
+ return None
+ numPr = pPr.find(qn("w:numPr"))
+ if numPr is None:
+ return None
+ numId_el = numPr.find(qn("w:numId"))
+ ilvl_el = numPr.find(qn("w:ilvl"))
+ if numId_el is None or ilvl_el is None:
+ return None
+ return int(numId_el.get(qn("w:val"))), int(ilvl_el.get(qn("w:val")))
+
+ @staticmethod
+ def _get_abstract_num_for(doc, num_id):
+ from docx.oxml.ns import qn
+
+ numbering = doc.part.numbering_part._element
+ for num_el in numbering.findall(qn("w:num")):
+ if int(num_el.get(qn("w:numId"))) == num_id:
+ abstract_ref = num_el.find(qn("w:abstractNumId"))
+ abstract_id = int(abstract_ref.get(qn("w:val")))
+ for an in numbering.findall(qn("w:abstractNum")):
+ if int(an.get(qn("w:abstractNumId"))) == abstract_id:
+ return an
+ return None
+
+ @staticmethod
+ def _get_lvl(abstract, ilvl):
+ from docx.oxml.ns import qn
+
+ for lvl in abstract.findall(qn("w:lvl")):
+ if int(lvl.get(qn("w:ilvl"))) == ilvl:
+ return lvl
+ return None
+
+ def _build_doc(self):
+ from docx import Document
+
+ doc = Document()
+ parse_markdown_to_docx(doc, self.MARKDOWN)
+ return doc
+
+ def _numbered_paragraphs(self, doc):
+ return [(p.text.strip(), self._get_numpr(p)) for p in doc.paragraphs if self._get_numpr(p)]
+
+ # ---- 1. All 15 items are numbered paragraphs ----
+
+ def test_produces_fifteen_numbered_paragraphs(self):
+ doc = self._build_doc()
+ numbered = self._numbered_paragraphs(doc)
+ assert len(numbered) == 15, (
+ f"Expected 15 numbered paragraphs (5 lists × 3 items), got {len(numbered)}: {[t for t, _ in numbered]}"
+ )
+
+ # ---- 2. Item text is correct ----
+
+ def test_item_text_is_correct(self):
+ doc = self._build_doc()
+ numbered = self._numbered_paragraphs(doc)
+ texts = [t for t, _ in numbered]
+ for i in range(5):
+ group = texts[i * 3 : i * 3 + 3]
+ assert group == ["one", "two", "three"], (
+ f"List group {i} text should be ['one', 'two', 'three'], got {group}"
+ )
+
+ # ---- 3. Each list uses the correct numFmt ----
+
+ def test_standard_decimal_uses_decimal_format(self):
+ from docx.oxml.ns import qn
+
+ doc = self._build_doc()
+ numbered = self._numbered_paragraphs(doc)
+ num_id = numbered[0][1][0]
+ ilvl = numbered[0][1][1]
+ abstract = self._get_abstract_num_for(doc, num_id)
+ lvl = self._get_lvl(abstract, ilvl)
+ fmt = lvl.find(qn("w:numFmt")).get(qn("w:val"))
+ assert fmt == "decimal", f"Standard numbered list should be decimal, got {fmt}"
+
+ def test_paren_decimal_uses_decimal_format(self):
+ from docx.oxml.ns import qn
+
+ doc = self._build_doc()
+ numbered = self._numbered_paragraphs(doc)
+ num_id = numbered[3][1][0]
+ ilvl = numbered[3][1][1]
+ abstract = self._get_abstract_num_for(doc, num_id)
+ lvl = self._get_lvl(abstract, ilvl)
+ fmt = lvl.find(qn("w:numFmt")).get(qn("w:val"))
+ assert fmt == "decimal", f"Paren decimal list should be decimal, got {fmt}"
+
+ def test_paren_lower_letter_uses_lower_letter_format(self):
+ from docx.oxml.ns import qn
+
+ doc = self._build_doc()
+ numbered = self._numbered_paragraphs(doc)
+ num_id = numbered[6][1][0]
+ ilvl = numbered[6][1][1]
+ abstract = self._get_abstract_num_for(doc, num_id)
+ lvl = self._get_lvl(abstract, ilvl)
+ fmt = lvl.find(qn("w:numFmt")).get(qn("w:val"))
+ assert fmt == "lowerLetter", f"Lower letter list should be lowerLetter, got {fmt}"
+
+ def test_paren_upper_letter_uses_upper_letter_format(self):
+ from docx.oxml.ns import qn
+
+ doc = self._build_doc()
+ numbered = self._numbered_paragraphs(doc)
+ num_id = numbered[9][1][0]
+ ilvl = numbered[9][1][1]
+ abstract = self._get_abstract_num_for(doc, num_id)
+ lvl = self._get_lvl(abstract, ilvl)
+ fmt = lvl.find(qn("w:numFmt")).get(qn("w:val"))
+ assert fmt == "upperLetter", f"Upper letter list should be upperLetter, got {fmt}"
+
+ def test_paren_roman_uses_lower_roman_format(self):
+ from docx.oxml.ns import qn
+
+ doc = self._build_doc()
+ numbered = self._numbered_paragraphs(doc)
+ num_id = numbered[12][1][0]
+ ilvl = numbered[12][1][1]
+ abstract = self._get_abstract_num_for(doc, num_id)
+ lvl = self._get_lvl(abstract, ilvl)
+ fmt = lvl.find(qn("w:numFmt")).get(qn("w:val"))
+ assert fmt == "lowerRoman", f"Roman numeral list should be lowerRoman, got {fmt}"
+
+ # ---- 4. Parenthesized lvlText for bracket lists ----
+
+ def test_paren_lists_use_parenthesized_lvl_text(self):
+ from docx.oxml.ns import qn
+
+ doc = self._build_doc()
+ numbered = self._numbered_paragraphs(doc)
+ # Lists at indices 3, 6, 9, 12 are the paren lists
+ for start_idx in (3, 6, 9, 12):
+ num_id = numbered[start_idx][1][0]
+ ilvl = numbered[start_idx][1][1]
+ abstract = self._get_abstract_num_for(doc, num_id)
+ lvl = self._get_lvl(abstract, ilvl)
+ lvl_text = lvl.find(qn("w:lvlText")).get(qn("w:val"))
+ assert "(" in lvl_text and ")" in lvl_text, (
+ f"Item '{numbered[start_idx][0]}' (idx {start_idx}) should have parenthesized lvlText, got '{lvl_text}'"
+ )
+
+ def test_standard_decimal_uses_dot_lvl_text(self):
+ from docx.oxml.ns import qn
+
+ doc = self._build_doc()
+ numbered = self._numbered_paragraphs(doc)
+ num_id = numbered[0][1][0]
+ ilvl = numbered[0][1][1]
+ abstract = self._get_abstract_num_for(doc, num_id)
+ lvl = self._get_lvl(abstract, ilvl)
+ lvl_text = lvl.find(qn("w:lvlText")).get(qn("w:val"))
+ assert "." in lvl_text, f"Standard decimal should use dot format, got '{lvl_text}'"
+
+ # ---- 5. Left-aligned justification ----
+
+ def test_all_lists_are_left_aligned(self):
+ from docx.oxml.ns import qn
+
+ doc = self._build_doc()
+ numbered = self._numbered_paragraphs(doc)
+ checked = set()
+ for text, (num_id, ilvl) in numbered:
+ key = (num_id, ilvl)
+ if key in checked:
+ continue
+ checked.add(key)
+ abstract = self._get_abstract_num_for(doc, num_id)
+ lvl = self._get_lvl(abstract, ilvl)
+ jc = lvl.find(qn("w:lvlJc"))
+ assert jc is not None, f"'{text}' level should have lvlJc element"
+ assert jc.get(qn("w:val")) == "left", f"'{text}' should be left-aligned, got '{jc.get(qn('w:val'))}'"
+
+ # ---- 6. Text alignment consistency (hanging indent) ----
+
+ def test_items_within_each_list_share_same_hanging_indent(self):
+ """All items in a single list must use the same hanging indent so that
+ the text column is aligned even when numbering labels vary in width
+ (e.g. ``(i)`` vs ``(iii)``)."""
+ from docx.oxml.ns import qn
+
+ doc = self._build_doc()
+ numbered = self._numbered_paragraphs(doc)
+
+ for start_idx in range(0, 15, 3):
+ group = numbered[start_idx : start_idx + 3]
+ # All items in a group share the same numId + ilvl, so they share
+ # the same abstractNum level definition → same indent.
+ num_id = group[0][1][0]
+ ilvl = group[0][1][1]
+ abstract = self._get_abstract_num_for(doc, num_id)
+ lvl = self._get_lvl(abstract, ilvl)
+ pPr = lvl.find(qn("w:pPr"))
+ assert pPr is not None, f"Level {ilvl} should have pPr"
+ ind = pPr.find(qn("w:ind"))
+ assert ind is not None, f"Level {ilvl} should have indent"
+ hanging = ind.get(qn("w:hanging"))
+ left = ind.get(qn("w:left"))
+ assert hanging is not None, f"Hanging indent should be set for list starting at idx {start_idx}"
+ assert left is not None, f"Left indent should be set for list starting at idx {start_idx}"
+
+ def test_all_lists_share_same_hanging_indent(self):
+ """All lists at the same indentation level must use the same left and
+ hanging indent so that item text aligns at the same column regardless
+ of whether the list uses ``1.``, ``(a)``, ``(A)``, or ``(iii)`` labels."""
+ from docx.oxml.ns import qn
+
+ doc = self._build_doc()
+ numbered = self._numbered_paragraphs(doc)
+
+ indent_values = []
+ for start_idx in range(0, 15, 3):
+ num_id = numbered[start_idx][1][0]
+ ilvl = numbered[start_idx][1][1]
+ abstract = self._get_abstract_num_for(doc, num_id)
+ lvl = self._get_lvl(abstract, ilvl)
+ ind = lvl.find(qn("w:pPr")).find(qn("w:ind"))
+ hanging = ind.get(qn("w:hanging"))
+ left = ind.get(qn("w:left"))
+ indent_values.append((left, hanging))
+
+ first = indent_values[0]
+ for i, val in enumerate(indent_values):
+ assert val == first, (
+ f"List group {i} indent {val} differs from group 0 indent {first}; "
+ f"all lists must share the same indent for consistent text alignment"
+ )
+
+ def test_all_levels_use_tab_suffix(self):
+ """Each numbering level must use ``<w:suff w:val="tab"/>`` so Word
+ inserts a tab (not a space) after the label. This ensures text aligns
+ at the left-indent position regardless of label width."""
+ from docx.oxml.ns import qn
+
+ doc = self._build_doc()
+ numbered = self._numbered_paragraphs(doc)
+
+ checked = set()
+ for text, (num_id, ilvl) in numbered:
+ key = (num_id, ilvl)
+ if key in checked:
+ continue
+ checked.add(key)
+ abstract = self._get_abstract_num_for(doc, num_id)
+ lvl = self._get_lvl(abstract, ilvl)
+ suff = lvl.find(qn("w:suff"))
+ assert suff is not None, f"'{text}' level should have a <w:suff> element"
+ assert suff.get(qn("w:val")) == "tab", f"'{text}' suffix should be 'tab', got '{suff.get(qn('w:val'))}'"
+
+
+class TestOrderedListStartOverride:
+ """Verify that ordered lists respect the start number from the markdown.
+
+ Markdown input:
+ # numbers
+ 1. one
+ 2. two
+ 3. three
+ # continuation of numbers
+ 4. four
+ 5. five
+ 6. six
+
+ Expected: two separate lists, each with 3 items.
+ The first list numbers 1, 2, 3; the second list numbers 4, 5, 6.
+ """
+
+ MARKDOWN = "# numbers\n1. one\n2. two\n3. three\n# continuation of numbers\n4. four\n5. five\n6. six\n"
+
+ EXPECTED_ITEMS = [
+ ("one", 1),
+ ("two", 2),
+ ("three", 3),
+ ("four", 4),
+ ("five", 5),
+ ("six", 6),
+ ]
+
+ @staticmethod
+ def _get_numpr(paragraph):
+ from docx.oxml.ns import qn
+
+ pPr = paragraph._p.find(qn("w:pPr"))
+ if pPr is None:
+ return None
+ numPr = pPr.find(qn("w:numPr"))
+ if numPr is None:
+ return None
+ numId_el = numPr.find(qn("w:numId"))
+ ilvl_el = numPr.find(qn("w:ilvl"))
+ if numId_el is None or ilvl_el is None:
+ return None
+ return int(numId_el.get(qn("w:val"))), int(ilvl_el.get(qn("w:val")))
+
+ @staticmethod
+ def _get_start_val(doc, num_id, ilvl):
+ """Return the effective start value for a numbering instance.
+
+ Checks <w:lvlOverride>/<w:startOverride> first, then falls back to
+ the <w:start> value in the abstract numbering level definition.
+ """
+ from docx.oxml.ns import qn
+
+ numbering = doc.part.numbering_part._element
+ for num_el in numbering.findall(qn("w:num")):
+ if int(num_el.get(qn("w:numId"))) != num_id:
+ continue
+ # Check for startOverride
+ for ovr in num_el.findall(qn("w:lvlOverride")):
+ if int(ovr.get(qn("w:ilvl"))) == ilvl:
+ start_ovr = ovr.find(qn("w:startOverride"))
+ if start_ovr is not None:
+ return int(start_ovr.get(qn("w:val")))
+ # Fall back to abstract num
+ abs_ref = num_el.find(qn("w:abstractNumId"))
+ abs_id = int(abs_ref.get(qn("w:val")))
+ for an in numbering.findall(qn("w:abstractNum")):
+ if int(an.get(qn("w:abstractNumId"))) == abs_id:
+ for lvl_el in an.findall(qn("w:lvl")):
+ if int(lvl_el.get(qn("w:ilvl"))) == ilvl:
+ start_el = lvl_el.find(qn("w:start"))
+ if start_el is not None:
+ return int(start_el.get(qn("w:val")))
+ return None
+
+ def _build_doc(self):
+ from docx import Document
+
+ doc = Document()
+ parse_markdown_to_docx(doc, self.MARKDOWN)
+ return doc
+
+ def _numbered_paragraphs(self, doc):
+ return [(p.text.strip(), self._get_numpr(p)) for p in doc.paragraphs if self._get_numpr(p)]
+
+ def test_produces_six_numbered_paragraphs(self):
+ doc = self._build_doc()
+ numbered = self._numbered_paragraphs(doc)
+ assert len(numbered) == 6, f"Expected 6 numbered paragraphs, got {len(numbered)}: {[t for t, _ in numbered]}"
+
+ def test_two_distinct_lists(self):
+ doc = self._build_doc()
+ numbered = self._numbered_paragraphs(doc)
+ num_ids = [numpr[0] for _, numpr in numbered]
+ distinct = list(dict.fromkeys(num_ids))
+ assert len(distinct) == 2, f"Expected 2 distinct numIds (two lists), got {len(distinct)}: {distinct}"
+
+ def test_each_list_has_three_items(self):
+ doc = self._build_doc()
+ numbered = self._numbered_paragraphs(doc)
+ num_ids = [numpr[0] for _, numpr in numbered]
+ distinct = list(dict.fromkeys(num_ids))
+ first_count = sum(1 for n in num_ids if n == distinct[0])
+ second_count = sum(1 for n in num_ids if n == distinct[1])
+ assert first_count == 3, f"First list should have 3 items, got {first_count}"
+ assert second_count == 3, f"Second list should have 3 items, got {second_count}"
+
+ def test_item_text_matches(self):
+ doc = self._build_doc()
+ numbered = self._numbered_paragraphs(doc)
+ for idx, (text, _) in enumerate(numbered):
+ expected_text = self.EXPECTED_ITEMS[idx][0]
+ assert text == expected_text, f"Item {idx}: expected text {expected_text!r}, got {text!r}"
+
+ def test_first_list_starts_at_one(self):
+ doc = self._build_doc()
+ numbered = self._numbered_paragraphs(doc)
+ num_id, ilvl = numbered[0][1]
+ start = self._get_start_val(doc, num_id, ilvl)
+ assert start == 1, f"First list should start at 1, got {start}"
+
+ def test_second_list_starts_at_four(self):
+ doc = self._build_doc()
+ numbered = self._numbered_paragraphs(doc)
+ num_id, ilvl = numbered[3][1]
+ start = self._get_start_val(doc, num_id, ilvl)
+ assert start == 4, f"Second list should start at 4, got {start}"
+
+ def test_effective_numbers_are_correct(self):
+ """Verify that the effective number for each item is correct by
+ checking the start value of its list and its position within the list."""
+ doc = self._build_doc()
+ numbered = self._numbered_paragraphs(doc)
+
+ for idx, (text, (num_id, ilvl)) in enumerate(numbered):
+ list_start = self._get_start_val(doc, num_id, ilvl)
+ position_in_list = sum(1 for i in range(idx) if numbered[i][1][0] == num_id)
+ effective_number = list_start + position_in_list
+ expected_number = self.EXPECTED_ITEMS[idx][1]
+ assert effective_number == expected_number, (
+ f"Item {idx} ({text!r}): expected number {expected_number}, "
+ f"got {effective_number} (list_start={list_start}, pos={position_in_list})"
+ )
+
+ def test_abstract_num_start_matches_override(self):
+ """The abstract numbering <w:start> value must match the startOverride
+ so that renderers which ignore lvlOverride still produce correct
+ numbering."""
+ from docx.oxml.ns import qn
+
+ doc = self._build_doc()
+ numbered = self._numbered_paragraphs(doc)
+ numbering = doc.part.numbering_part._element
+
+ # Second list (items 3-5) should have abstract start = 4
+ num_id, ilvl = numbered[3][1]
+ for num_el in numbering.findall(qn("w:num")):
+ if int(num_el.get(qn("w:numId"))) != num_id:
+ continue
+ abs_ref = num_el.find(qn("w:abstractNumId"))
+ abs_id = int(abs_ref.get(qn("w:val")))
+ for an in numbering.findall(qn("w:abstractNum")):
+ if int(an.get(qn("w:abstractNumId"))) == abs_id:
+ for lvl_el in an.findall(qn("w:lvl")):
+ if int(lvl_el.get(qn("w:ilvl"))) == ilvl:
+ start_el = lvl_el.find(qn("w:start"))
+ assert start_el is not None
+ assert int(start_el.get(qn("w:val"))) == 4, (
+ f"Abstract numbering start should be 4, got {start_el.get(qn('w:val'))}"
+ )
+
+
+class TestNestedNumberedListIndentation:
+ """Verify deeply nested numbered lists with mixed parenthesized formats.
+
+ Markdown input:
+ 1. one
+     (1) one
+         (a) one
+             (A) one
+                 (i) one
+                 (ii) two
+             (B) two
+                 (i) one
+                 (ii) two
+         (b) two
+             (A) one
+             (B) two
+     (2) two
+         (a) one
+         (b) two
+ 2. two
+     (1) one
+     (2) two
+
+ Expected nesting levels (ilvl):
+ 0 → 1. / 2.
+ 1 → (1) / (2)
+ 2 → (a) / (b)
+ 3 → (A) / (B)
+ 4 → (i) / (ii)
+
+ Each ilvl must have left indent = hanging * (ilvl + 1) where
+ hanging = 504 twips.
+ """
+
+ MARKDOWN = (
+ "# nested numbered lists\n"
+ "1. one\n"
+ " (1) one\n"
+ "\t (a) one\n"
+ "\t\t (A) one\n"
+ "\t\t\t (i) one\n"
+ "\t\t\t\t(ii) two\n"
+ "\t\t\t(B) two\n"
+ "\t\t\t (i) one\n"
+ "\t\t\t\t(ii) two\n"
+ "\t\t(b) two\n"
+ "\t\t (A) one\n"
+ "\t\t\t(B) two\n"
+ "\t(2) two\n"
+ "\t (a) one\n"
+ "\t\t(b) two\n"
+ "2. two\n"
+ " (1) one\n"
+ "\t(2) two\n"
+ )
+
+ # (expected_text, expected_ilvl, expected_num_fmt)
+ EXPECTED_ITEMS = [
+ ("one", 0, "decimal"), # 1.
+ ("one", 1, "decimal"), # (1)
+ ("one", 2, "lowerLetter"), # (a)
+ ("one", 3, "upperLetter"), # (A)
+ ("one", 4, "lowerRoman"), # (i)
+ ("two", 4, "lowerRoman"), # (ii)
+ ("two", 3, "upperLetter"), # (B)
+ ("one", 4, "lowerRoman"), # (i)
+ ("two", 4, "lowerRoman"), # (ii)
+ ("two", 2, "lowerLetter"), # (b)
+ ("one", 3, "upperLetter"), # (A)
+ ("two", 3, "upperLetter"), # (B)
+ ("two", 1, "decimal"), # (2)
+ ("one", 2, "lowerLetter"), # (a)
+ ("two", 2, "lowerLetter"), # (b)
+ ("two", 0, "decimal"), # 2.
+ ("one", 1, "decimal"), # (1)
+ ("two", 1, "decimal"), # (2)
+ ]
+
+ HANGING = 504 # _LIST_HANGING_INDENT
+
+ @staticmethod
+ def _get_numpr(paragraph):
+ from docx.oxml.ns import qn
+
+ pPr = paragraph._p.find(qn("w:pPr"))
+ if pPr is None:
+ return None
+ numPr = pPr.find(qn("w:numPr"))
+ if numPr is None:
+ return None
+ numId_el = numPr.find(qn("w:numId"))
+ ilvl_el = numPr.find(qn("w:ilvl"))
+ if numId_el is None or ilvl_el is None:
+ return None
+ return int(numId_el.get(qn("w:val"))), int(ilvl_el.get(qn("w:val")))
+
+ @staticmethod
+ def _get_abstract_num_for(doc, num_id):
+ from docx.oxml.ns import qn
+
+ numbering = doc.part.numbering_part._element
+ for num_el in numbering.findall(qn("w:num")):
+ if int(num_el.get(qn("w:numId"))) == num_id:
+ abstract_ref = num_el.find(qn("w:abstractNumId"))
+ abstract_id = int(abstract_ref.get(qn("w:val")))
+ for an in numbering.findall(qn("w:abstractNum")):
+ if int(an.get(qn("w:abstractNumId"))) == abstract_id:
+ return an
+ return None
+
+ @staticmethod
+ def _get_lvl(abstract, ilvl):
+ from docx.oxml.ns import qn
+
+ for lvl in abstract.findall(qn("w:lvl")):
+ if int(lvl.get(qn("w:ilvl"))) == ilvl:
+ return lvl
+ return None
+
+ def _build_doc(self):
+ from docx import Document
+
+ doc = Document()
+ parse_markdown_to_docx(doc, self.MARKDOWN)
+ return doc
+
+ def _numbered_paragraphs(self, doc):
+ return [(p.text.strip(), self._get_numpr(p)) for p in doc.paragraphs if self._get_numpr(p)]
+
+ def test_produces_eighteen_numbered_paragraphs(self):
+ doc = self._build_doc()
+ numbered = self._numbered_paragraphs(doc)
+ assert len(numbered) == 18, f"Expected 18 numbered paragraphs, got {len(numbered)}: {[t for t, _ in numbered]}"
+
+ def test_item_text_matches_expected(self):
+ doc = self._build_doc()
+ numbered = self._numbered_paragraphs(doc)
+ for idx, (text, _) in enumerate(numbered):
+ expected_text = self.EXPECTED_ITEMS[idx][0]
+ assert text == expected_text, f"Item {idx}: expected text {expected_text!r}, got {text!r}"
+
+ def test_ilvl_matches_expected(self):
+ doc = self._build_doc()
+ numbered = self._numbered_paragraphs(doc)
+ for idx, (text, (num_id, ilvl)) in enumerate(numbered):
+ expected_ilvl = self.EXPECTED_ITEMS[idx][1]
+ assert ilvl == expected_ilvl, f"Item {idx} ({text!r}): expected ilvl={expected_ilvl}, got ilvl={ilvl}"
+
+ def test_num_fmt_matches_expected(self):
+ from docx.oxml.ns import qn
+
+ doc = self._build_doc()
+ numbered = self._numbered_paragraphs(doc)
+ for idx, (text, (num_id, ilvl)) in enumerate(numbered):
+ expected_fmt = self.EXPECTED_ITEMS[idx][2]
+ abstract = self._get_abstract_num_for(doc, num_id)
+ lvl = self._get_lvl(abstract, ilvl)
+ fmt = lvl.find(qn("w:numFmt")).get(qn("w:val"))
+ assert fmt == expected_fmt, f"Item {idx} ({text!r}): expected numFmt={expected_fmt!r}, got {fmt!r}"
+
+ def test_left_indent_matches_ilvl(self):
+ from docx.oxml.ns import qn
+
+ doc = self._build_doc()
+ numbered = self._numbered_paragraphs(doc)
+ for idx, (text, (num_id, ilvl)) in enumerate(numbered):
+ expected_left = self.HANGING * (ilvl + 1)
+ abstract = self._get_abstract_num_for(doc, num_id)
+ lvl = self._get_lvl(abstract, ilvl)
+ pPr = lvl.find(qn("w:pPr"))
+ assert pPr is not None, f"Item {idx} ({text!r}): missing pPr"
+ ind = pPr.find(qn("w:ind"))
+ assert ind is not None, f"Item {idx} ({text!r}): missing ind"
+ left = int(ind.get(qn("w:left")))
+ assert left == expected_left, (
+ f"Item {idx} ({text!r}, ilvl={ilvl}): expected left indent={expected_left}, got {left}"
+ )
+
+ def test_hanging_indent_is_consistent(self):
+ from docx.oxml.ns import qn
+
+ doc = self._build_doc()
+ numbered = self._numbered_paragraphs(doc)
+ for idx, (text, (num_id, ilvl)) in enumerate(numbered):
+ abstract = self._get_abstract_num_for(doc, num_id)
+ lvl = self._get_lvl(abstract, ilvl)
+ ind = lvl.find(qn("w:pPr")).find(qn("w:ind"))
+ hanging = int(ind.get(qn("w:hanging")))
+ assert hanging == self.HANGING, f"Item {idx} ({text!r}): expected hanging={self.HANGING}, got {hanging}"
+
+
+class TestMixedParagraphPreservesNonListText:
+ """Non-list text in a paragraph containing parenthesized markers must be preserved.
+
+ When markdown produces a soft-broken block like:
+
+ Intro text here
+ (1) first item
+ (2) second item
+
+ the leading "Intro text here" line must survive as a paragraph in the
+ generated document rather than being silently dropped when the ``<p>``
+ element is decomposed during paren-list post-processing.
+ """
+
+ MARKDOWN = "Intro text here\n(1) first item\n(2) second item"
+
+ def test_intro_text_preserved_in_output(self):
+ from docx import Document
+
+ doc = Document()
+ parse_markdown_to_docx(doc, self.MARKDOWN)
+
+ all_texts = [p.text.strip() for p in doc.paragraphs if p.text.strip()]
+
+ assert any("Intro text here" in t for t in all_texts), (
+ f"Expected 'Intro text here' to appear in the document paragraphs, but got: {all_texts}"
+ )
+
+
+class TestUppercaseAlphabeticListAtoZ:
+ """Verify that a parenthesized uppercase alphabetic list (A)–(Z) is
+ recognised and rendered as a single continuous Word list with the correct
+ uppercase-letter numbering format and correct item text."""
+
+ MARKDOWN = "\n".join(f"({chr(ord('A') + n)}) item {n + 1}" for n in range(26))
+
+ @staticmethod
+ def _get_numpr(paragraph):
+ from docx.oxml.ns import qn
+
+ pPr = paragraph._p.find(qn("w:pPr"))
+ if pPr is None:
+ return None
+ numPr = pPr.find(qn("w:numPr"))
+ if numPr is None:
+ return None
+ numId_el = numPr.find(qn("w:numId"))
+ ilvl_el = numPr.find(qn("w:ilvl"))
+ if numId_el is None or ilvl_el is None:
+ return None
+ return int(numId_el.get(qn("w:val"))), int(ilvl_el.get(qn("w:val")))
+
+ def test_all_items_share_same_numid(self):
+ """All (A)–(Z) items must share a single numId, confirming they
+ form one continuous list and no letter is misclassified."""
+ from docx import Document
+
+ doc = Document()
+ parse_markdown_to_docx(doc, self.MARKDOWN)
+
+ numbered = [(p.text.strip(), self._get_numpr(p)) for p in doc.paragraphs if self._get_numpr(p)]
+
+ assert len(numbered) == 26, f"Expected 26 numbered paragraphs (A)–(Z), got {len(numbered)}: {numbered}"
+
+ num_ids = set(numpr[0] for _, numpr in numbered)
+ assert len(num_ids) == 1, (
+ f"All (A)–(Z) items should share one numId for a continuous "
+ f"list, but got {len(num_ids)} distinct numIds: {num_ids}"
+ )
+
+ def test_numbering_format_is_upper_letter(self):
+ """The numbering format must be upperLetter."""
+ from docx import Document
+ from docx.oxml.ns import qn
+
+ doc = Document()
+ parse_markdown_to_docx(doc, self.MARKDOWN)
+
+ numbered = [(p.text.strip(), self._get_numpr(p)) for p in doc.paragraphs if self._get_numpr(p)]
+ num_id = numbered[0][1][0]
+ ilvl = numbered[0][1][1]
+
+ numbering = doc.part.numbering_part.numbering_definitions._numbering
+ num_el = None
+ for n in numbering.findall(qn("w:num")):
+ if int(n.get(qn("w:numId"))) == num_id:
+ num_el = n
+ break
+ abstract_num_id = int(num_el.find(qn("w:abstractNumId")).get(qn("w:val")))
+ abstract = None
+ for a in numbering.findall(qn("w:abstractNum")):
+ if int(a.get(qn("w:abstractNumId"))) == abstract_num_id:
+ abstract = a
+ break
+
+ target_lvl = None
+ for lvl in abstract.findall(qn("w:lvl")):
+ if int(lvl.get(qn("w:ilvl"))) == ilvl:
+ target_lvl = lvl
+ break
+
+ fmt = target_lvl.find(qn("w:numFmt")).get(qn("w:val"))
+ assert fmt == "upperLetter", f"Expected upperLetter numbering format, got {fmt}"
+
+ def test_lvl_text_is_parenthesized(self):
+ """The lvlText must contain both '(' and ')' for bracket formatting."""
+ from docx import Document
+ from docx.oxml.ns import qn
+
+ doc = Document()
+ parse_markdown_to_docx(doc, self.MARKDOWN)
+
+ numbered = [(p.text.strip(), self._get_numpr(p)) for p in doc.paragraphs if self._get_numpr(p)]
+ num_id = numbered[0][1][0]
+ ilvl = numbered[0][1][1]
+
+ numbering = doc.part.numbering_part.numbering_definitions._numbering
+ num_el = None
+ for n in numbering.findall(qn("w:num")):
+ if int(n.get(qn("w:numId"))) == num_id:
+ num_el = n
+ break
+ abstract_num_id = int(num_el.find(qn("w:abstractNumId")).get(qn("w:val")))
+ abstract = None
+ for a in numbering.findall(qn("w:abstractNum")):
+ if int(a.get(qn("w:abstractNumId"))) == abstract_num_id:
+ abstract = a
+ break
+
+ target_lvl = None
+ for lvl in abstract.findall(qn("w:lvl")):
+ if int(lvl.get(qn("w:ilvl"))) == ilvl:
+ target_lvl = lvl
+ break
+
+ lvl_text = target_lvl.find(qn("w:lvlText")).get(qn("w:val"))
+ assert "(" in lvl_text and ")" in lvl_text, f"Expected parenthesized lvlText like '(%1)', got '{lvl_text}'"
+
+ def test_each_item_has_correct_text(self):
+ """Each paragraph text must match 'item N' for N=1..26."""
+ from docx import Document
+
+ doc = Document()
+ parse_markdown_to_docx(doc, self.MARKDOWN)
+
+ numbered = [(p.text.strip(), self._get_numpr(p)) for p in doc.paragraphs if self._get_numpr(p)]
+
+ assert len(numbered) == 26, f"Expected 26 items, got {len(numbered)}"
+
+ for i, (text, _) in enumerate(numbered):
+ expected = f"item {i + 1}"
+ assert text == expected, f"Item {i} (letter {chr(ord('A') + i)}): expected text '{expected}', got '{text}'"
+
+
+class TestMidListNonMarkerTextInParagraph:
+ """Verify that non-marker text between parenthesized list items inside a
+ <p> element is not silently discarded."""
+
+ def test_no_blank_line_indented_paragraph_belongs_to_item(self):
+ """When there is no blank line between a list item and a following
+ indented paragraph, the paragraph text should be preserved as part of
+ the preceding list item on a new line (continuation text)."""
+ from docx import Document
+
+ md = "(1) first item\nsome trailing note\n(2) second item"
+ doc = Document()
+ parse_markdown_to_docx(doc, md)
+
+ texts = [p.text.strip() for p in doc.paragraphs if p.text.strip()]
+ # The continuation text should be part of the first list item paragraph
+ assert any("some trailing note" in t for t in texts), (
+ f"Expected 'some trailing note' to be preserved as continuation text "
+ f"of the first list item, but it was lost. Paragraphs: {texts}"
+ )
+ # It should be on a new line within the same paragraph, not concatenated
+ first_item_text = texts[0]
+ assert "first item\nsome trailing note" == first_item_text, (
+ f"Expected continuation text on a new line within the list item, got: {first_item_text!r}"
+ )
+
+ def test_blank_line_paragraph_is_standalone(self):
+ """When there is a blank line separating a non-marker paragraph from
+ the surrounding list items, the paragraph should appear as its own
+ standalone paragraph between the two list items, and the second item
+ should continue numbering from 2."""
+ from docx import Document
+
+ md = "(1) first item\n\nsome standalone paragraph\n\n(2) second item"
+ doc = Document()
+ parse_markdown_to_docx(doc, md)
+
+ texts = [p.text.strip() for p in doc.paragraphs if p.text.strip()]
+ assert "some standalone paragraph" in texts, (
+ f"Expected 'some standalone paragraph' as its own paragraph, but it was not found. Paragraphs: {texts}"
+ )
diff --git a/pyproject.toml b/pyproject.toml
index bf93580f..ab5f70c7 100644
--- a/pyproject.toml
+++ b/pyproject.toml
@@ -18,6 +18,9 @@ addopts = "--import-mode=importlib -m unit --tb=short"
norecursedirs = ["dependencies", "__pycache__", ".git", ".ruff_cache"]
+[tool.ruff]
+line-length = 120
+
# Environment variables for integration tests are loaded from .env
# by the root conftest.py (stdlib-only, no plugin dependency).