-
Notifications
You must be signed in to change notification settings - Fork 9
Enable ruff security linting and fix findings #109
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Changes from all commits
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
| Original file line number | Diff line number | Diff line change |
|---|---|---|
| @@ -1,4 +1,5 @@ | ||
| lightspeed-rag-content @ git+https://github.com/lightspeed-core/rag-content@main | ||
| defusedxml | ||
| packaging | ||
| lxml | ||
| html2text |
| Original file line number | Diff line number | Diff line change |
|---|---|---|
|
|
@@ -22,6 +22,8 @@ | |
| from packaging.version import Version | ||
| from typing import Generator, Tuple | ||
| import xml.etree.ElementTree as ET | ||
|
|
||
| import defusedxml.ElementTree as DefusedET | ||
| import re | ||
| import subprocess | ||
| import tempfile | ||
|
|
@@ -150,7 +152,7 @@ def red_hat_docs_path( | |
| # This is needed because docinfo.xml is not properly formatted XML file | ||
| # because it does not contain a single root tag. | ||
| docinfo_content = f.read() | ||
| tree = ET.fromstring(f"<root>{docinfo_content}</root>") | ||
| tree = DefusedET.fromstring(f"<root>{docinfo_content}</root>") | ||
|
|
||
| productnumber = get_xml_element_text(tree, "productnumber") | ||
| if Version(productnumber) != Version(docs_version): | ||
|
|
@@ -1364,8 +1366,8 @@ def __exit__(self, exc_type, exc_val, exc_tb): | |
| # Try to remove the lock file (best effort) | ||
| try: | ||
| self.lock_path.unlink() | ||
| except Exception: | ||
| pass | ||
| except Exception as e: | ||
| LOG.debug(f"Could not remove lock file {self.lock_path}: {e}") | ||
| except Exception as e: | ||
| LOG.warning(f"Error releasing lock for {self.file_path}: {e}") | ||
|
|
||
|
|
@@ -1849,7 +1851,7 @@ def preprocess_xml_table_cells(xml_content: str) -> str: | |
| """ | ||
| try: | ||
| # Parse the XML | ||
| root = ET.fromstring(xml_content) | ||
| root = DefusedET.fromstring(xml_content) | ||
|
|
||
| # Define the DocBook namespace | ||
| ns = {"db": "http://docbook.org/ns/docbook"} | ||
|
|
@@ -1910,7 +1912,7 @@ def preprocess_xml_list_titles(xml_content: str) -> str: | |
| """ | ||
| try: | ||
| # Parse the XML | ||
| root = ET.fromstring(xml_content) | ||
| root = DefusedET.fromstring(xml_content) | ||
|
|
||
| # Define the DocBook namespace | ||
| ns = {"db": "http://docbook.org/ns/docbook"} | ||
|
|
@@ -2077,7 +2079,7 @@ def convert(self, input_path: Path, output_path: Path) -> dict[Path, list[str]]: | |
| str(xml_temp_path.absolute()), | ||
| str(input_for_conversion.absolute()), | ||
| ] | ||
| subprocess.run(asciidoctor_cmd, check=True, capture_output=True) | ||
| subprocess.run(asciidoctor_cmd, check=True, capture_output=True) # noqa: S603 | ||
|
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
🧩 Analysis chain
🏁 Script executed:
#!/bin/bash
# AST-based verification: list subprocess.run calls missing a `timeout=` kwarg.
python - <<'PY'
import ast
from pathlib import Path
p = Path("scripts/rhoso_adoc_docs_to_text.py")
tree = ast.parse(p.read_text(encoding="utf-8"))
for node in ast.walk(tree):
if isinstance(node, ast.Call):
fn = node.func
is_subprocess_run = (
isinstance(fn, ast.Attribute)
and fn.attr == "run"
and isinstance(fn.value, ast.Name)
and fn.value.id == "subprocess"
)
if is_subprocess_run:
has_timeout = any(k.arg == "timeout" for k in node.keywords if k.arg)
if not has_timeout:
print(f"subprocess.run without timeout at line {node.lineno}")
PY
Repository: openstack-lightspeed/rag-content
Length of output: 252
🏁 Script executed:
#!/bin/bash
set -euo pipefail
python3 - <<'PY'
from pathlib import Path
path = Path("scripts/rhoso_adoc_docs_to_text.py")
lines = path.read_text(encoding="utf-8").splitlines()
targets = [2082, 2112, 2278, 2324]
for t in targets:
start = max(1, t-8)
end = min(len(lines), t+8)
print(f"\n--- {path}:{t} (show {start}-{end}) ---")
for i in range(start, end+1):
print(f"{i:5d}: {lines[i-1]}")
PY
Repository: openstack-lightspeed/rag-content
Length of output: 4030
Add explicit timeouts to Asciidoctor/Pandoc conversion subprocesses
Proposed fix
+SUBPROCESS_TIMEOUT_SECONDS = 600
...
- subprocess.run(asciidoctor_cmd, check=True, capture_output=True) # noqa: S603
+ subprocess.run(
+ asciidoctor_cmd,
+ check=True,
+ capture_output=True,
+ timeout=SUBPROCESS_TIMEOUT_SECONDS,
+ ) # noqa: S603
...
- subprocess.run(pandoc_cmd, check=True, capture_output=True) # noqa: S603
+ subprocess.run(
+ pandoc_cmd,
+ check=True,
+ capture_output=True,
+ timeout=SUBPROCESS_TIMEOUT_SECONDS,
+ ) # noqa: S603
...
result = subprocess.run( # noqa: S603
- asciidoctor_cmd, check=True, capture_output=True, text=True
+ asciidoctor_cmd,
+ check=True,
+ capture_output=True,
+ text=True,
+ timeout=SUBPROCESS_TIMEOUT_SECONDS,
)
...
subprocess.run( # noqa: S603
- pandoc_cmd, check=True, capture_output=True, text=True
+ pandoc_cmd,
+ check=True,
+ capture_output=True,
+ text=True,
+ timeout=SUBPROCESS_TIMEOUT_SECONDS,
)
🤖 Prompt for AI Agents |
||
|
|
||
| # Step 1.5: Preprocess XML to fix issues | ||
| with open(xml_temp_path, "r", encoding="utf-8") as f: | ||
|
|
@@ -2107,7 +2109,7 @@ def convert(self, input_path: Path, output_path: Path) -> dict[Path, list[str]]: | |
| "-o", | ||
| str(output_path.absolute()), | ||
| ] | ||
| subprocess.run(pandoc_cmd, check=True, capture_output=True) | ||
| subprocess.run(pandoc_cmd, check=True, capture_output=True) # noqa: S603 | ||
|
|
||
| # Step 3: Convert any HTML tables to markdown pipe tables | ||
| with open(output_path, "r", encoding="utf-8") as f: | ||
|
|
@@ -2273,7 +2275,7 @@ def convert(self, input_path: Path, output_path: Path) -> None: | |
| str(xml_temp_path.absolute()), | ||
| str(input_for_conversion.absolute()), | ||
| ] | ||
| result = subprocess.run( | ||
| result = subprocess.run( # noqa: S603 | ||
| asciidoctor_cmd, check=True, capture_output=True, text=True | ||
| ) | ||
| if result.stderr: | ||
|
|
@@ -2319,7 +2321,7 @@ def convert(self, input_path: Path, output_path: Path) -> None: | |
| "-o", | ||
| str(output_path.absolute()), | ||
| ] | ||
| subprocess.run( | ||
| subprocess.run( # noqa: S603 | ||
| pandoc_cmd, check=True, capture_output=True, text=True | ||
| ) | ||
|
|
||
|
|
||
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
🧩 Analysis chain
🏁 Script executed:
Repository: openstack-lightspeed/rag-content
Length of output: 3731
Catch `OSError` (not `Exception`) when removing the lock file
`self.lock_path.unlink()` is only meant to “best-effort” clean up OS-level failures; catching `Exception` here can hide unrelated bugs.
Proposed fix
📝 Committable suggestion
🧰 Tools
🪛 Ruff (0.15.14)
[warning] 1369-1369: Do not catch blind exception: `Exception` (BLE001)
🤖 Prompt for AI Agents