Skip to content
Open
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
21 changes: 16 additions & 5 deletions scripts/triage_all_repos.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,6 +5,7 @@
import datetime as dt
from collections import Counter, defaultdict
from typing import Any, Dict, List, Tuple
import os

from gh_code_scanning import create_clients
from gh_code_scanning.exceptions import GitHubApiError, GitHubNotFoundError
Expand Down Expand Up @@ -119,12 +120,22 @@
ap = argparse.ArgumentParser()
ap.add_argument("--owner", default="Notoriousjayy")
ap.add_argument("--state", default="open")
ap.add_argument("--write-md", default="out/code_scanning_triage.md")
ap.add_argument("--write-md", default="code_scanning_triage.md")
ap.add_argument("--update-issue", action="store_true")
ap.add_argument("--issue-repo", default="Notoriousjayy/ghas-code-scanning-toolkit")
ap.add_argument("--issue-title", default="Automated Code Scanning Triage")
args = ap.parse_args()

base_output_dir = "out"
# Normalize the requested output path and ensure it stays within base_output_dir.
requested_path = os.path.normpath(args.write_md)
# Disallow absolute paths or traversal outside the base directory.
if os.path.isabs(requested_path) or os.pardir in requested_path.split(os.sep):
safe_filename = "code_scanning_triage.md"
else:
safe_filename = os.path.basename(requested_path) or "code_scanning_triage.md"
write_path = os.path.normpath(os.path.join(base_output_dir, safe_filename))

rest, cs = create_clients()

repos = list_owned_repos(rest, args.owner)
Expand Down Expand Up @@ -160,16 +171,16 @@
md = render_markdown_report(args.owner, scanned, skipped_no_analysis, per_repo_counts, top_alerts)

# write report
import os
os.makedirs(os.path.dirname(args.write_md), exist_ok=True)
with open(args.write_md, "w", encoding="utf-8") as f:
output_dir = os.path.dirname(write_path) or "."
os.makedirs(output_dir, exist_ok=True)

Check failure

Code scanning / CodeQL

Uncontrolled data used in path expression High

This path depends on a
user-provided value
.

Copilot Autofix

AI 4 months ago

In general, to fix uncontrolled path use, derive a canonical path rooted under a fixed base directory, normalize it with os.path.normpath (or realpath), and ensure the result is still within that base directory. Avoid trusting string patterns alone (like just checking for ".."), and avoid letting the user control directory components when you intend to constrain writes to a specific folder tree.

For this code, the best fix is to treat args.write_md as a relative path under the fixed base directory out, then normalize and validate the combined path. Specifically:

  1. Compute an absolute, normalized base directory (e.g., base_output_dir_abs = os.path.abspath(base_output_dir)).
  2. Join base_output_dir_abs and the user-supplied args.write_md, normalize the result (os.path.normpath), and make it absolute.
  3. Check that the resulting path is still under base_output_dir_abs (using a robust prefix check like os.path.commonpath).
  4. If validation fails (absolute path, path escaping the base, or empty), fall back to a safe default like code_scanning_triage.md under the base directory.
  5. Use this validated write_path to derive output_dir and for writing the file.

This keeps the existing behavior—always writing into an out directory and allowing the user to pick a file name and (optionally) subdirectories under that directory—while making the safety guarantees explicit and aligned with the recommended pattern.

Concretely, in scripts/triage_all_repos.py around lines 129–137, replace the current ad hoc requested_path / safe_filename logic with the robust normalization and containment check. No new imports are needed because os is already imported. The rest of the file (writing, printing, issue updating) remains unchanged.

Suggested changeset 1
scripts/triage_all_repos.py

Autofix patch

Autofix patch
Run the following command in your local git repository to apply this patch
cat << 'EOF' | git apply
diff --git a/scripts/triage_all_repos.py b/scripts/triage_all_repos.py
--- a/scripts/triage_all_repos.py
+++ b/scripts/triage_all_repos.py
@@ -127,14 +127,16 @@
     args = ap.parse_args()
 
     base_output_dir = "out"
-    # Normalize the requested output path and ensure it stays within base_output_dir.
-    requested_path = os.path.normpath(args.write_md)
-    # Disallow absolute paths or traversal outside the base directory.
-    if os.path.isabs(requested_path) or os.pardir in requested_path.split(os.sep):
-        safe_filename = "code_scanning_triage.md"
+    # Compute a safe, normalized output path rooted under base_output_dir.
+    base_output_dir_abs = os.path.abspath(base_output_dir)
+    requested_rel_path = args.write_md or "code_scanning_triage.md"
+    # Join and normalize the user-supplied path with the base directory.
+    candidate_path = os.path.normpath(os.path.join(base_output_dir_abs, requested_rel_path))
+    # Ensure the candidate path stays within the base output directory.
+    if not os.path.commonpath([base_output_dir_abs, candidate_path]) == base_output_dir_abs:
+        write_path = os.path.join(base_output_dir_abs, "code_scanning_triage.md")
     else:
-        safe_filename = os.path.basename(requested_path) or "code_scanning_triage.md"
-    write_path = os.path.normpath(os.path.join(base_output_dir, safe_filename))
+        write_path = candidate_path
 
     rest, cs = create_clients()
 
EOF
@@ -127,14 +127,16 @@
args = ap.parse_args()

base_output_dir = "out"
# Normalize the requested output path and ensure it stays within base_output_dir.
requested_path = os.path.normpath(args.write_md)
# Disallow absolute paths or traversal outside the base directory.
if os.path.isabs(requested_path) or os.pardir in requested_path.split(os.sep):
safe_filename = "code_scanning_triage.md"
# Compute a safe, normalized output path rooted under base_output_dir.
base_output_dir_abs = os.path.abspath(base_output_dir)
requested_rel_path = args.write_md or "code_scanning_triage.md"
# Join and normalize the user-supplied path with the base directory.
candidate_path = os.path.normpath(os.path.join(base_output_dir_abs, requested_rel_path))
# Ensure the candidate path stays within the base output directory.
if not os.path.commonpath([base_output_dir_abs, candidate_path]) == base_output_dir_abs:
write_path = os.path.join(base_output_dir_abs, "code_scanning_triage.md")
else:
safe_filename = os.path.basename(requested_path) or "code_scanning_triage.md"
write_path = os.path.normpath(os.path.join(base_output_dir, safe_filename))
write_path = candidate_path

rest, cs = create_clients()

Copilot is powered by AI and may make mistakes. Always verify output.
with open(write_path, "w", encoding="utf-8") as f:

Check failure

Code scanning / CodeQL

Uncontrolled data used in path expression High

This path depends on a
user-provided value
.

Copilot Autofix

AI 4 months ago

In general, the problem is fixed by strictly constraining the user-controlled portion of the path: treat user input as a filename only (no directory components), validate or sanitize it, and then join it to a predefined base directory. Optionally, normalize and verify that the resulting path is still within that base directory before using it.

In this script, the best minimal fix that does not change existing functionality is:

  • Continue to use a fixed base directory (out).
  • Treat args.write_md strictly as a filename, stripping any directory parts with os.path.basename.
  • Reject any attempt to smuggle path separators or parent directories by checking for os.sep, alternative separators (like os.altsep), or os.pardir in the resulting filename. If validation fails, fall back to a safe default filename (code_scanning_triage.md).
  • Join the validated filename to base_output_dir, normalize once, and rely only on that path.

Concretely, in scripts/triage_all_repos.py, lines 129–137 already attempt something similar but still permit some edge cases (for example, non-standard separators on some platforms, or filenames like .. that may normalize oddly). We’ll replace that block with stricter logic:

  • Compute requested_path = os.path.normpath(args.write_md or "").
  • Derive candidate = os.path.basename(requested_path) so we never keep directory components.
  • Validate that candidate is non-empty, is not . or .., does not contain os.sep, does not contain os.altsep (if present), and does not equal os.pardir.
  • If any of those checks fail, use the default filename; otherwise accept candidate.
  • Build write_path = os.path.join(base_output_dir, safe_filename) and normalize it.

No new imports are required; os is already imported at the top of the file.


Suggested changeset 1
scripts/triage_all_repos.py

Autofix patch

Autofix patch
Run the following command in your local git repository to apply this patch
cat << 'EOF' | git apply
diff --git a/scripts/triage_all_repos.py b/scripts/triage_all_repos.py
--- a/scripts/triage_all_repos.py
+++ b/scripts/triage_all_repos.py
@@ -127,13 +127,21 @@
     args = ap.parse_args()
 
     base_output_dir = "out"
-    # Normalize the requested output path and ensure it stays within base_output_dir.
-    requested_path = os.path.normpath(args.write_md)
-    # Disallow absolute paths or traversal outside the base directory.
-    if os.path.isabs(requested_path) or os.pardir in requested_path.split(os.sep):
+    # Treat the user-provided path as a filename only and validate it before use.
+    requested_path = os.path.normpath(args.write_md or "")
+    candidate = os.path.basename(requested_path)
+    # Reject empty names, parent-directory indicators, or any embedded path separators.
+    invalid_name = (
+        not candidate
+        or candidate in (".", "..")
+        or candidate == os.pardir
+        or os.sep in candidate
+        or (os.altsep is not None and os.altsep in candidate)
+    )
+    if invalid_name:
         safe_filename = "code_scanning_triage.md"
     else:
-        safe_filename = os.path.basename(requested_path) or "code_scanning_triage.md"
+        safe_filename = candidate
     write_path = os.path.normpath(os.path.join(base_output_dir, safe_filename))
 
     rest, cs = create_clients()
EOF
@@ -127,13 +127,21 @@
args = ap.parse_args()

base_output_dir = "out"
# Normalize the requested output path and ensure it stays within base_output_dir.
requested_path = os.path.normpath(args.write_md)
# Disallow absolute paths or traversal outside the base directory.
if os.path.isabs(requested_path) or os.pardir in requested_path.split(os.sep):
# Treat the user-provided path as a filename only and validate it before use.
requested_path = os.path.normpath(args.write_md or "")
candidate = os.path.basename(requested_path)
# Reject empty names, parent-directory indicators, or any embedded path separators.
invalid_name = (
not candidate
or candidate in (".", "..")
or candidate == os.pardir
or os.sep in candidate
or (os.altsep is not None and os.altsep in candidate)
)
if invalid_name:
safe_filename = "code_scanning_triage.md"
else:
safe_filename = os.path.basename(requested_path) or "code_scanning_triage.md"
safe_filename = candidate
write_path = os.path.normpath(os.path.join(base_output_dir, safe_filename))

rest, cs = create_clients()
Copilot is powered by AI and may make mistakes. Always verify output.
f.write(md)

if args.update_issue:
issue_owner, issue_repo = args.issue_repo.split("/", 1)
upsert_issue(rest, issue_owner, issue_repo, args.issue_title, md, labels=["security", "code-scanning", "triage"])

print(f"Wrote: {args.write_md}")
print(f"Wrote: {write_path}")
if args.update_issue:
print(f"Updated issue: {args.issue_repo} -> {args.issue_title}")
return 0
Expand Down