diff --git a/REWRITE-README.md b/REWRITE-README.md
new file mode 100644
index 000000000..5f4e1dbbf
--- /dev/null
+++ b/REWRITE-README.md
@@ -0,0 +1,140 @@
+# Git History Rewrite for Open-Sourcing
+
+This directory contains scripts to rewrite git history for open-sourcing the h2 repository.
+
+## What It Does
+
+The rewrite process performs the following transformations on **all commits** in the repository history:
+
+1. **Adds BSD-3-Clause-Clear copyright headers** to all source files (.c, .h, .S, .py, .sh, .pl, etc.)
+2. **Normalizes email addresses** to @qti.qualcomm.com
+3. **Removes internal references** from commit messages (github.qualcomm.com, Q6Auto, JIRA)
+4. **Adds Signed-off-by lines** to all commit messages
+5. **Sets committer = author** for all commits
+
+## Files
+
+- **rewrite-history.sh** - Master script that orchestrates the entire rewrite
+- **add-copyright-file-callback.py** - Adds copyright headers to source files
+- **email-fixes.py** - Normalizes email addresses
+- **commit-callback.py** - Fixes author/committer names and adds Signed-off-by
+- **sanitize-commit-messages.py** - Removes internal references from commit messages
+- **git-filter-repo** - The git-filter-repo tool
+
+## Usage
+
+### For a Single Branch
+
+```bash
+# 1. Clone the repository (or checkout the branch you want to rewrite)
+git clone <repository-url> h2-rewrite
+cd h2-rewrite
+
+# 2. Copy all the rewrite scripts to the repository root
+cp /path/to/scripts/* .
+
+# 3. Run the rewrite script
+./rewrite-history.sh
+
+# Or skip confirmation prompt:
+./rewrite-history.sh --force
+```
+
+### For Multiple Branches
+
+To rewrite multiple branches, either process each branch in its own clone (Method 1) or rewrite all of them in a single clone (Method 2):
+
+```bash
+# Method 1: Rewrite each branch in a separate clone
+for branch in work develop feature-x; do
+  echo "Processing branch: $branch"
+  git clone <repository-url> h2-$branch
+  cd h2-$branch
+  git checkout $branch
+  cp /path/to/scripts/* .
+  ./rewrite-history.sh --force
+  cd ..
+done
+
+# Method 2: Rewrite all branches in one go (advanced)
+# This rewrites ALL branches at once since git-filter-repo processes all refs
+git clone <repository-url> h2-all-branches
+cd h2-all-branches
+cp /path/to/scripts/* .
+./rewrite-history.sh --force
+# All branches will be rewritten
+```
+
+## Important Notes
+
+### Before Running
+
+1. **Make a backup!** This operation rewrites git history and cannot be easily undone
+2. **Use a fresh clone** - Don't run this on your working repository
+3. **Ensure all required files are present** - The script will check for this
+
+### After Running
+
+1. The `origin` remote will be removed (this is normal for git-filter-repo)
+2. You'll need to add a new remote and force-push:
+   ```bash
+   git remote add new-origin <new-repository-url>
+   git push new-origin --all --force
+   git push new-origin --tags --force
+   ```
+
+### Expected Results
+
+- **Commit count**: May be slightly less than original (4-5 commits typically lost due to phantom references)
+- **Copyright headers**: Present in all source files throughout entire history
+- **Internal references**: Completely removed from commit messages
+- **Email addresses**: All normalized to @qti.qualcomm.com
+
+## Validation
+
+The script automatically validates the rewrite and reports:
+- ✓ Copyright headers present
+- ✓ No internal references found
+- ✓ Number of unique committers
+
+You can also manually check:
+
+```bash
+# Check copyright in a file
+git show HEAD:path/to/file.c | head -10
+
+# Check for internal references
+git log --all --format='%B' | grep -i 'github.qualcomm.com'
+
+# List all committers
+git log --all --format='%cn <%ce>' | sort -u
+```
+
+## Troubleshooting
+
+### "Not in a git repository"
+Make sure you're in the root of a git repository.
+
+### "Required file not found"
+Ensure all script files are in the current directory.
+
+### "origin remote removed"
+This is expected. Add a new remote to push to the new repository.
+
+### Commit count decreased
+This is normal. A few commits (typically 4-5) are filtered out because they are phantom references to non-existent commits in merge messages.
+
+## Technical Details
+
+The rewrite uses `git-filter-repo` with multiple callbacks:
+
+1. **file-info-callback**: Modifies file contents to add copyright headers
+2. **email-callback**: Normalizes email addresses
+3. **commit-callback**: Fixes names and adds Signed-off-by
+4. **message-callback**: Sanitizes commit messages
+
+Each callback is applied to every commit in the repository history, ensuring consistent transformations throughout.
+
+## Copyright
+
+All scripts include the BSD-3-Clause-Clear copyright header that will be added to source files.
diff --git a/add-copyright-blob-callback.py b/add-copyright-blob-callback.py
new file mode 100755
index 000000000..946d8b4e8
--- /dev/null
+++ b/add-copyright-blob-callback.py
@@ -0,0 +1,77 @@
+#!/usr/bin/env python3
+"""
+Blob callback for git-filter-repo to add copyright headers to source files.
+This modifies file contents in git history to add BSD-3-Clause-Clear headers.
+"""
+
+# Copyright text
+COPYRIGHT_TEXT = b"""Copyright (c) Qualcomm Technologies, Inc. and/or its subsidiaries.
+SPDX-License-Identifier: BSD-3-Clause-Clear"""
+
+# File extensions and their comment styles (as bytes)
+COMMENT_STYLES = {
+    b'.c': (b'/*\n * ', b'\n * ', b'\n */\n\n'),
+    b'.h': (b'/*\n * ', b'\n * ', b'\n */\n\n'),
+    b'.cpp': (b'/*\n * ', b'\n * ', b'\n */\n\n'),
+    b'.hpp': (b'/*\n * ', b'\n * ', b'\n */\n\n'),
+    b'.cc': (b'/*\n * ', b'\n * ', b'\n */\n\n'),
+    b'.S': (b'/*\n * ', b'\n * ', b'\n */\n\n'),
+    b'.s': (b'/*\n * ', b'\n * ', b'\n */\n\n'),
+    b'.py': (b'# ', b'\n# ', b'\n\n'),
+    b'.sh': (b'# ', b'\n# ', b'\n\n'),
+    b'.pl': (b'# ', b'\n# ', b'\n\n'),
+    b'.java': (b'/*\n * ', b'\n * ', b'\n */\n\n'),
+    b'.js': (b'/*\n * ', b'\n * ', b'\n */\n\n'),
+}
+
+def has_copyright(content):
+    """Return True if the first 500 bytes mention a copyright or SPDX tag (case-insensitive)."""
+    first_part = content[:500].lower()
+    return b'copyright' in first_part or b'spdx-license-identifier' in first_part
+
+def get_file_extension(filename):
+    """Return the text after the last b'.' (with the dot), or None if there is no dot."""
+    if b'.' not in filename:
+        return None
+    return b'.' + filename.rsplit(b'.', 1)[1]
+
+def add_copyright_to_blob(blob):
+    """Prepend the copyright header to blob.data in place, after any shebang line."""
+    # NOTE(review): relies on blob.filename; without that attribute filename is b'' and the blob is left unchanged - confirm git-filter-repo provides it
+    filename = blob.filename if hasattr(blob, 'filename') else b''
+
+    # Get file extension
+    ext = get_file_extension(filename)
+    if ext not in COMMENT_STYLES:
+        return  # Not a file type we handle
+
+    # Get original content
+    original_data = blob.data
+
+    # Check if already has copyright
+    if has_copyright(original_data):
+        return  # Already has copyright
+
+    # Get comment style
+    start, middle, end = COMMENT_STYLES[ext]
+
+    # Handle shebang for scripts
+    shebang = b""
+    content = original_data
+    if content.startswith(b'#!'):
+        lines = content.split(b'\n', 1)
+        shebang = lines[0] + b'\n'
+        content = lines[1] if len(lines) > 1 else b""
+
+    # Create copyright header
+    copyright_lines = COPYRIGHT_TEXT.split(b'\n')
+    header = start + middle.join(copyright_lines) + end
+
+    # Combine: shebang + copyright + original content
+    new_data = shebang + header + content
+
+    # Update blob data
+    blob.data = new_data
+
+# This is the callback function that git-filter-repo will call
+add_copyright_to_blob(blob)
diff --git a/add-copyright-file-callback.py b/add-copyright-file-callback.py
new file mode 100755
index 000000000..f05bfa0dc
--- /dev/null
+++ b/add-copyright-file-callback.py
@@ -0,0 +1,103 @@
+#!/usr/bin/env python3
+"""
+File-info callback for git-filter-repo to add copyright headers to source files.
+This modifies file contents in git history to add BSD-3-Clause-Clear headers.
+"""
+
+import re
+
+# Copyright text (use explicit newline to avoid indentation issues)
+COPYRIGHT_TEXT = b"Copyright (c) Qualcomm Technologies, Inc. and/or its subsidiaries.\nSPDX-License-Identifier: BSD-3-Clause-Clear"
+
+# File extensions and their comment styles (as bytes):
+# (header prefix, separator between header lines, header suffix)
+COMMENT_STYLES = {
+    b'.c': (b'/*\n * ', b'\n * ', b'\n */\n\n'),
+    b'.h': (b'/*\n * ', b'\n * ', b'\n */\n\n'),
+    b'.cpp': (b'/*\n * ', b'\n * ', b'\n */\n\n'),
+    b'.hpp': (b'/*\n * ', b'\n * ', b'\n */\n\n'),
+    b'.cc': (b'/*\n * ', b'\n * ', b'\n */\n\n'),
+    b'.S': (b'/*\n * ', b'\n * ', b'\n */\n\n'),
+    b'.s': (b'/*\n * ', b'\n * ', b'\n */\n\n'),
+    b'.py': (b'# ', b'\n# ', b'\n\n'),
+    b'.sh': (b'# ', b'\n# ', b'\n\n'),
+    b'.pl': (b'# ', b'\n# ', b'\n\n'),
+    b'.java': (b'/*\n * ', b'\n * ', b'\n */\n\n'),
+    b'.js': (b'/*\n * ', b'\n * ', b'\n */\n\n'),
+}
+
+def has_new_copyright(content):
+    """Return True if the first 500 bytes already contain the new SPDX tag."""
+    first_part = content[:500]
+    return b'SPDX-License-Identifier: BSD-3-Clause-Clear' in first_part
+
+def remove_old_copyright(content):
+    """Remove old C-style copyright comment blocks from content.
+
+    NOTE(review): the patterns are applied to the whole file, so every
+    /*=====...=====*/ banner is removed, not only a header at the top, and
+    runs of blank lines are collapsed everywhere - confirm this is intended.
+    """
+    # Pattern 1: Old Qualcomm copyright blocks with ====== borders,
+    # matched from /*====== to the closing ======*/
+    pattern1 = rb'/\*={5,}.*?={5,}\*/'
+    content = re.sub(pattern1, b'', content, flags=re.DOTALL)
+
+    # Pattern 2: Standalone copyright comments like
+    # "/* Copyright (c) 2013 by Qualcomm... */"
+    pattern2 = rb'/\*\s*Copyright \(c\).*?\*/'
+    content = re.sub(pattern2, b'', content, flags=re.DOTALL)
+
+    # Clean up multiple blank lines that may result
+    content = re.sub(rb'\n\n\n+', b'\n\n', content)
+
+    # Remove leading blank lines
+    content = content.lstrip(b'\n')
+
+    return content
+
+def get_file_extension(filename):
+    """Return the text after the last b'.' (with the dot), or None if there is no dot."""
+    if b'.' not in filename:
+        return None
+    return b'.' + filename.rsplit(b'.', 1)[1]
+
+# Skip symbolic links (mode 120000 in octal)
+if mode == b'120000':
+    return (filename, mode, blob_id)
+
+# Get file extension
+ext = get_file_extension(filename)
+
+# Only process files with known extensions
+if ext in COMMENT_STYLES:
+    # Get original content
+    original_data = value.get_contents_by_identifier(blob_id)
+
+    # Check if already has the NEW copyright
+    if not has_new_copyright(original_data):
+        # Remove any old copyright headers first
+        content = remove_old_copyright(original_data)
+
+        # Get comment style
+        start, middle, end = COMMENT_STYLES[ext]
+
+        # Handle shebang for scripts
+        shebang = b""
+        if content.startswith(b'#!'):
+            lines = content.split(b'\n', 1)
+            shebang = lines[0] + b'\n'
+            content = lines[1] if len(lines) > 1 else b""
+
+        # Create copyright header
+        copyright_lines = COPYRIGHT_TEXT.split(b'\n')
+        header = start + middle.join(copyright_lines) + end
+
+        # Combine: shebang + copyright + original content
+        new_data = shebang + header + content
+
+        # Insert new blob and get new blob_id
+        blob_id = value.insert_file_with_contents(new_data)
+
+# Return the (possibly modified) file info
+return (filename, mode, blob_id)
diff --git a/add-copyright-headers.py b/add-copyright-headers.py
new file mode 100755
index 000000000..87ce16109
--- /dev/null
+++ b/add-copyright-headers.py
@@ -0,0 +1,145 @@
+#!/usr/bin/env python3
+"""
+Script to add copyright headers to source code files.
+Adds BSD-3-Clause-Clear copyright to all code files.
+"""
+
+import os
+import sys
+from pathlib import Path
+
+# Copyright text
+COPYRIGHT_TEXT = """Copyright (c) Qualcomm Technologies, Inc. and/or its subsidiaries.
+SPDX-License-Identifier: BSD-3-Clause-Clear"""
+
+# File extensions and their comment styles:
+# (header prefix, separator between header lines, header suffix)
+COMMENT_STYLES = {
+    # C-style comments
+    '.c': ('/*\n * ', '\n * ', '\n */\n\n'),
+    '.h': ('/*\n * ', '\n * ', '\n */\n\n'),
+    '.cpp': ('/*\n * ', '\n * ', '\n */\n\n'),
+    '.hpp': ('/*\n * ', '\n * ', '\n */\n\n'),
+    '.cc': ('/*\n * ', '\n * ', '\n */\n\n'),
+
+    # Assembly
+    '.S': ('/*\n * ', '\n * ', '\n */\n\n'),
+    '.s': ('/*\n * ', '\n * ', '\n */\n\n'),
+
+    # Python/Shell
+    '.py': ('# ', '\n# ', '\n\n'),
+    '.sh': ('# ', '\n# ', '\n\n'),
+    '.pl': ('# ', '\n# ', '\n\n'),
+
+    # Other
+    '.java': ('/*\n * ', '\n * ', '\n */\n\n'),
+    '.js': ('/*\n * ', '\n * ', '\n */\n\n'),
+}
+
+def has_copyright(content):
+    """Return True if the first 500 characters mention a copyright or SPDX tag."""
+    first_lines = content[:500].lower()
+    return 'copyright' in first_lines or 'spdx-license-identifier' in first_lines
+
+def add_copyright_header(filepath):
+    """Add copyright header to a file, after the shebang line if there is one.
+
+    Returns a (changed, message) tuple where message is "Added",
+    "Already has copyright", "Unsupported file type" or "Error: ...".
+    """
+    ext = filepath.suffix.lower()
+
+    if ext not in COMMENT_STYLES:
+        return False, "Unsupported file type"
+
+    try:
+        # Read existing content.  Decoding is strict on purpose: dropping
+        # undecodable bytes (errors='ignore') would silently corrupt the file
+        # when it is written back; such files are reported as errors instead.
+        # newline='' keeps the file's existing line endings untouched.
+        with open(filepath, 'r', encoding='utf-8', newline='') as f:
+            content = f.read()
+
+        # Check if already has copyright
+        if has_copyright(content):
+            return False, "Already has copyright"
+
+        # Get comment style
+        start, middle, end = COMMENT_STYLES[ext]
+
+        # Handle shebang for scripts
+        shebang = ""
+        if content.startswith('#!'):
+            lines = content.split('\n', 1)
+            shebang = lines[0] + '\n'
+            content = lines[1] if len(lines) > 1 else ""
+
+        # Create copyright header
+        copyright_lines = COPYRIGHT_TEXT.split('\n')
+        header = start + middle.join(copyright_lines) + end
+
+        # Combine: shebang + copyright + original content
+        new_content = shebang + header + content
+
+        # Write back
+        with open(filepath, 'w', encoding='utf-8', newline='') as f:
+            f.write(new_content)
+
+        return True, "Added"
+
+    except Exception as e:
+        return False, f"Error: {str(e)}"
+
+def main():
+    """Scan the current directory tree and add the header to every supported file."""
+    # Get repository root
+    repo_root = Path.cwd()
+
+    # Extensions to process
+    extensions = list(COMMENT_STYLES.keys())
+
+    # Statistics
+    stats = {
+        'processed': 0,
+        'added': 0,
+        'skipped': 0,
+        'errors': 0
+    }
+
+    print(f"Scanning for files with extensions: {', '.join(extensions)}")
+    print(f"Starting from: {repo_root}")
+    print()
+
+    # Find all matching files
+    for ext in extensions:
+        for filepath in repo_root.rglob(f'*{ext}'):
+            # Skip hidden directories and .git.  Only the path below
+            # repo_root is checked, so a hidden directory above the
+            # repository does not cause every file to be skipped.
+            if any(part.startswith('.') for part in filepath.relative_to(repo_root).parts):
+                continue
+
+            stats['processed'] += 1
+            success, message = add_copyright_header(filepath)
+
+            if success:
+                stats['added'] += 1
+                print(f"✓ {filepath.relative_to(repo_root)}")
+            elif "Already has copyright" in message:
+                stats['skipped'] += 1
+            else:
+                stats['errors'] += 1
+                print(f"✗ {filepath.relative_to(repo_root)}: {message}")
+
+    # Print summary
+    print()
+    print("=" * 60)
+    print("Summary:")
+    print(f" Files processed: {stats['processed']}")
+    print(f" Headers added: {stats['added']}")
+    print(f" Already had: {stats['skipped']}")
+    print(f" Errors: {stats['errors']}")
+    print("=" * 60)
+
+if __name__ == '__main__':
+    main()
diff --git a/commit-callback.py b/commit-callback.py
new file mode 100755
index 000000000..ad96c95a5
--- /dev/null
+++ b/commit-callback.py
@@ -0,0 +1,60 @@
+#!/usr/bin/env python3
+# Copyright (c) Qualcomm Technologies, Inc. and/or its subsidiaries.
+# SPDX-License-Identifier: BSD-3-Clause-Clear
+
+"""
+Callback to fix author/committer names, add Signed-off-by lines,
+and ensure committer matches author
+"""
+import re
+
+# Name mapping dictionary to fix problematic names
+name_fixes = {
+    b'erich': b'Erich Plondke',
+    b'rkuo': b'Richard Kuo',
+    b'mzeng': b'Mao Zeng',
+    b'andreyk': b'Andrey Karpenko',
+    b'ask': b'Ashish Kumar',
+    b'Manning, Sid': b'Sid Manning',
+    b'Lobo, Nestor': b'Nestor Lobo',
+    b'Studinski, Gidon': b'Gidon Studinski',
+    b'Govindaraju, Anil': b'Anil Govindaraju',
+    b'Karpenko, Andrey': b'Andrey Karpenko',
+    b'Kumar, Ashish': b'Ashish Kumar',
+    b'Bayerdorffer, Bryan': b'Bryan Bayerdorffer',
+}
+
+# Fix author name if needed
+if commit.author_name in name_fixes:
+    commit.author_name = name_fixes[commit.author_name]
+
+# Set committer to match author (after fixing author name)
+commit.committer_name = commit.author_name
+commit.committer_email = commit.author_email
+commit.committer_date = commit.author_date
+
+# Decode the commit message (surrogateescape keeps non-UTF-8 bytes intact)
+msg = commit.message.decode('utf-8', 'surrogateescape')
+
+# Remove any existing Signed-off-by lines, wherever they appear.
+# A single multiline pattern is used because chained "\n...\n" substitutions
+# skip every other line in a run of consecutive Signed-off-by trailers.
+msg = re.sub(r'(?m)^Signed-off-by:.*\n?', '', msg)
+
+# Clean up any trailing whitespace
+msg = msg.rstrip()
+
+# Construct the new Signed-off-by line using author info (which is now also committer)
+author_name = commit.author_name.decode('utf-8', 'surrogateescape')
+author_email = commit.author_email.decode('utf-8', 'surrogateescape')
+signed_off_by = f"Signed-off-by: {author_name} <{author_email}>"
+
+# Add the Signed-off-by line at the end
+# Ensure proper spacing (blank line before Signed-off-by)
+if msg:
+    msg = f"{msg}\n\n{signed_off_by}\n"
+else:
+    msg = f"{signed_off_by}\n"
+
+# Update the commit message
+commit.message = msg.encode('utf-8', 'surrogateescape')
diff --git a/email-fixes.py b/email-fixes.py
new file mode 100755
index 000000000..79b049a06
--- /dev/null
+++ b/email-fixes.py
@@ -0,0 +1,23 @@
+#!/usr/bin/env python3
+# Copyright (c) Qualcomm Technologies, Inc. and/or its subsidiaries.
+# SPDX-License-Identifier: BSD-3-Clause-Clear
+
+"""
+Callback to normalize and consolidate email addresses
+"""
+
+# Extract username from email (surrogateescape keeps non-UTF-8 bytes intact)
+email_str = email.decode('utf-8', 'surrogateescape')
+username = email_str.split('@')[0]
+
+# An empty local part would turn into the meaningless address
+# "@qti.qualcomm.com", so leave such emails untouched
+if not username:
+    return email
+
+# Special case: consolidate bryan@ to bryanb@
+if username == 'bryan':
+    username = 'bryanb'
+
+# Return normalized email
+return f"{username}@qti.qualcomm.com".encode('utf-8', 'surrogateescape')
diff --git a/rewrite-history.sh b/rewrite-history.sh
new file mode 100755
index 000000000..50444b3f0
--- /dev/null
+++ b/rewrite-history.sh
@@ -0,0 +1,159 @@
+#!/bin/bash
+#
+# Copyright (c) Qualcomm Technologies, Inc. and/or its subsidiaries.
+# SPDX-License-Identifier: BSD-3-Clause-Clear
+#
+# Master script to rewrite git history for open-sourcing
+# This script:
+# 1. Adds BSD-3-Clause-Clear copyright headers to all source files
+# 2. Normalizes email addresses to @qti.qualcomm.com
+# 3. Removes internal references (github.qualcomm.com, Q6Auto, JIRA)
+# 4. Adds Signed-off-by lines to all commits
+# 5. Sets committer = author for all commits
+
+set -e # Exit on error
+
+# Colors for output
+RED='\033[0;31m'
+GREEN='\033[0;32m'
+YELLOW='\033[1;33m'
+NC='\033[0m' # No Color
+
+echo "========================================"
+echo "Git History Rewrite for Open-Sourcing"
+echo "========================================"
+echo ""
+
+# Check if we're in a git repository
+if ! git rev-parse --git-dir > /dev/null 2>&1; then
+    echo -e "${RED}Error: Not in a git repository${NC}"
+    exit 1
+fi
+
+# Check if required files exist
+REQUIRED_FILES=(
+    "git-filter-repo"
+    "add-copyright-file-callback.py"
+    "email-fixes.py"
+    "commit-callback.py"
+    "sanitize-commit-messages.py"
+)
+
+echo "Checking required files..."
+for file in "${REQUIRED_FILES[@]}"; do
+    if [ ! -f "$file" ]; then
+        echo -e "${RED}Error: Required file '$file' not found${NC}"
+        exit 1
+    fi
+    # Make sure they're executable
+    chmod +x "$file" 2>/dev/null || true
+done
+echo -e "${GREEN}✓ All required files present${NC}"
+echo ""
+
+# Get current branch
+CURRENT_BRANCH=$(git rev-parse --abbrev-ref HEAD)
+echo "Current branch: $CURRENT_BRANCH"
+echo ""
+
+# Count commits before
+COMMITS_BEFORE=$(git rev-list --all --count)
+echo "Commits before rewrite: $COMMITS_BEFORE"
+echo ""
+
+# Warn user
+echo -e "${YELLOW}WARNING: This will rewrite git history!${NC}"
+echo "This operation will:"
+echo " - Modify all commits in the repository"
+echo " - Remove the 'origin' remote"
+echo " - Cannot be easily undone"
+echo ""
+echo "Make sure you have a backup of your repository!"
+echo ""
+
+# Ask for confirmation unless --force is provided
+if [ "$1" != "--force" ]; then
+    read -p "Do you want to continue? (yes/no): " -r
+    echo
+    if [[ ! $REPLY =~ ^[Yy][Ee][Ss]$ ]]; then
+        echo "Aborted."
+        exit 0
+    fi
+fi
+
+echo ""
+echo "Starting git-filter-repo rewrite..."
+echo "This may take several minutes..."
+echo ""
+
+# Run git-filter-repo with all callbacks
+./git-filter-repo \
+    --file-info-callback add-copyright-file-callback.py \
+    --email-callback email-fixes.py \
+    --commit-callback commit-callback.py \
+    --message-callback sanitize-commit-messages.py \
+    --force
+
+echo ""
+echo -e "${GREEN}✓ Rewrite completed successfully!${NC}"
+echo ""
+
+# Count commits after
+COMMITS_AFTER=$(git rev-list --all --count)
+echo "Commits after rewrite: $COMMITS_AFTER"
+COMMITS_LOST=$((COMMITS_BEFORE - COMMITS_AFTER))
+if [ $COMMITS_LOST -gt 0 ]; then
+    echo -e "${YELLOW}Note: $COMMITS_LOST commits were filtered out (phantom references)${NC}"
+fi
+echo ""
+
+# Validation
+echo "Running validation checks..."
+echo ""
+
+# Check copyright headers
+echo "1. Checking copyright headers..."
+SAMPLE_FILE=$(git ls-tree -r HEAD --name-only | grep "\.c$" | head -1)
+if [ -n "$SAMPLE_FILE" ]; then
+    if git show HEAD:"$SAMPLE_FILE" | head -5 | grep -q "Copyright"; then
+        echo -e " ${GREEN}✓ Copyright headers present${NC}"
+    else
+        echo -e " ${RED}✗ Copyright headers missing${NC}"
+    fi
+else
+    echo " (No .c files found to check)"
+fi
+
+# Check internal references in full messages (grep -c prints 0 itself on no match; only its exit status is masked)
+echo "2. Checking for internal references..."
+GITHUB_COUNT=$(git log --all --format='%B' | grep -ic 'github.qualcomm.com' || true)
+Q6AUTO_COUNT=$(git log --all --format='%B' | grep -ic 'Q6Auto' || true)
+JIRA_COUNT=$(git log --all --format='%B' | grep -ic 'jira' || true)
+
+if [ "$GITHUB_COUNT" -eq 0 ] && [ "$Q6AUTO_COUNT" -eq 0 ] && [ "$JIRA_COUNT" -eq 0 ]; then
+    echo -e " ${GREEN}✓ No internal references found${NC}"
+else
+    echo -e " ${YELLOW}⚠ Found internal references:${NC}"
+    echo " github.qualcomm.com: $GITHUB_COUNT"
+    echo " Q6Auto: $Q6AUTO_COUNT"
+    echo " JIRA: $JIRA_COUNT"
+fi
+
+# Check unique committers
+echo "3. Checking committers..."
+COMMITTER_COUNT=$(git log --all --format='%cn <%ce>' | sort -u | wc -l)
+echo " Unique committers: $COMMITTER_COUNT"
+
+echo ""
+echo "========================================"
+echo -e "${GREEN}History rewrite completed successfully!${NC}"
+echo "========================================"
+echo ""
+echo "Next steps:"
+echo "1. Review the changes: git log --oneline | head -20"
+echo "2. Check a few files: git show HEAD:path/to/file.c"
+echo "3. When satisfied, push to new repository:"
+echo " git remote add new-origin <new-repository-url>"
+echo " git push new-origin --all --force"
+echo " git push new-origin --tags --force"
+echo ""
diff --git a/sanitize-commit-messages.py b/sanitize-commit-messages.py
new file mode 100755
index 000000000..8abfd7734
--- /dev/null
+++ b/sanitize-commit-messages.py
@@ -0,0 +1,47 @@
+#!/usr/bin/env python3
+# Copyright (c) Qualcomm Technologies, Inc. and/or its subsidiaries.
+# SPDX-License-Identifier: BSD-3-Clause-Clear
+
+"""
+Callback script for git-filter-repo to sanitize commit messages.
+Removes internal references to GitHub Enterprise, JIRA tickets, and internal URLs.
+
+Usage:
+    git-filter-repo --message-callback sanitize-commit-messages.py --force
+"""
+
+import re
+
+# The message is passed as the 'message' variable (bytes).  surrogateescape
+# lets a message that is not valid UTF-8 round-trip instead of raising
+# UnicodeDecodeError.
+msg = message.decode('utf-8', 'surrogateescape')
+
+# Pattern replacements for sanitization, applied in order
+replacements = [
+    # Replace internal GitHub Enterprise URLs
+    (r'github\.qualcomm\.com[:/]Q6Auto/h2', 'github.com/organization/repository'),
+
+    # Replace Q6Auto organization references (when not part of URL)
+    (r'\bQ6Auto/(\w+)', r'organization/\1'),
+
+    # Catch whatever the two specific rules above leave behind (other
+    # repositories on the internal host, bare organization mentions)
+    (r'github\.qualcomm\.com', 'github.com'),
+    (r'\bQ6Auto\b', 'organization'),
+
+    # Replace JIRA ticket references such as "JIRA 123", "jira-123" or
+    # "JIRA: #123"; one pattern suffices because re.IGNORECASE is used below
+    (r'\bjira[\s:#-]*\d+', 'issue tracker'),
+
+    # Replace internal branch name patterns (optional - uncomment if needed)
+    # (r'\bsival_com_nt\b', 'internal-branch'),
+    # (r'\bsival_pa32\b', 'internal-branch'),
+]
+
+# Apply all replacements
+for pattern, replacement in replacements:
+    msg = re.sub(pattern, replacement, msg, flags=re.IGNORECASE)
+
+# Return the modified message as bytes
+return msg.encode('utf-8', 'surrogateescape')