-
Notifications
You must be signed in to change notification settings - Fork 5
Phase 1.3.3: Mask credentials in all log output #30
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
base: main
Are you sure you want to change the base?
Changes from all commits
4280ba4
12b6d51
9c3ffeb
37c22d9
c2ff4fe
2965eb1
d0c496b
0834bb0
836fbbe
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
| Original file line number | Diff line number | Diff line change |
|---|---|---|
| @@ -1,5 +1,15 @@ | ||
| """Utils module.""" | ||
|
|
||
| from operator_use.utils.helper import ensure_directory | ||
| from operator_use.utils.log_filter import ( | ||
| CredentialMaskingFilter, | ||
| install_credential_masking, | ||
| mask_credentials, | ||
| ) | ||
|
|
||
| __all__ = ["ensure_directory"] | ||
| __all__ = [ | ||
| "CredentialMaskingFilter", | ||
| "ensure_directory", | ||
| "install_credential_masking", | ||
| "mask_credentials", | ||
| ] |
| Original file line number | Diff line number | Diff line change |
|---|---|---|
| @@ -0,0 +1,109 @@ | ||
| """Credential masking for log output -- prevents secrets leaking into logs.""" | ||
|
|
||
| import logging | ||
| import re | ||
|
|
||
|
|
||
| # Patterns that match common credential formats in log strings. | ||
| # Order matters: more specific patterns should come before general ones. | ||
| _MASK_PATTERNS: list[tuple[re.Pattern[str], str]] = [ | ||
| # URL DSN credentials: scheme://user:password@host or scheme://:password@host | ||
| ( | ||
| re.compile(r"(://[^:@/\s]*:)[^@\s]+(@)"), | ||
| r"\1***REDACTED***\2", | ||
| ), | ||
| # JWT-like strings (three base64url segments separated by dots) | ||
| ( | ||
| re.compile(r"eyJ[A-Za-z0-9\-_]+\.[A-Za-z0-9\-_]+\.[A-Za-z0-9\-_]+"), | ||
| "***JWT_REDACTED***", | ||
| ), | ||
| # Bearer token header values | ||
| ( | ||
| re.compile(r"(Bearer\s+)[A-Za-z0-9\-._~+/]+=*", re.IGNORECASE), | ||
| r"\1***REDACTED***", | ||
| ), | ||
| # Provider-specific credential patterns | ||
| (re.compile(r"gsk_[A-Za-z0-9]{8,}", re.IGNORECASE), "gsk_***REDACTED***"), | ||
| (re.compile(r"AIza[A-Za-z0-9\-_]{8,}"), "AIza***REDACTED***"), | ||
| (re.compile(r"nvapi-[A-Za-z0-9\-_]{8,}", re.IGNORECASE), "nvapi-***REDACTED***"), | ||
| # API keys / tokens with common prefixes (sk-, pk-, api-, token-, key-) | ||
| # Allows multi-segment keys like sk-proj-abc12345678 | ||
| # \b guards the word start; (?=...\d) requires at least one digit in the suffix | ||
| # to avoid matching infrastructure words like "api-gateway-endpoint" | ||
| ( | ||
| re.compile( | ||
| r"\b(sk|pk|api|token|key)[-_](?=[A-Za-z0-9\-_]*\d)[A-Za-z0-9\-_]{8,}", | ||
| re.IGNORECASE, | ||
| ), | ||
| r"\1-***REDACTED***", | ||
| ), | ||
|
Comment on lines
+29
to
+39
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. 2. gsk_/AIza/nvapi- unmasked: The masking regexes do not include required provider credential formats gsk_ (Groq), AIza (Google), or nvapi- (NVIDIA), so these can appear unmasked in logs. This violates the requirement to mask all specified provider patterns at all log levels. Agent Prompt
|
||
| # Authorization / x-api-key / x-auth-token headers | ||
| ( | ||
| re.compile( | ||
| r"(authorization|x-api-key|x-auth-token)\s*[:=]\s*\S+", re.IGNORECASE | ||
| ), | ||
| r"\1: ***REDACTED***", | ||
| ), | ||
| # password= / secret= / token= / api_key= patterns in query strings or log lines | ||
| ( | ||
| re.compile( | ||
| r"(password|secret|passwd|pwd|token|api_key|apikey)\s*[=:]\s*\S+", | ||
| re.IGNORECASE, | ||
| ), | ||
| r"\1=***REDACTED***", | ||
| ), | ||
| # Generic high-entropy secrets: key=value or key: value where value is 32+ alphanum chars | ||
| ( | ||
| re.compile(r"(\b\w+\b\s*[=:]\s*)([A-Za-z0-9_\-]{32,})"), | ||
| r"\1***REDACTED***", | ||
| ), | ||
| ] | ||
|
Comment on lines
+7
to
+60
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. 3. Missing 32+ token masking: The implementation does not mask generic high-entropy secrets matching [a-zA-Z0-9_-]{32,} in
key-value contexts, allowing such values to be logged unredacted. This fails the generic
credential-leakage mitigation requirement.
Agent Prompt
|
||
|
|
||
|
|
||
| def mask_credentials(text: str) -> str: | ||
| """Apply all credential masking patterns to a string.""" | ||
| for pattern, replacement in _MASK_PATTERNS: | ||
| text = pattern.sub(replacement, text) | ||
| return text | ||
|
|
||
|
|
||
| class CredentialMaskingFilter(logging.Filter): | ||
| """Logging filter that redacts credential patterns from all log records. | ||
|
|
||
| Uses record.getMessage() to render the final formatted message before masking, | ||
| then clears record.args so the formatter does not re-apply %-style substitution. | ||
| This avoids TypeError when log args include numeric placeholders (%d, %.2f). | ||
| """ | ||
|
|
||
| def filter(self, record: logging.LogRecord) -> bool: | ||
| # Render the message with its args first to preserve type semantics, | ||
| # then mask the rendered string. Clear args so the handler formatter | ||
| # does not re-format (which would re-expose the original values). | ||
| rendered = record.getMessage() | ||
| record.msg = mask_credentials(rendered) | ||
| record.args = () | ||
| return True | ||
|
Comment on lines
+78
to
+85
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. 5. Logging args type broken: CredentialMaskingFilter.filter() converts all tuple/dict log args to strings, which can raise TypeError for existing %-style numeric formatting (e.g., %d, %.2f) during record formatting. This can crash logging calls or drop log output in normal execution paths. Agent Prompt
|
||
|
|
||
|
|
||
| def install_credential_masking() -> None: | ||
| """Install credential masking on the root logger and all current handlers. | ||
|
|
||
| Attaches CredentialMaskingFilter both to the root logger and to every | ||
| handler on the root logger, ensuring records emitted via named loggers | ||
| (logging.getLogger(__name__)) are masked regardless of propagation path. | ||
|
|
||
| Must be called *after* all handlers have been added to the root logger | ||
| (e.g. at the end of setup_logging()). Handlers added after this call | ||
| will not automatically receive the filter. | ||
| """ | ||
| root_logger = logging.getLogger() | ||
| filter_instance = CredentialMaskingFilter() | ||
|
|
||
| # Add to root logger filters (catches records at the logger level) | ||
| if not any(isinstance(f, CredentialMaskingFilter) for f in root_logger.filters): | ||
| root_logger.addFilter(filter_instance) | ||
|
|
||
| # Also add to every handler on the root logger for belt-and-suspenders coverage | ||
| for handler in root_logger.handlers: | ||
| if not any(isinstance(f, CredentialMaskingFilter) for f in handler.filters): | ||
| handler.addFilter(CredentialMaskingFilter()) | ||
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
1. log_filter.py module missing
📎 Requirement gap ⚙ Maintainability Agent Prompt
ⓘ Copy this prompt and use it to remediate the issue with your preferred AI generation tools