From b4278b4f651cd75184f824aac965c3627932320d Mon Sep 17 00:00:00 2001 From: Kiro Agent <244629292+kiro-agent@users.noreply.github.com> Date: Wed, 10 Jun 2026 06:51:30 +0000 Subject: [PATCH] Python: Add repr() sanitizer to py/log-injection query Co-authored-by: Mrigank Pawagi <25179158+mrigankpawagi@users.noreply.github.com> --- ...2025-06-07-log-injection-repr-sanitizer.md | 4 ++++ .../dataflow/LogInjectionCustomizations.qll | 24 +++++++++++++++++++ ...2025-06-07-log-injection-repr-sanitizer.md | 4 ++++ .../CWE-117-LogInjection/LogInjectionGood.py | 12 ++++++++++ 4 files changed, 44 insertions(+) create mode 100644 python/ql/lib/change-notes/2025-06-07-log-injection-repr-sanitizer.md create mode 100644 python/ql/src/change-notes/2025-06-07-log-injection-repr-sanitizer.md diff --git a/python/ql/lib/change-notes/2025-06-07-log-injection-repr-sanitizer.md b/python/ql/lib/change-notes/2025-06-07-log-injection-repr-sanitizer.md new file mode 100644 index 000000000000..b65419c8930b --- /dev/null +++ b/python/ql/lib/change-notes/2025-06-07-log-injection-repr-sanitizer.md @@ -0,0 +1,4 @@ +--- +category: majorAnalysis +--- +* Calls to `repr()` and use of the `%r` format specifier are now recognized as sanitizers for the `py/log-injection` query, reducing false positives. diff --git a/python/ql/lib/semmle/python/security/dataflow/LogInjectionCustomizations.qll b/python/ql/lib/semmle/python/security/dataflow/LogInjectionCustomizations.qll index 98c767df2894..d5725ededb6f 100644 --- a/python/ql/lib/semmle/python/security/dataflow/LogInjectionCustomizations.qll +++ b/python/ql/lib/semmle/python/security/dataflow/LogInjectionCustomizations.qll @@ -10,6 +10,7 @@ private import semmle.python.Concepts private import semmle.python.dataflow.new.RemoteFlowSources private import semmle.python.dataflow.new.BarrierGuards private import semmle.python.frameworks.data.ModelsAsData +private import semmle.python.ApiGraphs /** * Provides default sources, sinks and sanitizers for detecting @@ -42,12 +43,27 @@ module LogInjection { */ private class ActiveThreatModelSourceAsSource extends Source, ActiveThreatModelSource { } + /** + * Holds if `node` is an argument to a logging call where the format string + * uses `%r` for that argument, which applies `repr()` escaping. + */ + private predicate isReprFormattedLoggingArg(DataFlow::Node node) { + exists(Logging log | + node = log.getAnInput() and + exists(DataFlow::Node fmtNode | + fmtNode = log.getAnInput() and + fmtNode.asExpr().(StringLiteral).getText().regexpMatch(".*%r.*") + ) + ) + } + /** * A logging operation, considered as a flow sink. */ class LoggingAsSink extends Sink { LoggingAsSink() { this = any(Logging write).getAnInput() and + not isReprFormattedLoggingArg(this) and // since the inner implementation of the `logging.Logger.warn` function is // ```py // class Logger: @@ -107,6 +123,14 @@ module LogInjection { } } + /** + * A call to `repr()`, considered as a sanitizer. + * `repr()` escapes special characters such as newlines, preventing log injection. + */ + class ReprCallSanitizer extends Sanitizer, DataFlow::CallCfgNode { + ReprCallSanitizer() { this = API::builtin("repr").getACall() } + } + /** * A sanitizer defined via models-as-data with kind "log-injection". */ diff --git a/python/ql/src/change-notes/2025-06-07-log-injection-repr-sanitizer.md b/python/ql/src/change-notes/2025-06-07-log-injection-repr-sanitizer.md new file mode 100644 index 000000000000..b65419c8930b --- /dev/null +++ b/python/ql/src/change-notes/2025-06-07-log-injection-repr-sanitizer.md @@ -0,0 +1,4 @@ +--- +category: majorAnalysis +--- +* Calls to `repr()` and use of the `%r` format specifier are now recognized as sanitizers for the `py/log-injection` query, reducing false positives. diff --git a/python/ql/test/query-tests/Security/CWE-117-LogInjection/LogInjectionGood.py b/python/ql/test/query-tests/Security/CWE-117-LogInjection/LogInjectionGood.py index d9279f2e4822..e2c41e21ff99 100644 --- a/python/ql/test/query-tests/Security/CWE-117-LogInjection/LogInjectionGood.py +++ b/python/ql/test/query-tests/Security/CWE-117-LogInjection/LogInjectionGood.py @@ -18,6 +18,18 @@ def good1(): logging.info('User name: ' + name) # Good return 'good1' +@app.route('/good_repr1') +def good_repr1(): + name = request.args.get('name') + logging.info('User name: ' + repr(name)) # Good - repr() escapes special characters + return 'good_repr1' + +@app.route('/good_repr2') +def good_repr2(): + name = request.args.get('name') + logging.info('User name: %r', name) # Good - %r format specifier applies repr() + return 'good_repr2' + if __name__ == '__main__': app.debug = True handler = logging.FileHandler('log')