Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
4 changes: 4 additions & 0 deletions CHANGELOG.md
Original file line number Diff line number Diff line change
Expand Up @@ -7,6 +7,10 @@ and this project adheres to [Semantic Versioning](https://semver.org/).

## [Unreleased]

### Changed

- `multipartkit/content_disposition.parse` now enforces RFC 7230 §3.2.6 `quoted-pair` strictly: a `\X` whose `X` is outside `HTAB / SP / VCHAR / obs-text` (for example `NUL`, `CR`, `LF`, or any other ASCII control byte) is rejected with the new `Error(InvalidQuotedPair(_))` instead of silently dropping the backslash, blocking `NUL`-smuggling into the decoded `name` / `filename`. (#50)

## [0.12.0] - 2026-05-11

### Changed
Expand Down
19 changes: 14 additions & 5 deletions src/multipartkit/content_disposition.gleam
Original file line number Diff line number Diff line change
Expand Up @@ -3,8 +3,10 @@ import gleam/list
import gleam/option.{type Option, None, Some}
import gleam/result
import gleam/string
import multipartkit/error.{type MultipartError, InvalidContentDisposition}
import multipartkit/internal/text
import multipartkit/error.{
type MultipartError, InvalidContentDisposition, InvalidQuotedPair,
}
import multipartkit/internal/text.{QuotedPairInvalid, QuotedSyntax}

/// A parsed `Content-Disposition` header value.
///
Expand All @@ -19,7 +21,13 @@ import multipartkit/internal/text
/// RFC 5987 / RFC 8187 `*`-form when present (with the `*`-form taking
/// precedence over the plain form), otherwise from the plain parameter
/// value with surrounding quotes removed and backslash-escapes resolved
/// per RFC 7230 quoted-string rules.
/// per RFC 7230 §3.2.6 quoted-string / quoted-pair rules. A `\X`
/// escape whose `X` is outside the `quoted-pair` grammar
/// (`HTAB / SP / VCHAR / obs-text`) — for instance `NUL`, `CR`, `LF`,
/// or any other ASCII control byte — causes `parse` to return
/// `Error(InvalidQuotedPair(original_value))` instead of silently
/// dropping the backslash. This blocks `NUL`-smuggling into the
/// decoded `name` / `filename`.
/// - `params/1` contains every parameter as it appeared in the input
/// including `filename` and `name`, in order. Duplicate parameter names
/// are preserved left-to-right; only the first occurrence wins for the
Expand Down Expand Up @@ -116,8 +124,9 @@ fn parse_one_param(
_ ->
case string.pop_grapheme(after_key) {
Ok(#("=", value_rest)) ->
case text.read_token_or_quoted(value_rest) {
Error(Nil) -> Error(InvalidContentDisposition(original))
case text.read_token_or_quoted_strict(value_rest) {
Error(QuotedPairInvalid) -> Error(InvalidQuotedPair(original))
Error(QuotedSyntax) -> Error(InvalidContentDisposition(original))
Ok(#(raw_value, tail)) ->
parse_params(tail, [#(key, raw_value), ..acc], original)
}
Expand Down
11 changes: 11 additions & 0 deletions src/multipartkit/error.gleam
Original file line number Diff line number Diff line change
Expand Up @@ -51,4 +51,15 @@ pub type MultipartError {
/// inject a header break or split into a different `name: value` pair
/// at parse time. Carries the offending name.
InvalidHeaderName(name: String)
/// A `Content-Disposition` quoted-string parameter contained a
/// `\X` escape whose `X` is outside the RFC 7230 §3.2.6
/// `quoted-pair` grammar (`HTAB / SP / VCHAR / obs-text`). The
/// offending second character is not `HTAB`, `SP`, a `VCHAR`
/// (`%x21-7E`), nor `obs-text` (`%x80-FF`) — for example
/// `NUL`, `CR`, `LF`, or any other ASCII control byte. Allowing
/// these would let an attacker smuggle `NUL` into the decoded
/// `name` / `filename` (e.g. for C-string truncation attacks).
/// The carried value is the original Content-Disposition header
/// text so callers can render diagnostics.
InvalidQuotedPair(String)
}
110 changes: 110 additions & 0 deletions src/multipartkit/internal/text.gleam
Original file line number Diff line number Diff line change
Expand Up @@ -100,6 +100,116 @@ pub fn read_token_or_quoted(input: String) -> Result(#(String, String), Nil) {
}
}

/// Internal: Fault categories returned by `read_token_or_quoted_strict`.
///
/// - `QuotedSyntax`: the quoted-string is malformed in a way that is
/// not a quoted-pair grammar violation (e.g. unterminated, missing
/// opening quote, or an empty token where one was expected).
/// - `QuotedPairInvalid`: the input was otherwise a well-formed
/// quoted-string but contained a `\X` escape whose `X` falls outside
/// the RFC 7230 §3.2.6 `quoted-pair` grammar
/// (`HTAB / SP / VCHAR / obs-text`).
pub type QuotedFault {
QuotedSyntax
QuotedPairInvalid
}

/// Internal: Like `read_token_or_quoted`, but enforces RFC 7230 §3.2.6
/// strictly inside quoted-strings: the second character of a
/// `quoted-pair` (`\X`) MUST be `HTAB`, `SP`, a `VCHAR` (`%x21-7E`),
/// or `obs-text` (`%x80-FF`). Any other byte (NUL, CR, LF, other ASCII
/// control bytes, or DEL) results in `Error(QuotedPairInvalid)`.
pub fn read_token_or_quoted_strict(
input: String,
) -> Result(#(String, String), QuotedFault) {
case string.starts_with(input, "\"") {
True -> read_quoted_string_strict(input)
False -> {
let #(token, rest) = read_token(input)
case token {
"" -> Error(QuotedSyntax)
_ -> Ok(#(token, rest))
}
}
}
}

fn read_quoted_string_strict(
input: String,
) -> Result(#(String, String), QuotedFault) {
case string.pop_grapheme(input) {
Ok(#("\"", rest)) -> read_quoted_loop_strict(rest, "")
_ -> Error(QuotedSyntax)
}
}

fn read_quoted_loop_strict(
input: String,
acc: String,
) -> Result(#(String, String), QuotedFault) {
case string.pop_grapheme(input) {
Error(_) -> Error(QuotedSyntax)
Ok(#("\"", rest)) -> Ok(#(acc, rest))
Ok(#("\\", rest)) ->
case string.pop_grapheme(rest) {
Error(_) -> Error(QuotedSyntax)
Ok(#(escaped, after)) ->
case is_valid_quoted_pair_second_char(escaped) {
True -> read_quoted_loop_strict(after, acc <> escaped)
False -> Error(QuotedPairInvalid)
}
}
Ok(#(grapheme, rest)) -> read_quoted_loop_strict(rest, acc <> grapheme)
}
}

/// Internal: RFC 7230 §3.2.6 — the second character of a quoted-pair
/// must be `HTAB / SP / VCHAR / obs-text`. `HTAB` is `%x09`, `SP` is
/// `%x20`, `VCHAR` is `%x21-7E`, `obs-text` is `%x80-FF`. Excluded
/// here are `NUL` (`%x00`), other `%x01-08`, `LF` (`%x0A`), `CR`
/// (`%x0D`), the remaining `%x0B-1F` control bytes, and `DEL`
/// (`%x7F`). Non-ASCII graphemes (any code point ≥ U+0080) are
/// permitted as `obs-text`.
fn is_valid_quoted_pair_second_char(grapheme: String) -> Bool {
case grapheme {
"\t" -> True
" " -> True
"\u{0000}" -> False
"\u{0001}" -> False
"\u{0002}" -> False
"\u{0003}" -> False
"\u{0004}" -> False
"\u{0005}" -> False
"\u{0006}" -> False
"\u{0007}" -> False
"\u{0008}" -> False
"\n" -> False
"\u{000B}" -> False
"\u{000C}" -> False
"\r" -> False
"\u{000E}" -> False
"\u{000F}" -> False
"\u{0010}" -> False
"\u{0011}" -> False
"\u{0012}" -> False
"\u{0013}" -> False
"\u{0014}" -> False
"\u{0015}" -> False
"\u{0016}" -> False
"\u{0017}" -> False
"\u{0018}" -> False
"\u{0019}" -> False
"\u{001A}" -> False
"\u{001B}" -> False
"\u{001C}" -> False
"\u{001D}" -> False
"\u{001E}" -> False
"\u{001F}" -> False
"\u{007F}" -> False
_ -> True
}
}

/// Internal: Lowercase ASCII A-Z; preserve all other code points. Used in
/// places where we must avoid Unicode case-folding (e.g., header names per
/// RFC 7230).
Expand Down
67 changes: 67 additions & 0 deletions test/regression_cd_quoted_pair_test.gleam
Original file line number Diff line number Diff line change
@@ -0,0 +1,67 @@
//// Regression tests for Issue #50: `multipartkit/content_disposition.parse`
//// quoted-pair handling must align with RFC 7230 §3.2.6
////
//// quoted-pair = "\" ( HTAB / SP / VCHAR / obs-text )
////
//// where `VCHAR = %x21-7E` and `obs-text = %x80-FF`. A `\X` whose `X`
//// is outside that set (NUL, CR, LF, other control bytes, or DEL)
//// must be rejected with `Error(InvalidQuotedPair(...))` rather than
//// silently dropping the backslash — otherwise an attacker can smuggle
//// a NUL byte into a decoded `name` / `filename`.

import gleam/option.{Some}
import gleeunit/should
import multipartkit/content_disposition as cd

// === RFC 7230 §3.2.6 strict compliance ===

pub fn cd_canonical_escape_backslash_test() {
let assert Ok(p) = cd.parse("form-data; name=\"\\\\backslash\"")
cd.name(p) |> should.equal(Some("\\backslash"))
}

pub fn cd_canonical_escape_quote_test() {
let assert Ok(p) = cd.parse("form-data; name=\"\\\"quoted\\\"\"")
cd.name(p) |> should.equal(Some("\"quoted\""))
}

pub fn cd_vchar_escape_drops_backslash_test() {
// \X where X is VCHAR ('n' = U+006E ∈ [%x21-7E])
// RFC 7230: \X 展開 = X (backslash drop)
let assert Ok(p) = cd.parse("form-data; name=\"\\nnewline\"")
cd.name(p) |> should.equal(Some("nnewline"))
}

pub fn cd_nul_in_quoted_pair_rejected_test() {
// \X where X is NUL (U+0000): not VCHAR, not obs-text → invalid quoted-pair
case cd.parse("form-data; name=\"\\\u{0000}nul\"") {
Error(_) -> Nil
Ok(_) -> should.fail()
}
}

pub fn cd_cr_in_quoted_pair_rejected_test() {
case cd.parse("form-data; name=\"\\\rcr\"") {
Error(_) -> Nil
Ok(_) -> should.fail()
}
}

pub fn cd_lf_in_quoted_pair_rejected_test() {
case cd.parse("form-data; name=\"\\\nlf\"") {
Error(_) -> Nil
Ok(_) -> should.fail()
}
}

// === Behavioural regression baseline ===

pub fn cd_no_escape_test() {
let assert Ok(p) = cd.parse("form-data; name=\"plain\"")
cd.name(p) |> should.equal(Some("plain"))
}

pub fn cd_utf8_name_test() {
let assert Ok(p) = cd.parse("form-data; name=\"日本語\"")
cd.name(p) |> should.equal(Some("日本語"))
}