From 9e6889fc5561794b35ce2b9d6ef287667bd8116b Mon Sep 17 00:00:00 2001 From: youichi-uda Date: Mon, 1 Jun 2026 15:12:35 +0900 Subject: [PATCH 1/2] fix(schema): return Err instead of panicking on invalid name/alias/type-ref `Schema::parse_str` panicked on schema JSON whose alias, type reference, or nested type name is not a valid Avro identifier, because three sites in `schema::parser` called `Name::new(..).unwrap()` / `Alias::new(..).unwrap()` on values that already return `AvroResult`. Since `parse_str` is a public `Result`-returning API, callers cannot guard against the unwind, so parsing untrusted schema JSON was a DoS. This is the sibling of the `Name::parse` fix in #496. - `get_already_seen_schema`: an unparseable type name cannot match an already-seen schema, so return `None` (`.ok()?`) rather than unwrap. - `fix_aliases_namespace`: return `AvroResult` and propagate; an invalid alias is a schema error (`?` added at the three call sites). - `get_schema_type_name`: fall back to the enclosing name (`unwrap_or(name)`), matching the existing `_ => name` arm. Adds a regression test (tests/avro-rs-547.rs) covering invalid aliases, nested names and type references, plus a positive case ensuring valid aliases still parse. Fixes #547 Signed-off-by: youichi-uda --- avro/src/schema/parser.rs | 55 ++++++++++++++++++------------ avro/tests/avro-rs-547.rs | 71 +++++++++++++++++++++++++++++++++++++++ 2 files changed, 104 insertions(+), 22 deletions(-) create mode 100644 avro/tests/avro-rs-547.rs diff --git a/avro/src/schema/parser.rs b/avro/src/schema/parser.rs index 4235209c..950db6d0 100644 --- a/avro/src/schema/parser.rs +++ b/avro/src/schema/parser.rs @@ -522,8 +522,12 @@ impl Parser { ) -> Option<&Schema> { match complex.get("type") { Some(Value::String(typ)) => { + // A `type` that is not a valid Avro name cannot match any + // already-seen schema, so treat it as "not seen" rather than + // panicking. The real validation error is surfaced later by + // the parse path that calls `Name::parse`. let name = - Name::new_with_enclosing_namespace(typ.as_str(), enclosing_namespace).unwrap(); + Name::new_with_enclosing_namespace(typ.as_str(), enclosing_namespace).ok()?; self.resolving_schemas .get(&name) .or_else(|| self.parsed_schemas.get(&name)) @@ -548,7 +552,7 @@ impl Parser { let fully_qualified_name = Name::parse(complex, enclosing_namespace)?; let aliases = - self.fix_aliases_namespace(complex.aliases(), fully_qualified_name.namespace()); + self.fix_aliases_namespace(complex.aliases(), fully_qualified_name.namespace())?; let mut lookup = BTreeMap::new(); @@ -622,7 +626,7 @@ impl Parser { let fully_qualified_name = Name::parse(complex, enclosing_namespace)?; let aliases = - self.fix_aliases_namespace(complex.aliases(), fully_qualified_name.namespace()); + self.fix_aliases_namespace(complex.aliases(), fully_qualified_name.namespace())?; let symbols: Vec = symbols_opt .and_then(|v| v.as_array()) @@ -771,7 +775,7 @@ impl Parser { let fully_qualified_name = Name::parse(complex, enclosing_namespace)?; let aliases = - self.fix_aliases_namespace(complex.aliases(), fully_qualified_name.namespace()); + self.fix_aliases_namespace(complex.aliases(), fully_qualified_name.namespace())?; let schema = Schema::Fixed(FixedSchema { name: fully_qualified_name.clone(), @@ -795,29 +799,36 @@ impl Parser { &self, aliases: Option>, namespace: NamespaceRef, - ) -> Aliases { - aliases.map(|aliases| { - aliases - .iter() - .map(|alias| { - if alias.find('.').is_none() { - match namespace { - Some(ns) => format!("{ns}.{alias}"), - None => alias.clone(), - } - } else { - alias.clone() - } - }) - .map(|alias| Alias::new(alias.as_str()).unwrap()) - .collect() - }) + ) -> AvroResult { + aliases + .map(|aliases| { + aliases + .iter() + .map(|alias| { + let qualified = if alias.find('.').is_none() { + match namespace { + Some(ns) => format!("{ns}.{alias}"), + None => alias.clone(), + } + } else { + alias.clone() + }; + // An alias that is not a valid Avro name is a schema + // error — propagate it instead of unwrapping (which + // would panic on attacker-controlled schema JSON). + Alias::new(qualified.as_str()) + }) + .collect::>>() + }) + .transpose() } fn get_schema_type_name(&self, name: Name, value: &Value) -> Name { match value.get("type") { Some(Value::Object(complex_type)) => match complex_type.name() { - Some(name) => Name::new(name).unwrap(), + // Fall back to the enclosing name if the nested `type` name is + // not a valid Avro name, rather than panicking on `unwrap()`. + Some(type_name) => Name::new(type_name).unwrap_or(name), _ => name, }, _ => name, diff --git a/avro/tests/avro-rs-547.rs b/avro/tests/avro-rs-547.rs new file mode 100644 index 00000000..7589dc24 --- /dev/null +++ b/avro/tests/avro-rs-547.rs @@ -0,0 +1,71 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. + +//! Regression: parsing a schema whose name / alias / type-reference is not a +//! valid Avro identifier must return `Err`, never panic. +//! +//! `Schema::parse_str` is a `Result`-returning, public API; callers cannot +//! guard against an internal `unwrap()`, so a panic on attacker-controlled +//! schema JSON is a denial-of-service. `Name::parse` was fixed previously, +//! but the sibling sites in `schema::parser` (`get_already_seen_schema`, +//! `fix_aliases_namespace`, `get_schema_type_name`) still called +//! `Name::new(..).unwrap()` / `Alias::new(..).unwrap()`. + +use apache_avro::Schema; + +/// Each of these declares a name, alias, or type reference that is not a valid +/// Avro identifier. Before the fix at least one site (`fix_aliases_namespace`) +/// panicked with `called 'Result::unwrap()' on an 'Err' value: Invalid schema +/// name ...`. +const INVALID_NAME_SCHEMAS: &[&str] = &[ + // invalid `aliases` entry on a record / fixed / enum + r#"{"type": "record", "name": "R", "aliases": [":"], "fields": []}"#, + r#"{"type": "fixed", "name": "F", "aliases": ["a.b:c"], "size": 4}"#, + r#"{"type": "enum", "name": "E", "aliases": [""], "symbols": ["A"]}"#, + // invalid nested type name + r#"{"type": "record", "name": "R", "fields": [ + {"name": "f", "type": {"type": "record", "name": ":", "fields": []}} + ]}"#, + // invalid string type reference + r#"{"type": "record", "name": "R", "fields": [{"name": "f", "type": ":"}]}"#, + // invalid top-level name + r#"{"type": "record", "name": ":", "fields": []}"#, +]; + +#[test] +fn invalid_schema_names_return_err_not_panic() { + for schema in INVALID_NAME_SCHEMAS { + let result = Schema::parse_str(schema); + assert!( + result.is_err(), + "expected Err (not a panic) for invalid-name schema:\n{schema}" + ); + } +} + +#[test] +fn valid_schema_with_aliases_still_parses() { + // Guards against the fix over-rejecting: legitimate aliases must still work. + let schema = r#"{ + "type": "record", + "name": "R", + "namespace": "ns", + "aliases": ["Old", "other.Older"], + "fields": [{"name": "f", "type": "int"}] + }"#; + assert!(Schema::parse_str(schema).is_ok()); +} From ac0282c9a9cb833faeff89968fb10dd0ac109859 Mon Sep 17 00:00:00 2001 From: youichi-uda Date: Mon, 1 Jun 2026 17:05:19 +0900 Subject: [PATCH 2/2] address review: avoid clones in fix_aliases_namespace; propagate in get_schema_type_name - fix_aliases_namespace: use `contains('.')` and pass `&str` / `&format!(..)` directly to `Alias::new`, dropping the intermediate `qualified` String and the per-alias clones. - get_schema_type_name: return `AvroResult` and propagate the validation error (callers updated with `?`) instead of silently falling back to the enclosing name on an invalid nested `type` name. Signed-off-by: youichi-uda --- avro/src/schema/parser.rs | 31 +++++++++++++++---------------- 1 file changed, 15 insertions(+), 16 deletions(-) diff --git a/avro/src/schema/parser.rs b/avro/src/schema/parser.rs index 950db6d0..57ef4123 100644 --- a/avro/src/schema/parser.rs +++ b/avro/src/schema/parser.rs @@ -96,7 +96,7 @@ impl Parser { .expect("Key unexpectedly missing"); let parsed = self.parse(&value, None)?; self.parsed_schemas - .insert(self.get_schema_type_name(name, &value), parsed); + .insert(self.get_schema_type_name(name, &value)?, parsed); } Ok(()) } @@ -197,7 +197,7 @@ impl Parser { // parsing a full schema from inside another schema. Other full schema will not inherit namespace let parsed = self.parse(&value, None)?; self.parsed_schemas.insert( - self.get_schema_type_name(fully_qualified_name, &value), + self.get_schema_type_name(fully_qualified_name, &value)?, parsed.clone(), ); @@ -805,33 +805,32 @@ impl Parser { aliases .iter() .map(|alias| { - let qualified = if alias.find('.').is_none() { - match namespace { - Some(ns) => format!("{ns}.{alias}"), - None => alias.clone(), - } - } else { - alias.clone() - }; // An alias that is not a valid Avro name is a schema // error — propagate it instead of unwrapping (which // would panic on attacker-controlled schema JSON). - Alias::new(qualified.as_str()) + if alias.contains('.') { + Alias::new(alias.as_str()) + } else { + match namespace { + Some(ns) => Alias::new(&format!("{ns}.{alias}")), + None => Alias::new(alias.as_str()), + } + } }) .collect::>>() }) .transpose() } - fn get_schema_type_name(&self, name: Name, value: &Value) -> Name { + fn get_schema_type_name(&self, name: Name, value: &Value) -> AvroResult { match value.get("type") { Some(Value::Object(complex_type)) => match complex_type.name() { - // Fall back to the enclosing name if the nested `type` name is + // Propagate the validation error if the nested `type` name is // not a valid Avro name, rather than panicking on `unwrap()`. - Some(type_name) => Name::new(type_name).unwrap_or(name), - _ => name, + Some(type_name) => Name::new(type_name), + _ => Ok(name), }, - _ => name, + _ => Ok(name), } }