diff --git a/avro/src/schema/parser.rs b/avro/src/schema/parser.rs index 4235209c..57ef4123 100644 --- a/avro/src/schema/parser.rs +++ b/avro/src/schema/parser.rs @@ -96,7 +96,7 @@ impl Parser { .expect("Key unexpectedly missing"); let parsed = self.parse(&value, None)?; self.parsed_schemas - .insert(self.get_schema_type_name(name, &value), parsed); + .insert(self.get_schema_type_name(name, &value)?, parsed); } Ok(()) } @@ -197,7 +197,7 @@ impl Parser { // parsing a full schema from inside another schema. Other full schema will not inherit namespace let parsed = self.parse(&value, None)?; self.parsed_schemas.insert( - self.get_schema_type_name(fully_qualified_name, &value), + self.get_schema_type_name(fully_qualified_name, &value)?, parsed.clone(), ); @@ -522,8 +522,12 @@ impl Parser { ) -> Option<&Schema> { match complex.get("type") { Some(Value::String(typ)) => { + // A `type` that is not a valid Avro name cannot match any + // already-seen schema, so treat it as "not seen" rather than + // panicking. The real validation error is surfaced later by + // the parse path that calls `Name::parse`. let name = - Name::new_with_enclosing_namespace(typ.as_str(), enclosing_namespace).unwrap(); + Name::new_with_enclosing_namespace(typ.as_str(), enclosing_namespace).ok()?; self.resolving_schemas .get(&name) .or_else(|| self.parsed_schemas.get(&name)) @@ -548,7 +552,7 @@ impl Parser { let fully_qualified_name = Name::parse(complex, enclosing_namespace)?; let aliases = - self.fix_aliases_namespace(complex.aliases(), fully_qualified_name.namespace()); + self.fix_aliases_namespace(complex.aliases(), fully_qualified_name.namespace())?; let mut lookup = BTreeMap::new(); @@ -622,7 +626,7 @@ impl Parser { let fully_qualified_name = Name::parse(complex, enclosing_namespace)?; let aliases = - self.fix_aliases_namespace(complex.aliases(), fully_qualified_name.namespace()); + self.fix_aliases_namespace(complex.aliases(), fully_qualified_name.namespace())?; let symbols: Vec = symbols_opt .and_then(|v| v.as_array()) @@ -771,7 +775,7 @@ impl Parser { let fully_qualified_name = Name::parse(complex, enclosing_namespace)?; let aliases = - self.fix_aliases_namespace(complex.aliases(), fully_qualified_name.namespace()); + self.fix_aliases_namespace(complex.aliases(), fully_qualified_name.namespace())?; let schema = Schema::Fixed(FixedSchema { name: fully_qualified_name.clone(), @@ -795,32 +799,38 @@ impl Parser { &self, aliases: Option>, namespace: NamespaceRef, - ) -> Aliases { - aliases.map(|aliases| { - aliases - .iter() - .map(|alias| { - if alias.find('.').is_none() { - match namespace { - Some(ns) => format!("{ns}.{alias}"), - None => alias.clone(), + ) -> AvroResult { + aliases + .map(|aliases| { + aliases + .iter() + .map(|alias| { + // An alias that is not a valid Avro name is a schema + // error — propagate it instead of unwrapping (which + // would panic on attacker-controlled schema JSON). + if alias.contains('.') { + Alias::new(alias.as_str()) + } else { + match namespace { + Some(ns) => Alias::new(&format!("{ns}.{alias}")), + None => Alias::new(alias.as_str()), + } } - } else { - alias.clone() - } - }) - .map(|alias| Alias::new(alias.as_str()).unwrap()) - .collect() - }) + }) + .collect::>>() + }) + .transpose() } - fn get_schema_type_name(&self, name: Name, value: &Value) -> Name { + fn get_schema_type_name(&self, name: Name, value: &Value) -> AvroResult { match value.get("type") { Some(Value::Object(complex_type)) => match complex_type.name() { - Some(name) => Name::new(name).unwrap(), - _ => name, + // Propagate the validation error if the nested `type` name is + // not a valid Avro name, rather than panicking on `unwrap()`. + Some(type_name) => Name::new(type_name), + _ => Ok(name), }, - _ => name, + _ => Ok(name), } } diff --git a/avro/tests/avro-rs-547.rs b/avro/tests/avro-rs-547.rs new file mode 100644 index 00000000..7589dc24 --- /dev/null +++ b/avro/tests/avro-rs-547.rs @@ -0,0 +1,71 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. + +//! Regression: parsing a schema whose name / alias / type-reference is not a +//! valid Avro identifier must return `Err`, never panic. +//! +//! `Schema::parse_str` is a `Result`-returning, public API; callers cannot +//! guard against an internal `unwrap()`, so a panic on attacker-controlled +//! schema JSON is a denial-of-service. `Name::parse` was fixed previously, +//! but the sibling sites in `schema::parser` (`get_already_seen_schema`, +//! `fix_aliases_namespace`, `get_schema_type_name`) still called +//! `Name::new(..).unwrap()` / `Alias::new(..).unwrap()`. + +use apache_avro::Schema; + +/// Each of these declares a name, alias, or type reference that is not a valid +/// Avro identifier. Before the fix at least one site (`fix_aliases_namespace`) +/// panicked with `called 'Result::unwrap()' on an 'Err' value: Invalid schema +/// name ...`. +const INVALID_NAME_SCHEMAS: &[&str] = &[ + // invalid `aliases` entry on a record / fixed / enum + r#"{"type": "record", "name": "R", "aliases": [":"], "fields": []}"#, + r#"{"type": "fixed", "name": "F", "aliases": ["a.b:c"], "size": 4}"#, + r#"{"type": "enum", "name": "E", "aliases": [""], "symbols": ["A"]}"#, + // invalid nested type name + r#"{"type": "record", "name": "R", "fields": [ + {"name": "f", "type": {"type": "record", "name": ":", "fields": []}} + ]}"#, + // invalid string type reference + r#"{"type": "record", "name": "R", "fields": [{"name": "f", "type": ":"}]}"#, + // invalid top-level name + r#"{"type": "record", "name": ":", "fields": []}"#, +]; + +#[test] +fn invalid_schema_names_return_err_not_panic() { + for schema in INVALID_NAME_SCHEMAS { + let result = Schema::parse_str(schema); + assert!( + result.is_err(), + "expected Err (not a panic) for invalid-name schema:\n{schema}" + ); + } +} + +#[test] +fn valid_schema_with_aliases_still_parses() { + // Guards against the fix over-rejecting: legitimate aliases must still work. + let schema = r#"{ + "type": "record", + "name": "R", + "namespace": "ns", + "aliases": ["Old", "other.Older"], + "fields": [{"name": "f", "type": "int"}] + }"#; + assert!(Schema::parse_str(schema).is_ok()); +}