From 1dc1a6cdd3cb788918d3e3da3db06d3a0a5bb53c Mon Sep 17 00:00:00 2001 From: David Ramos Date: Fri, 30 Aug 2019 10:39:30 -0700 Subject: [PATCH 1/3] Emit multi-line string values as block scalars --- src/emitter.rs | 98 +++++++++++++++++++++++++++++++--------- src/scanner.rs | 10 ++-- tests/test_round_trip.rs | 12 +++++ 3 files changed, 94 insertions(+), 26 deletions(-) diff --git a/src/emitter.rs b/src/emitter.rs index 09e9f876..d7a71d64 100644 --- a/src/emitter.rs +++ b/src/emitter.rs @@ -17,7 +17,7 @@ impl Error for EmitError { } } - fn cause(&self) -> Option<&Error> { + fn cause(&self) -> Option<&dyn Error> { None } } @@ -38,7 +38,7 @@ impl From for EmitError { } pub struct YamlEmitter<'a> { - writer: &'a mut fmt::Write, + writer: &'a mut dyn fmt::Write, best_indent: usize, compact: bool, @@ -48,7 +48,7 @@ pub struct YamlEmitter<'a> { pub type EmitResult = Result<(), EmitError>; // from serialize::json -fn escape_str(wr: &mut fmt::Write, v: &str) -> Result<(), fmt::Error> { +fn escape_str(wr: &mut dyn fmt::Write, v: &str) -> Result<(), fmt::Error> { wr.write_str("\"")?; let mut start = 0; @@ -111,7 +111,7 @@ fn escape_str(wr: &mut fmt::Write, v: &str) -> Result<(), fmt::Error> { } impl<'a> YamlEmitter<'a> { - pub fn new(writer: &'a mut fmt::Write) -> YamlEmitter { + pub fn new(writer: &'a mut dyn fmt::Write) -> YamlEmitter { YamlEmitter { writer, best_indent: 2, @@ -141,7 +141,10 @@ impl<'a> YamlEmitter<'a> { // write DocumentStart writeln!(self.writer, "---")?; self.level = -1; - self.emit_node(doc) + self.emit_node(false, doc)?; + writeln!(self.writer)?; + write!(self.writer, "...")?; + Ok(()) } fn write_indent(&mut self) -> EmitResult { @@ -156,13 +159,45 @@ impl<'a> YamlEmitter<'a> { Ok(()) } - fn emit_node(&mut self, node: &Yaml) -> EmitResult { + fn emit_node(&mut self, is_val: bool, node: &Yaml) -> EmitResult { match *node { Yaml::Array(ref v) => self.emit_array(v), Yaml::Hash(ref h) => self.emit_hash(h), Yaml::String(ref v) => { if need_quotes(v) { - escape_str(self.writer, v)?; + // For multi-line string values, use a block scalar. + if is_val && v.contains("\n") && is_valid_literal_block_scalar(v) { + write!( + self.writer, + "|{}{}", + // If the string ends in a newline, we need to have YAML preserve the + // newline characters using the "keep" chomp indicator. + if v.ends_with("\n") { + "+" + // Otherwise, it should strip them using the "strip" chomp indicator. + } else { + "-" + }, + // Number of additional indent characters. + self.best_indent, + )?; + self.level += 1; + let mut lines = v.split("\n").peekable(); + while let Some(line) = lines.next() { + // The last line is special: if it's blank, that means the string ends + // in a newline character and we used the "keep" chomp indicator above. + // In that case, we should suppress the last, empty line. Otherwise, + // print it normally. + if lines.peek().is_some() || !line.is_empty() { + writeln!(self.writer)?; + self.write_indent()?; + write!(self.writer, "{}", line)?; + } + } + self.level -= 1; + } else { + escape_str(self.writer, v)?; + } } else { write!(self.writer, "{}", v)?; } @@ -233,7 +268,7 @@ impl<'a> YamlEmitter<'a> { write!(self.writer, ":")?; self.emit_val(true, v)?; } else { - self.emit_node(k)?; + self.emit_node(false, k)?; write!(self.writer, ":")?; self.emit_val(false, v)?; } @@ -273,7 +308,7 @@ impl<'a> YamlEmitter<'a> { } _ => { write!(self.writer, " ")?; - self.emit_node(val) + self.emit_node(true, val) } } } @@ -316,12 +351,12 @@ fn need_quotes(string: &str) -> bool { | '\"' | '\'' | '\\' - | '\0'...'\x06' + | '\0'..='\x06' | '\t' | '\n' | '\r' - | '\x0e'...'\x1a' - | '\x1c'...'\x1f' => true, + | '\x0e'..='\x1a' + | '\x1c'..='\x1f' => true, _ => false, }) || [ @@ -340,6 +375,18 @@ fn need_quotes(string: &str) -> bool { || string.parse::().is_ok() } +/// Check if the string can be expressed a valid literal block scalar. +/// The YAML spec supports all of the following in block literals except #xFEFF: +/// #x9 | #xA | [#x20-#x7E] /* 8 bit */ +/// | #x85 | [#xA0-#xD7FF] | [#xE000-#xFFFD] /* 16 bit */ +/// | [#x10000-#x10FFFF] /* 32 bit */ +fn is_valid_literal_block_scalar(string: &str) -> bool { + string.chars().all(|character: char| match character { + '\t' | '\n' | '\x20'..='\x7e' | '\u{0085}' | '\u{00a0}'..='\u{d7fff}' => true, + _ => false, + }) +} + #[cfg(test)] mod test { use super::*; @@ -358,7 +405,7 @@ a3: [1, 2, 3] a4: - [a1, a2] - 2 -"; +..."; let docs = YamlLoader::load_from_str(&s).unwrap(); let doc = &docs[0]; @@ -448,7 +495,8 @@ products: "{}": empty hash key x: test y: avoid quoting here -z: string with spaces"#; +z: string with spaces +..."#; let docs = YamlLoader::load_from_str(&s).unwrap(); let doc = &docs[0]; @@ -458,7 +506,9 @@ z: string with spaces"#; emitter.dump(doc).unwrap(); } - assert_eq!(s, writer, "actual:\n\n{}\n", writer); + let docs2 = YamlLoader::load_from_str(&writer).unwrap(); + + assert_eq!(docs, docs2, "actual:\n\n{}\n", writer); } #[test] @@ -506,7 +556,8 @@ null0: ~ - "OFF" : false_bools bool0: true -bool1: false"#; +bool1: false +..."#; let docs = YamlLoader::load_from_str(&input).unwrap(); let doc = &docs[0]; @@ -543,7 +594,8 @@ a: e: - f - g - - h: []"# + - h: [] +..."# } else { r#"--- a: @@ -554,7 +606,8 @@ e: - f - g - - h: []"# + h: [] +..."# }; let docs = YamlLoader::load_from_str(&s).unwrap(); @@ -577,7 +630,8 @@ a: - - c - d - - e - - f"#; + - f +..."#; let docs = YamlLoader::load_from_str(&s).unwrap(); let doc = &docs[0]; @@ -601,7 +655,8 @@ a: - d - - e - - f - - - e"#; + - - e +..."#; let docs = YamlLoader::load_from_str(&s).unwrap(); let doc = &docs[0]; @@ -623,7 +678,8 @@ a: b: c: d: - e: f"#; + e: f +..."#; let docs = YamlLoader::load_from_str(&s).unwrap(); let doc = &docs[0]; diff --git a/src/scanner.rs b/src/scanner.rs index 6f4fa587..b2ce148b 100644 --- a/src/scanner.rs +++ b/src/scanner.rs @@ -67,7 +67,7 @@ impl Error for ScanError { self.info.as_ref() } - fn cause(&self) -> Option<&Error> { + fn cause(&self) -> Option<&dyn Error> { None } } @@ -199,7 +199,7 @@ fn is_digit(c: char) -> bool { #[inline] fn is_alpha(c: char) -> bool { match c { - '0'...'9' | 'a'...'z' | 'A'...'Z' => true, + '0'..='9' | 'a'..='z' | 'A'..='Z' => true, '_' | '-' => true, _ => false, } @@ -211,9 +211,9 @@ fn is_hex(c: char) -> bool { #[inline] fn as_hex(c: char) -> u32 { match c { - '0'...'9' => (c as u32) - ('0' as u32), - 'a'...'f' => (c as u32) - ('a' as u32) + 10, - 'A'...'F' => (c as u32) - ('A' as u32) + 10, + '0'..='9' => (c as u32) - ('0' as u32), + 'a'..='f' => (c as u32) - ('a' as u32) + 10, + 'A'..='F' => (c as u32) - ('A' as u32) + 10, _ => unreachable!(), } } diff --git a/tests/test_round_trip.rs b/tests/test_round_trip.rs index bfa96027..cc191948 100644 --- a/tests/test_round_trip.rs +++ b/tests/test_round_trip.rs @@ -21,3 +21,15 @@ fn test_colon_in_string() { let y = Yaml::String("x: %".to_owned()); test_round_trip(&y); } + +#[test] +fn test_newline() { + let y = Yaml::Array(vec![Yaml::String("\n".to_owned())]); + test_round_trip(&y); +} + +#[test] +fn test_crlf() { + let y = Yaml::Array(vec![Yaml::String("\r\n".to_owned())]); + test_round_trip(&y); +} From 2bfdce5680fb950c7bb15c4768d8520889c7c986 Mon Sep 17 00:00:00 2001 From: David Ramos Date: Fri, 30 Aug 2019 13:18:17 -0700 Subject: [PATCH 2/3] Revert backward-incompatible Rust changes --- src/emitter.rs | 14 +++++++------- src/scanner.rs | 10 +++++----- 2 files changed, 12 insertions(+), 12 deletions(-) diff --git a/src/emitter.rs b/src/emitter.rs index d7a71d64..d74636d7 100644 --- a/src/emitter.rs +++ b/src/emitter.rs @@ -17,7 +17,7 @@ impl Error for EmitError { } } - fn cause(&self) -> Option<&dyn Error> { + fn cause(&self) -> Option<&Error> { None } } @@ -38,7 +38,7 @@ impl From for EmitError { } pub struct YamlEmitter<'a> { - writer: &'a mut dyn fmt::Write, + writer: &'a mut fmt::Write, best_indent: usize, compact: bool, @@ -48,7 +48,7 @@ pub struct YamlEmitter<'a> { pub type EmitResult = Result<(), EmitError>; // from serialize::json -fn escape_str(wr: &mut dyn fmt::Write, v: &str) -> Result<(), fmt::Error> { +fn escape_str(wr: &mut fmt::Write, v: &str) -> Result<(), fmt::Error> { wr.write_str("\"")?; let mut start = 0; @@ -111,7 +111,7 @@ fn escape_str(wr: &mut dyn fmt::Write, v: &str) -> Result<(), fmt::Error> { } impl<'a> YamlEmitter<'a> { - pub fn new(writer: &'a mut dyn fmt::Write) -> YamlEmitter { + pub fn new(writer: &'a mut fmt::Write) -> YamlEmitter { YamlEmitter { writer, best_indent: 2, @@ -351,12 +351,12 @@ fn need_quotes(string: &str) -> bool { | '\"' | '\'' | '\\' - | '\0'..='\x06' + | '\0'...'\x06' | '\t' | '\n' | '\r' - | '\x0e'..='\x1a' - | '\x1c'..='\x1f' => true, + | '\x0e'...'\x1a' + | '\x1c'...'\x1f' => true, _ => false, }) || [ diff --git a/src/scanner.rs b/src/scanner.rs index b2ce148b..6f4fa587 100644 --- a/src/scanner.rs +++ b/src/scanner.rs @@ -67,7 +67,7 @@ impl Error for ScanError { self.info.as_ref() } - fn cause(&self) -> Option<&dyn Error> { + fn cause(&self) -> Option<&Error> { None } } @@ -199,7 +199,7 @@ fn is_digit(c: char) -> bool { #[inline] fn is_alpha(c: char) -> bool { match c { - '0'..='9' | 'a'..='z' | 'A'..='Z' => true, + '0'...'9' | 'a'...'z' | 'A'...'Z' => true, '_' | '-' => true, _ => false, } @@ -211,9 +211,9 @@ fn is_hex(c: char) -> bool { #[inline] fn as_hex(c: char) -> u32 { match c { - '0'..='9' => (c as u32) - ('0' as u32), - 'a'..='f' => (c as u32) - ('a' as u32) + 10, - 'A'..='F' => (c as u32) - ('A' as u32) + 10, + '0'...'9' => (c as u32) - ('0' as u32), + 'a'...'f' => (c as u32) - ('a' as u32) + 10, + 'A'...'F' => (c as u32) - ('A' as u32) + 10, _ => unreachable!(), } } From 5a7192f461e8573b37fd4f4e07d1f26520869b35 Mon Sep 17 00:00:00 2001 From: David Ramos Date: Fri, 30 Aug 2019 13:33:39 -0700 Subject: [PATCH 3/3] Fix range patterns --- src/emitter.rs | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/emitter.rs b/src/emitter.rs index d74636d7..7968bc34 100644 --- a/src/emitter.rs +++ b/src/emitter.rs @@ -382,7 +382,7 @@ fn need_quotes(string: &str) -> bool { /// | [#x10000-#x10FFFF] /* 32 bit */ fn is_valid_literal_block_scalar(string: &str) -> bool { string.chars().all(|character: char| match character { - '\t' | '\n' | '\x20'..='\x7e' | '\u{0085}' | '\u{00a0}'..='\u{d7fff}' => true, + '\t' | '\n' | '\x20'...'\x7e' | '\u{0085}' | '\u{00a0}'...'\u{d7ff}' => true, _ => false, }) }