tafia · dralley · Jul 12, 2025 · Jul 12, 2025 · Jan 29, 2023 · Apr 3, 2022
diff --git a/Changelog.md b/Changelog.md
@@ -22,11 +22,31 @@
   - `Deserializer::buffering_with_resolver`
 - [#878]: Add ability to serialize structs in `$value` fields. The struct name will
   be used as a tag name. Previously only enums was allowed there.
+- [#371]: Improved compliance with the XML attribute value normalization process by adding
+  - `Attribute::normalized_value()`
+  - `Attribute::normalized_value_with()`
+  - `Attribute::decoded_and_normalized_value()`
+  - `Attribute::decoded_and_normalized_value_with()`
+
+  which ought to be used in place of the deprecated
+  - `Attribute::unescape_value()`
+  - `Attribute::unescape_value_with()`
+  - `Attribute::decode_and_unescape_value()`
+  - `Attribute::decode_and_unescape_value_with()`
+
+  The deprecated functions now behave the same as the newly added ones.
 
 ### Bug Fixes
 
+- [#806]: Properly normalize EOL characters in `BytesText::decode`, `BytesCData::decode`
+  and `BytesRef::decode` methods.
+
 ### Misc Changes
 
+- [#371]: New error variant `EscapeError::TooManyNestedEntities` was added.
+
+[#371]: https://github.com/tafia/quick-xml/issues/371
+[#806]: https://github.com/tafia/quick-xml/issues/806
 [#878]: https://github.com/tafia/quick-xml/pull/878
 [#882]: https://github.com/tafia/quick-xml/pull/882
 

diff --git a/benches/macrobenches.rs b/benches/macrobenches.rs
@@ -44,14 +44,13 @@ static INPUTS: &[(&str, &str)] = &[
     ("players.xml", PLAYERS),
 ];
 
-// TODO: use fully normalized attribute values
 fn parse_document_from_str(doc: &str) -> XmlResult<()> {
     let mut r = Reader::from_str(doc);
     loop {
         match black_box(r.read_event()?) {
             Event::Start(e) | Event::Empty(e) => {
                 for attr in e.attributes() {
-                    black_box(attr?.decode_and_unescape_value(r.decoder())?);
+                    black_box(attr?.decoded_and_normalized_value(r.decoder())?);
                 }
             }
             Event::Text(e) => {
@@ -68,15 +67,14 @@ fn parse_document_from_str(doc: &str) -> XmlResult<()> {
     Ok(())
 }
 
-// TODO: use fully normalized attribute values
 fn parse_document_from_bytes(doc: &[u8]) -> XmlResult<()> {
     let mut r = Reader::from_reader(doc);
     let mut buf = Vec::new();
     loop {
         match black_box(r.read_event_into(&mut buf)?) {
             Event::Start(e) | Event::Empty(e) => {
                 for attr in e.attributes() {
-                    black_box(attr?.decode_and_unescape_value(r.decoder())?);
+                    black_box(attr?.decoded_and_normalized_value(r.decoder())?);
                 }
             }
             Event::Text(e) => {
@@ -94,15 +92,14 @@ fn parse_document_from_bytes(doc: &[u8]) -> XmlResult<()> {
     Ok(())
 }
 
-// TODO: use fully normalized attribute values
 fn parse_document_from_str_with_namespaces(doc: &str) -> XmlResult<()> {
     let mut r = NsReader::from_str(doc);
     loop {
         match black_box(r.read_resolved_event()?) {
             (resolved_ns, Event::Start(e) | Event::Empty(e)) => {
                 black_box(resolved_ns);
                 for attr in e.attributes() {
-                    black_box(attr?.decode_and_unescape_value(r.decoder())?);
+                    black_box(attr?.decoded_and_normalized_value(r.decoder())?);
                 }
             }
             (resolved_ns, Event::Text(e)) => {
@@ -121,7 +118,6 @@ fn parse_document_from_str_with_namespaces(doc: &str) -> XmlResult<()> {
     Ok(())
 }
 
-// TODO: use fully normalized attribute values
 fn parse_document_from_bytes_with_namespaces(doc: &[u8]) -> XmlResult<()> {
     let mut r = NsReader::from_reader(doc);
     let mut buf = Vec::new();
@@ -130,7 +126,7 @@ fn parse_document_from_bytes_with_namespaces(doc: &[u8]) -> XmlResult<()> {
             (resolved_ns, Event::Start(e) | Event::Empty(e)) => {
                 black_box(resolved_ns);
                 for attr in e.attributes() {
-                    black_box(attr?.decode_and_unescape_value(r.decoder())?);
+                    black_box(attr?.decoded_and_normalized_value(r.decoder())?);
                 }
             }
             (resolved_ns, Event::Text(e)) => {

diff --git a/benches/microbenches.rs b/benches/microbenches.rs
@@ -243,6 +243,50 @@ fn attributes(c: &mut Criterion) {
             assert_eq!(count, 150);
         })
     });
+
+    group.finish();
+}
+
+/// Benchmarks normalizing attribute values
+fn attribute_value_normalization(c: &mut Criterion) {
+    let mut group = c.benchmark_group("attribute_value_normalization");
+
+    group.bench_function("noop_short", |b| {
+        b.iter(|| {
+            black_box(unescape("foobar")).unwrap();
+        })
+    });
+
+    group.bench_function("noop_long", |b| {
+        b.iter(|| {
+            black_box(unescape("just a bit of text without any entities")).unwrap();
+        })
+    });
+
+    group.bench_function("replacement_chars", |b| {
+        b.iter(|| {
+            black_box(unescape("just a bit\n of text without\tany entities")).unwrap();
+        })
+    });
+
+    group.bench_function("char_reference", |b| {
+        b.iter(|| {
+            let text = "prefix &#34;some stuff&#34;,&#x22;more stuff&#x22;";
+            black_box(unescape(text)).unwrap();
+            let text = "&#38;&#60;";
+            black_box(unescape(text)).unwrap();
+        })
+    });
+
+    group.bench_function("entity_reference", |b| {
+        b.iter(|| {
+            let text = "age &gt; 72 &amp;&amp; age &lt; 21";
+            black_box(unescape(text)).unwrap();
+            let text = "&quot;what&apos;s that?&quot;";
+            black_box(unescape(text)).unwrap();
+        })
+    });
+
     group.finish();
 }
 
@@ -355,6 +399,7 @@ criterion_group!(
     read_resolved_event_into,
     one_event,
     attributes,
+    attribute_value_normalization,
     escaping,
     unescaping,
 );

diff --git a/examples/custom_entities.rs b/examples/custom_entities.rs
@@ -154,7 +154,8 @@ fn main() -> Result<(), Box<dyn std::error::Error>> {
         let label = attrs.next().unwrap()?;
         assert_eq!(label.key, QName(b"label"));
         assert_eq!(
-            label.decode_and_unescape_value_with(reader.decoder(), |ent| reader.get_entity(ent))?,
+            label
+                .decoded_and_normalized_value_with(reader.decoder(), 9, |e| reader.get_entity(e))?,
             "Message: hello world"
         );
 
@@ -185,7 +186,7 @@ fn main() -> Result<(), Box<dyn std::error::Error>> {
         let attr = attrs.next().unwrap()?;
         assert_eq!(attr.key, QName(b"attr"));
         assert_eq!(
-            attr.decode_and_unescape_value_with(reader.decoder(), |ent| reader.get_entity(ent))?,
+            attr.decoded_and_normalized_value_with(reader.decoder(), 9, |e| reader.get_entity(e))?,
             "Message: hello world"
         );
 

diff --git a/examples/read_nodes.rs b/examples/read_nodes.rs
@@ -70,8 +70,8 @@ impl Translation {
         for attr_result in element.attributes() {
             let a = attr_result?;
             match a.key.as_ref() {
-                b"Language" => lang = a.decode_and_unescape_value(reader.decoder())?,
-                b"Tag" => tag = a.decode_and_unescape_value(reader.decoder())?,
+                b"Language" => lang = a.decoded_and_normalized_value(reader.decoder())?,
+                b"Tag" => tag = a.decoded_and_normalized_value(reader.decoder())?,
                 _ => (),
             }
         }
@@ -141,7 +141,7 @@ fn main() -> Result<(), AppError> {
                                             Ok::<Cow<'_, str>, Infallible>(std::borrow::Cow::from(""))
                                         })
                                         .unwrap().to_string();
-                                    let value = a.decode_and_unescape_value(reader.decoder()).or_else(|err| {
+                                    let value = a.decoded_and_normalized_value(reader.decoder()).or_else(|err| {
                                             dbg!("unable to read key in DefaultSettings attribute {:?}, utf8 error {:?}", &a, err);
                                             Ok::<Cow<'_, str>, Infallible>(std::borrow::Cow::from(""))
                                     }).unwrap().to_string();

diff --git a/fuzz/fuzz_targets/fuzz_target_1.rs b/fuzz/fuzz_targets/fuzz_target_1.rs
@@ -34,7 +34,7 @@ where
                 debug_format!(e.name());
                 for a in e.attributes() {
                     debug_format!(a);
-                    if a.ok().map_or(false, |a| a.unescape_value().is_err()) {
+                    if a.ok().map_or(false, |a| a.normalized_value().is_err()) {
                         break;
                     }
                 }

diff --git a/src/encoding.rs b/src/encoding.rs
@@ -6,6 +6,8 @@ use std::str::Utf8Error;
 #[cfg(feature = "encoding")]
 use encoding_rs::{DecoderResult, Encoding, UTF_16BE, UTF_16LE, UTF_8};
 
+use crate::escape::normalize_eols;
+
 /// Unicode "byte order mark" (\u{FEFF}) encoded as UTF-8.
 /// See <https://unicode.org/faq/utf_bom.html#bom1>
 pub(crate) const UTF8_BOM: &[u8] = &[0xEF, 0xBB, 0xBF];
@@ -145,9 +147,20 @@ impl Decoder {
         bytes: &Cow<'b, [u8]>,
     ) -> Result<Cow<'b, str>, EncodingError> {
         match bytes {
-            Cow::Borrowed(bytes) => self.decode(bytes),
+            Cow::Borrowed(bytes) => {
+                let text = self.decode(bytes)?;
+                match normalize_eols(&text) {
+                    // If the text is still borrowed after normalization, then it was not changed
+                    Cow::Borrowed(_) => Ok(text),
+                    Cow::Owned(s) => Ok(Cow::Owned(s)),
+                }
+            }
             // Convert to owned, because otherwise Cow will be bound with wrong lifetime
-            Cow::Owned(bytes) => Ok(self.decode(bytes)?.into_owned().into()),
+            Cow::Owned(bytes) => {
+                let text = self.decode(bytes)?;
+                let text = normalize_eols(&text);
+                Ok(text.into_owned().into())
+            }
         }
     }
 }