rstml_component/fmt/
escape.rs

1use std::borrow::Cow;
2
3/// Escapes an `&str` and replaces all xml special characters (`<`, `>`, `&`, `'`, `"`)
4/// with their corresponding xml escaped value.
5///
6/// This function performs following replacements:
7///
8/// | Character | Replacement
9/// |-----------|------------
10/// | `<`       | `&lt;`
11/// | `>`       | `&gt;`
12/// | `&`       | `&amp;`
13/// | `'`       | `&apos;`
14/// | `"`       | `&quot;`
15pub fn attribute(raw: &[u8]) -> Cow<'_, [u8]> {
16	_escape(raw, |ch| matches!(ch, b'<' | b'>' | b'&' | b'\'' | b'\"'))
17}
18
19/// Escapes an `&str` and replaces xml special characters (`<`, `>`, `&`)
20/// with their corresponding xml escaped value.
21///
22/// Should only be used for escaping text content. In XML text content, it is allowed
23/// (though not recommended) to leave the quote special characters `"` and `'` unescaped.
24///
25/// This function performs following replacements:
26///
27/// | Character | Replacement
28/// |-----------|------------
29/// | `<`       | `&lt;`
30/// | `>`       | `&gt;`
31/// | `&`       | `&amp;`
32pub fn text(raw: &[u8]) -> Cow<'_, [u8]> {
33	_escape(raw, |ch| matches!(ch, b'<' | b'>' | b'&'))
34}
35
/// Escapes the bytes of `bytes` selected by `escape_chars`, replacing each one
/// with its XML entity or numeric character reference.
///
/// `escape_chars` is called on each byte and must return `true` for the bytes
/// that should be replaced. It may only select bytes from this table:
///
/// | Byte   | Replacement
/// |--------|------------
/// | `<`    | `&lt;`
/// | `>`    | `&gt;`
/// | `&`    | `&amp;`
/// | `'`    | `&apos;`
/// | `"`    | `&quot;`
/// | `\t`   | `&#9;`
/// | `\n`   | `&#10;`
/// | `\r`   | `&#13;`
/// | space  | `&#32;`
///
/// Returns [`Cow::Borrowed`] without allocating when no byte is selected, and
/// a [`Cow::Owned`] buffer holding the escaped copy otherwise. All other bytes
/// are copied through unchanged.
///
/// # Panics
///
/// Panics if `escape_chars` returns `true` for a byte that has no replacement
/// in the table above.
pub(crate) fn _escape<F: Fn(u8) -> bool>(bytes: &[u8], escape_chars: F) -> Cow<'_, [u8]> {
	// Stays `None` until the first byte needs escaping, so clean input never allocates.
	let mut escaped: Option<Vec<u8>> = None;
	let mut iter = bytes.iter();
	// Index in `bytes` of the first byte not yet copied to the output.
	let mut pos = 0;

	// `position` resumes after the previous match, so `i` is relative to `pos`.
	while let Some(i) = iter.position(|&b| escape_chars(b)) {
		let out = escaped.get_or_insert_with(|| Vec::with_capacity(bytes.len() + 20));
		let new_pos = pos + i;
		out.extend_from_slice(&bytes[pos..new_pos]);

		let replacement: &[u8] = match bytes[new_pos] {
			b'<' => b"&lt;",
			b'>' => b"&gt;",
			b'\'' => b"&apos;",
			b'&' => b"&amp;",
			b'"' => b"&quot;",

			// This set of escapes handles characters that should be escaped
			// in elements of xs:lists, because those characters work as
			// delimiters of list elements
			b'\t' => b"&#9;",
			b'\n' => b"&#10;",
			b'\r' => b"&#13;",
			b' ' => b"&#32;",
			_ => unreachable!(
				"Only '<', '>', '\\'', '&', '\"', '\\t', '\\r', '\\n', and ' ' are escaped"
			),
		};
		out.extend_from_slice(replacement);
		pos = new_pos + 1;
	}

	match escaped {
		Some(mut escaped) => {
			// `pos` is at most `bytes.len()`, so this copies the unescaped tail
			// (possibly empty) without risk of an out-of-range slice.
			escaped.extend_from_slice(&bytes[pos..]);
			Cow::Owned(escaped)
		}
		None => Cow::Borrowed(bytes),
	}
}