//! Helpers for escaping XML special characters in raw byte input.
use std::borrow::Cow;
/// Escapes all five XML special characters (`<`, `>`, `&`, `'`, `"`) found in
/// the byte slice `raw`.
///
/// When `raw` contains none of these characters the input is handed back
/// borrowed; otherwise an owned, escaped copy is returned.
///
/// The following replacements are performed:
///
/// | Character | Replacement
/// |-----------|------------
/// | `<`       | `&lt;`
/// | `>`       | `&gt;`
/// | `&`       | `&amp;`
/// | `'`       | `&apos;`
/// | `"`       | `&quot;`
pub fn attribute(raw: &[u8]) -> Cow<[u8]> {
    // Membership test against the full set of special bytes, quotes included.
    _escape(raw, |byte| b"<>&'\"".contains(&byte))
}
/// Escapes the XML special characters `<`, `>` and `&` found in the byte
/// slice `raw`, leaving the quote characters `"` and `'` untouched.
///
/// Should only be used for escaping text content. In XML text content it is
/// allowed (though not recommended) to leave the quote characters unescaped.
///
/// When `raw` contains none of the escaped characters the input is handed
/// back borrowed; otherwise an owned, escaped copy is returned.
///
/// The following replacements are performed:
///
/// | Character | Replacement
/// |-----------|------------
/// | `<`       | `&lt;`
/// | `>`       | `&gt;`
/// | `&`       | `&amp;`
pub fn text(raw: &[u8]) -> Cow<[u8]> {
    // Membership test against the reduced set: markup delimiters and `&` only.
    _escape(raw, |byte| b"<>&".contains(&byte))
}
/// Escapes the bytes of `bytes` that are selected by `escape_chars`, replacing
/// each of them with its XML entity or numeric character reference.
///
/// `escape_chars` is called for every input byte and must return `true` only
/// for bytes that have an entry in this table:
///
/// | Byte   | Replacement
/// |--------|------------
/// | `<`    | `&lt;`
/// | `>`    | `&gt;`
/// | `'`    | `&apos;`
/// | `&`    | `&amp;`
/// | `"`    | `&quot;`
/// | `\t`   | `&#9;`
/// | `\n`   | `&#10;`
/// | `\r`   | `&#13;`
/// | space  | `&#32;`
///
/// Returns [`Cow::Borrowed`] with the untouched input when no byte was
/// selected (no allocation happens in that case), otherwise [`Cow::Owned`]
/// holding the escaped copy.
///
/// # Panics
///
/// Panics if `escape_chars` selects a byte that has no replacement in the
/// table above.
pub(crate) fn _escape<F: Fn(u8) -> bool>(bytes: &[u8], escape_chars: F) -> Cow<[u8]> {
    let mut escaped: Option<Vec<u8>> = None;
    let mut iter = bytes.iter();
    // Index of the first input byte that has not been copied to the output yet.
    let mut pos = 0;
    while let Some(i) = iter.position(|&b| escape_chars(b)) {
        // Allocate lazily so that input without special bytes stays borrowed.
        // The extra 20 bytes leave room for a few replacements before regrowing.
        let escaped = escaped.get_or_insert_with(|| Vec::with_capacity(bytes.len() + 20));
        // `position` counts from where the iterator resumed, which is `pos`.
        let new_pos = pos + i;
        escaped.extend_from_slice(&bytes[pos..new_pos]);
        let replacement: &[u8] = match bytes[new_pos] {
            b'<' => b"&lt;",
            b'>' => b"&gt;",
            b'\'' => b"&apos;",
            b'&' => b"&amp;",
            b'"' => b"&quot;",
            // This set of escapes handles characters that should be escaped
            // in elements of xs:lists, because those characters work as
            // delimiters of list elements
            b'\t' => b"&#9;",
            b'\n' => b"&#10;",
            b'\r' => b"&#13;",
            b' ' => b"&#32;",
            _ => unreachable!(
                "Only '<', '>','\', '&', '\"', '\\t', '\\r', '\\n', and ' ' are escaped"
            ),
        };
        escaped.extend_from_slice(replacement);
        pos = new_pos + 1;
    }
    if let Some(mut escaped) = escaped {
        // Copy whatever follows the last escaped byte.
        if let Some(raw) = bytes.get(pos..) {
            escaped.extend_from_slice(raw);
        }
        // Only single ASCII bytes are replaced, and only by ASCII sequences,
        // so every other input byte is carried over unchanged.
        Cow::Owned(escaped)
    } else {
        Cow::Borrowed(bytes)
    }
}