Skip to main content

ammonia/
lib.rs

1// Copyright (C) Michael Howell and others
2// this library is released under the same terms as Rust itself.
3
4#![deny(unsafe_code)]
5#![deny(missing_docs)]
6
7//! Ammonia is a whitelist-based HTML sanitization library. It is designed to
8//! prevent cross-site scripting, layout breaking, and clickjacking caused
9//! by untrusted user-provided HTML being mixed into a larger web page.
10//!
11//! Ammonia uses [html5ever] to parse and serialize document fragments the same way browsers do,
12//! so it is extremely resilient to syntactic obfuscation.
13//!
14//! Ammonia parses its input exactly according to the HTML5 specification;
15//! it will not linkify bare URLs, insert line or paragraph breaks, or convert `(C)` into ©.
16//! If you want that, use a markup processor before running the sanitizer, like [pulldown-cmark].
17//!
18//! # Examples
19//!
20//! ```
21//! let result = ammonia::clean(
22//!     "<b><img src='' onerror=alert('hax')>I'm not trying to XSS you</b>"
23//! );
24//! assert_eq!(result, "<b><img src=\"\">I'm not trying to XSS you</b>");
25//! ```
26//!
27//! [html5ever]: https://github.com/servo/html5ever "The HTML parser in Servo"
28//! [pulldown-cmark]: https://github.com/google/pulldown-cmark "CommonMark parser"
29
30#[cfg(ammonia_unstable)]
31pub mod rcdom;
32
33#[cfg(not(ammonia_unstable))]
34mod rcdom;
35
36mod style;
37
38use html5ever::interface::Attribute;
39use html5ever::serialize::{serialize, SerializeOpts};
40use html5ever::tree_builder::{NodeOrText, TreeSink};
41use html5ever::{driver as html, local_name, ns, Namespace, QualName};
42use maplit::{hashmap, hashset};
43use std::sync::LazyLock;
44use rcdom::{Handle, NodeData, RcDom, SerializableHandle};
45use std::borrow::{Borrow, Cow};
46use std::cell::Cell;
47use std::cmp::max;
48use std::collections::{HashMap, HashSet};
49use std::fmt::{self, Display};
50use std::io;
51use std::iter::IntoIterator as IntoIter;
52use std::mem;
53use std::rc::Rc;
54use std::str::FromStr;
55use html5ever::tendril::stream::TendrilSink;
56use html5ever::tendril::StrTendril;
57use html5ever::tendril::{format_tendril, ByteTendril};
58pub use url::Url;
59
60use html5ever::buffer_queue::BufferQueue;
61use html5ever::tokenizer::{Token, TokenSink, TokenSinkResult, Tokenizer};
62pub use url;
63
// Shared sanitizer holding the default configuration (`Builder::default`), constructed
// lazily on first access; the free function `clean` delegates to it.
static AMMONIA: LazyLock<Builder<'static>> = LazyLock::new(Builder::default);
65
66/// Clean HTML with a conservative set of defaults.
67///
68/// * [tags](struct.Builder.html#defaults)
69/// * [`script` and `style` have their contents stripped](struct.Builder.html#defaults-1)
70/// * [attributes on specific tags](struct.Builder.html#defaults-2)
71/// * [attributes on all tags](struct.Builder.html#defaults-6)
72/// * [url schemes](struct.Builder.html#defaults-7)
73/// * [relative URLs are passed through, unchanged, by default](struct.Builder.html#defaults-8)
74/// * [links are marked `noopener noreferrer` by default](struct.Builder.html#defaults-9)
75/// * all `class=""` settings are blocked by default
76/// * comments are stripped by default
77/// * no generic attribute prefixes are turned on by default
78/// * no specific tag-attribute-value settings are configured by default
79///
80/// [opener]: https://mathiasbynens.github.io/rel-noopener/
81/// [referrer]: https://en.wikipedia.org/wiki/HTTP_referer
82///
83/// # Examples
84///
85///     assert_eq!(ammonia::clean("XSS<script>attack</script>"), "XSS")
86pub fn clean(src: &str) -> String {
87    AMMONIA.clean(src).to_string()
88}
89
90/// Turn an arbitrary string into unformatted HTML.
91///
92/// This function is roughly equivalent to PHP's `htmlspecialchars` and `htmlentities`.
93/// It is as strict as possible, encoding every character that has special meaning to the
94/// HTML parser.
95///
96/// # Warnings
97///
98/// This function cannot be used to package strings into a `<script>` or `<style>` tag;
99/// you need a JavaScript or CSS escaper to do that.
100///
101///     // DO NOT DO THIS
102///     # use ammonia::clean_text;
103///     let untrusted = "Robert\"); abuse();//";
104///     let html = format!("<script>invoke(\"{}\")</script>", clean_text(untrusted));
105///
106/// `<textarea>` tags will strip the first newline, if present, even if that newline is encoded.
107/// If you want to build an editor that works the way most folks expect them to, you should put a
108/// newline at the beginning of the tag, like this:
109///
110///     # use ammonia::{Builder, clean_text};
111///     let untrusted = "\n\nhi!";
112///     let mut b = Builder::new();
113///     b.add_tags(&["textarea"]);
114///     // This is the bad version
115///     // The user put two newlines at the beginning, but the first one was removed
116///     let sanitized = b.clean(&format!("<textarea>{}</textarea>", clean_text(untrusted))).to_string();
117///     assert_eq!("<textarea>\nhi!</textarea>", sanitized);
118///     // This is a good version
119///     // The user put two newlines at the beginning, and we add a third one,
120///     // so the result still has two
121///     let sanitized = b.clean(&format!("<textarea>\n{}</textarea>", clean_text(untrusted))).to_string();
122///     assert_eq!("<textarea>\n\nhi!</textarea>", sanitized);
123///     // This version is also often considered good
124///     // For many applications, leading and trailing whitespace is probably unwanted
125///     let sanitized = b.clean(&format!("<textarea>{}</textarea>", clean_text(untrusted.trim()))).to_string();
126///     assert_eq!("<textarea>hi!</textarea>", sanitized);
127///
128/// It also does not make user text safe for HTML attribute microsyntaxes such as `class` or `id`.
129/// Only use this function for places where HTML accepts unrestricted text such as `title` attributes
130/// and paragraph contents.
pub fn clean_text(src: &str) -> String {
    // Escaping never makes the text shorter, so the input length is a sensible lower bound.
    let mut escaped = String::with_capacity(max(4, src.len()));
    for ch in src.chars() {
        match ch {
            // Would open a tag.
            '<' => escaped.push_str("&lt;"),
            // Would terminate an unquoted attribute value.
            '>' => escaped.push_str("&gt;"),
            // Would terminate a double-quoted attribute value.
            '"' => escaped.push_str("&quot;"),
            // Would terminate a single-quoted attribute value.
            '\'' => escaped.push_str("&apos;"),
            // Bogus parse error in an unquoted HTML5 attribute; in SGML/HTML it ends a
            // backquote-delimited attribute value.
            '`' => escaped.push_str("&grave;"),
            // Would terminate an unquoted attribute.
            '/' => escaped.push_str("&#47;"),
            // Would begin an entity reference.
            '&' => escaped.push_str("&amp;"),
            // Ignored when it leads an unquoted attribute.
            '=' => escaped.push_str("&#61;"),
            // Whitespace terminates an unquoted attribute.
            ' ' => escaped.push_str("&#32;"),
            '\t' => escaped.push_str("&#9;"),
            '\n' => escaped.push_str("&#10;"),
            '\x0c' => escaped.push_str("&#12;"),
            '\r' => escaped.push_str("&#13;"),
            // Spec-compliant browsers substitute this themselves, but middleware might not.
            '\0' => escaped.push_str("&#65533;"),
            // Everything else is copied through unchanged.
            other => escaped.push(other),
        }
    }
    escaped
}
169
170/// Determine if a given string contains HTML
171///
172/// This function is parses the full string into HTML and checks if the input contained any
173/// HTML syntax.
174///
175/// # Note
176/// This function will return positively for strings that contain invalid HTML syntax like
177/// `<g>` and even `Vec::<u8>::new()`.
178pub fn is_html(input: &str) -> bool {
179    let santok = SanitizationTokenizer::new();
180    let mut chunk = ByteTendril::new();
181    chunk.push_slice(input.as_bytes());
182    let mut input = BufferQueue::default();
183    input.push_back(chunk.try_reinterpret().unwrap());
184
185    let tok = Tokenizer::new(santok, Default::default());
186    let _ = tok.feed(&mut input);
187    tok.end();
188    tok.sink.was_sanitized.get()
189}
190
/// Token sink used by `is_html`: it only remembers whether any markup token was seen.
#[derive(Clone)]
struct SanitizationTokenizer {
    /// Starts out `false`; read by `is_html` once tokenizing has finished.
    was_sanitized: Cell<bool>,
}

impl SanitizationTokenizer {
    /// Creates a sink whose flag is initially cleared.
    pub fn new() -> Self {
        Self {
            was_sanitized: Cell::new(false),
        }
    }
}
203
204impl TokenSink for SanitizationTokenizer {
205    type Handle = ();
206    fn process_token(&self, token: Token, _line_number: u64) -> TokenSinkResult<()> {
207        match token {
208            Token::CharacterTokens(_) | Token::EOFToken | Token::ParseError(_) => {}
209            _ => {
210                self.was_sanitized.set(true);
211            }
212        }
213        TokenSinkResult::Continue
214    }
215    fn end(&self) {}
216}
217
218/// An HTML sanitizer.
219///
220/// Given a fragment of HTML, Ammonia will parse it according to the HTML5
221/// parsing algorithm and sanitize any disallowed tags or attributes. This
222/// algorithm also takes care of things like unclosed and (some) misnested
223/// tags.
224///
225/// # Examples
226///
227///     use ammonia::{Builder, UrlRelative};
228///
229///     let a = Builder::default()
230///         .link_rel(None)
231///         .url_relative(UrlRelative::PassThrough)
232///         .clean("<a href=/>test")
233///         .to_string();
234///     assert_eq!(
235///         a,
236///         "<a href=\"/\">test</a>");
237///
238/// # Panics
239///
240/// Running [`clean`] or [`clean_from_reader`] may cause a panic if the builder is
241/// configured with any of these (contradictory) settings:
242///
243///  * The `rel` attribute is added to [`generic_attributes`] or the
244///    [`tag_attributes`] for the `<a>` tag, and [`link_rel`] is not set to `None`.
245///
///    For example, this is going to panic, since [`link_rel`] is set to
247///    `Some("noopener noreferrer")` by default,
248///    and it makes no sense to simultaneously say that the user is allowed to
249///    set their own `rel` attribute while saying that every link shall be set to
250///    a particular value:
251///
252///    ```should_panic
253///    use ammonia::Builder;
254///    use maplit::hashset;
255///
256///    # fn main() {
257///    Builder::default()
258///        .generic_attributes(hashset!["rel"])
259///        .clean("");
260///    # }
261///    ```
262///
263///    This, however, is perfectly valid:
264///
265///    ```
266///    use ammonia::Builder;
267///    use maplit::hashset;
268///
269///    # fn main() {
270///    Builder::default()
271///        .generic_attributes(hashset!["rel"])
272///        .link_rel(None)
273///        .clean("");
274///    # }
275///    ```
276///
277///  * The `class` attribute is in [`allowed_classes`] and is in the
278///    corresponding [`tag_attributes`] or in [`generic_attributes`].
279///
280///    This is done both to line up with the treatment of `rel`,
281///    and to prevent people from accidentally allowing arbitrary
282///    classes on a particular element.
283///
284///    This will panic:
285///
286///    ```should_panic
287///    use ammonia::Builder;
288///    use maplit::{hashmap, hashset};
289///
290///    # fn main() {
291///    Builder::default()
292///        .generic_attributes(hashset!["class"])
293///        .allowed_classes(hashmap!["span" => hashset!["hidden"]])
294///        .clean("");
295///    # }
296///    ```
297///
298///    This, however, is perfectly valid:
299///
300///    ```
301///    use ammonia::Builder;
302///    use maplit::{hashmap, hashset};
303///
304///    # fn main() {
305///    Builder::default()
306///        .allowed_classes(hashmap!["span" => hashset!["hidden"]])
307///        .clean("");
308///    # }
309///    ```
310///
311///  * A tag is in either [`tags`] or [`tag_attributes`] while also
312///    being in [`clean_content_tags`].
313///
314///    Both [`tags`] and [`tag_attributes`] are whitelists but
315///    [`clean_content_tags`] is a blacklist, so it doesn't make sense
316///    to have the same tag in both.
317///
318///    For example, this will panic, since the `aside` tag is in
319///    [`tags`] by default:
320///
321///    ```should_panic
322///    use ammonia::Builder;
323///    use maplit::hashset;
324///
325///    # fn main() {
326///    Builder::default()
327///        .clean_content_tags(hashset!["aside"])
328///        .clean("");
329///    # }
330///    ```
331///
332///    This, however, is valid:
333///
334///    ```
335///    use ammonia::Builder;
336///    use maplit::hashset;
337///
338///    # fn main() {
339///    Builder::default()
340///        .rm_tags(&["aside"])
341///        .clean_content_tags(hashset!["aside"])
342///        .clean("");
343///    # }
344///    ```
345///
346/// [`clean`]: #method.clean
347/// [`clean_from_reader`]: #method.clean_from_reader
348/// [`generic_attributes`]: #method.generic_attributes
349/// [`tag_attributes`]: #method.tag_attributes
350/// [`generic_attributes`]: #method.generic_attributes
351/// [`link_rel`]: #method.link_rel
352/// [`allowed_classes`]: #method.allowed_classes
353/// [`id_prefix`]: #method.id_prefix
354/// [`tags`]: #method.tags
355/// [`clean_content_tags`]: #method.clean_content_tags
#[derive(Debug)]
pub struct Builder<'a> {
    // Tags that are allowed through (see `tags`).
    tags: HashSet<&'a str>,
    // Tags whose contents are removed from the output entirely (see `clean_content_tags`).
    clean_content_tags: HashSet<&'a str>,
    // Attributes allowed on specific tags: tag name -> attribute names.
    tag_attributes: HashMap<&'a str, HashSet<&'a str>>,
    // Attribute values allowed on specific tags: tag name -> attribute name -> values.
    tag_attribute_values: HashMap<&'a str, HashMap<&'a str, HashSet<&'a str>>>,
    // Attribute values to be set on specific tags: tag name -> attribute name -> value.
    set_tag_attribute_values: HashMap<&'a str, HashMap<&'a str, &'a str>>,
    // Attributes allowed on all tags.
    generic_attributes: HashSet<&'a str>,
    // Presumably the allowed URL schemes; the setter is outside this view.
    url_schemes: HashSet<&'a str>,
    // Relative-URL handling; the default is `UrlRelative::PassThrough`.
    url_relative: UrlRelative<'a>,
    // Optional attribute filter; NOTE(review): semantics are defined outside this view.
    attribute_filter: Option<Box<dyn AttributeFilter>>,
    // `rel` value applied to links, or `None` to leave `rel` alone
    // (default: `noopener noreferrer`).
    link_rel: Option<&'a str>,
    // Classes allowed per tag: tag name -> class names.
    allowed_classes: HashMap<&'a str, HashSet<&'a str>>,
    // Whether comments are stripped (default: `true`).
    strip_comments: bool,
    // NOTE(review): presumably a prefix applied to `id` attributes; confirm against the setter.
    id_prefix: Option<&'a str>,
    // NOTE(review): presumably attribute-name prefixes allowed on all tags; confirm against the setter.
    generic_attribute_prefixes: Option<HashSet<&'a str>>,
    // NOTE(review): presumably the allowed CSS properties for `style`; confirm against the setter.
    style_properties: Option<HashSet<&'a str>>,
}
374
impl<'a> Default for Builder<'a> {
    /// Builds the conservative default configuration (the one the free function
    /// `clean` uses through the shared `AMMONIA` instance).
    fn default() -> Self {
        // Tags allowed by default; mirrors the list in the `tags` documentation.
        #[rustfmt::skip]
        let tags = hashset![
            "a", "abbr", "acronym", "area", "article", "aside", "b", "bdi",
            "bdo", "blockquote", "br", "caption", "center", "cite", "code",
            "col", "colgroup", "data", "dd", "del", "details", "dfn", "div",
            "dl", "dt", "em", "figcaption", "figure", "footer", "h1", "h2",
            "h3", "h4", "h5", "h6", "header", "hgroup", "hr", "i", "img",
            "ins", "kbd", "li", "map", "mark", "nav", "ol", "p", "pre",
            "q", "rp", "rt", "rtc", "ruby", "s", "samp", "small", "span",
            "strike", "strong", "sub", "summary", "sup", "table", "tbody",
            "td", "th", "thead", "time", "tr", "tt", "u", "ul", "var", "wbr"
        ];
        // Tags whose contents are dropped entirely.
        let clean_content_tags = hashset!["script", "style"];
        // Attributes allowed on every tag.
        let generic_attributes = hashset!["lang", "title"];
        // Attributes allowed only on the listed tags.
        // NOTE(review): `tfoot` has an entry here but is not in the default `tags` set above;
        // the `tag_attributes` docs say entries for non-whitelisted tags do nothing, so this
        // entry only matters once a caller allows `tfoot` — confirm this is intended.
        let tag_attributes = hashmap![
            "a" => hashset![
                "href", "hreflang"
            ],
            "bdo" => hashset![
                "dir"
            ],
            "blockquote" => hashset![
                "cite"
            ],
            "col" => hashset![
                "align", "char", "charoff", "span"
            ],
            "colgroup" => hashset![
                "align", "char", "charoff", "span"
            ],
            "del" => hashset![
                "cite", "datetime"
            ],
            "hr" => hashset![
                "align", "size", "width"
            ],
            "img" => hashset![
                "align", "alt", "height", "src", "width"
            ],
            "ins" => hashset![
                "cite", "datetime"
            ],
            "ol" => hashset![
                "start"
            ],
            "q" => hashset![
                "cite"
            ],
            "table" => hashset![
                "align", "char", "charoff", "summary"
            ],
            "tbody" => hashset![
                "align", "char", "charoff"
            ],
            "td" => hashset![
                "align", "char", "charoff", "colspan", "headers", "rowspan"
            ],
            "tfoot" => hashset![
                "align", "char", "charoff"
            ],
            "th" => hashset![
                "align", "char", "charoff", "colspan", "headers", "rowspan", "scope"
            ],
            "thead" => hashset![
                "align", "char", "charoff"
            ],
            "tr" => hashset![
                "align", "char", "charoff"
            ],
        ];
        // No tag-attribute-value rules and no forced attribute values by default.
        let tag_attribute_values = hashmap![];
        let set_tag_attribute_values = hashmap![];
        // Default URL scheme list.
        let url_schemes = hashset![
            "bitcoin",
            "ftp",
            "ftps",
            "geo",
            "http",
            "https",
            "im",
            "irc",
            "ircs",
            "magnet",
            "mailto",
            "mms",
            "mx",
            "news",
            "nntp",
            "openpgp4fpr",
            "sip",
            "sms",
            "smsto",
            "ssh",
            "tel",
            "url",
            "webcal",
            "wtai",
            "xmpp"
        ];
        // No classes are allowed by default.
        let allowed_classes = hashmap![];

        Builder {
            tags,
            clean_content_tags,
            tag_attributes,
            tag_attribute_values,
            set_tag_attribute_values,
            generic_attributes,
            url_schemes,
            // Relative URLs are passed through unchanged.
            url_relative: UrlRelative::PassThrough,
            attribute_filter: None,
            // Links are marked `noopener noreferrer`.
            link_rel: Some("noopener noreferrer"),
            allowed_classes,
            // Comments are stripped.
            strip_comments: true,
            id_prefix: None,
            generic_attribute_prefixes: None,
            style_properties: None,
        }
    }
}
497
498impl<'a> Builder<'a> {
499    /// Sets the tags that are allowed.
500    ///
501    /// Note that the document-level tags `<html>`, `<head>`, and `<body>` cannot
502    /// be allowed here. Ammonia parses its input as a fragment (as if it were
503    /// the contents of a `<div>`), so these tags are stripped by the parser
504    /// before they reach the sanitizer.
505    ///
506    /// # Examples
507    ///
508    ///     use ammonia::Builder;
509    ///     use maplit::hashset;
510    ///
511    ///     # fn main() {
512    ///     let tags = hashset!["my-tag"];
513    ///     let a = Builder::new()
514    ///         .tags(tags)
515    ///         .clean("<my-tag>")
516    ///         .to_string();
517    ///     assert_eq!(a, "<my-tag></my-tag>");
518    ///     # }
519    ///
520    /// # Defaults
521    ///
522    /// ```notest
523    /// a, abbr, acronym, area, article, aside, b, bdi,
524    /// bdo, blockquote, br, caption, center, cite, code,
525    /// col, colgroup, data, dd, del, details, dfn, div,
526    /// dl, dt, em, figcaption, figure, footer, h1, h2,
527    /// h3, h4, h5, h6, header, hgroup, hr, i, img,
528    /// ins, kbd, li, map, mark, nav, ol, p, pre,
529    /// q, rp, rt, rtc, ruby, s, samp, small, span,
530    /// strike, strong, sub, summary, sup, table, tbody,
531    /// td, th, thead, time, tr, tt, u, ul, var, wbr
532    /// ```
533    pub fn tags(&mut self, value: HashSet<&'a str>) -> &mut Self {
534        self.tags = value;
535        self
536    }
537
    /// Add additional whitelisted tags without overwriting old ones.
539    ///
540    /// Does nothing if the tag is already there.
541    ///
542    /// # Examples
543    ///
544    ///     let a = ammonia::Builder::default()
545    ///         .add_tags(&["my-tag"])
546    ///         .clean("<my-tag>test</my-tag> <span>mess</span>").to_string();
547    ///     assert_eq!("<my-tag>test</my-tag> <span>mess</span>", a);
548    pub fn add_tags<T: 'a + ?Sized + Borrow<str>, I: IntoIter<Item = &'a T>>(
549        &mut self,
550        it: I,
551    ) -> &mut Self {
552        self.tags.extend(it.into_iter().map(Borrow::borrow));
553        self
554    }
555
556    /// Remove already-whitelisted tags.
557    ///
    /// Does nothing if the tag is already gone.
559    ///
560    /// # Examples
561    ///
562    ///     let a = ammonia::Builder::default()
563    ///         .rm_tags(&["span"])
564    ///         .clean("<span></span>").to_string();
565    ///     assert_eq!("", a);
566    pub fn rm_tags<'b, T: 'b + ?Sized + Borrow<str>, I: IntoIter<Item = &'b T>>(
567        &mut self,
568        it: I,
569    ) -> &mut Self {
570        for i in it {
571            self.tags.remove(i.borrow());
572        }
573        self
574    }
575
576    /// Returns a copy of the set of whitelisted tags.
577    ///
578    /// # Examples
579    ///
580    ///     use maplit::hashset;
581    ///
582    ///     let tags = hashset!["my-tag-1", "my-tag-2"];
583    ///
584    ///     let mut b = ammonia::Builder::default();
585    ///     b.tags(Clone::clone(&tags));
586    ///     assert_eq!(tags, b.clone_tags());
587    pub fn clone_tags(&self) -> HashSet<&'a str> {
588        self.tags.clone()
589    }
590
591    /// Sets the tags whose contents will be completely removed from the output.
592    ///
593    /// Adding tags which are whitelisted in `tags` or `tag_attributes` will cause
594    /// a panic.
595    ///
596    /// # Examples
597    ///
598    ///     use ammonia::Builder;
599    ///     use maplit::hashset;
600    ///
601    ///     # fn main() {
602    ///     let tag_blacklist = hashset!["script", "style"];
603    ///     let a = Builder::new()
604    ///         .clean_content_tags(tag_blacklist)
605    ///         .clean("<script>alert('hello')</script><style>a { background: #fff }</style>")
606    ///         .to_string();
607    ///     assert_eq!(a, "");
608    ///     # }
609    ///
610    /// # Defaults
611    ///
612    /// ```notest
613    /// script, style
614    /// ```
615    pub fn clean_content_tags(&mut self, value: HashSet<&'a str>) -> &mut Self {
616        self.clean_content_tags = value;
617        self
618    }
619
    /// Add additional blacklisted clean-content tags without overwriting old ones.
621    ///
622    /// Does nothing if the tag is already there.
623    ///
624    /// Adding tags which are whitelisted in `tags` or `tag_attributes` will cause
625    /// a panic.
626    ///
627    /// # Examples
628    ///
629    ///     let a = ammonia::Builder::default()
630    ///         .add_clean_content_tags(&["my-tag"])
631    ///         .clean("<my-tag>test</my-tag><span>mess</span>").to_string();
632    ///     assert_eq!("<span>mess</span>", a);
633    pub fn add_clean_content_tags<T: 'a + ?Sized + Borrow<str>, I: IntoIter<Item = &'a T>>(
634        &mut self,
635        it: I,
636    ) -> &mut Self {
637        self.clean_content_tags
638            .extend(it.into_iter().map(Borrow::borrow));
639        self
640    }
641
642    /// Remove already-blacklisted clean-content tags.
643    ///
644    /// Does nothing if the tags aren't blacklisted.
645    ///
646    /// # Examples
647    ///     use ammonia::Builder;
648    ///     use maplit::hashset;
649    ///
650    ///     # fn main() {
651    ///     let tag_blacklist = hashset!["script"];
652    ///     let a = ammonia::Builder::default()
653    ///         .clean_content_tags(tag_blacklist)
654    ///         .rm_clean_content_tags(&["script"])
655    ///         .clean("<script>XSS</script>").to_string();
656    ///     assert_eq!("XSS", a);
657    ///     # }
658    pub fn rm_clean_content_tags<'b, T: 'b + ?Sized + Borrow<str>, I: IntoIter<Item = &'b T>>(
659        &mut self,
660        it: I,
661    ) -> &mut Self {
662        for i in it {
663            self.clean_content_tags.remove(i.borrow());
664        }
665        self
666    }
667
668    /// Returns a copy of the set of blacklisted clean-content tags.
669    ///
670    /// # Examples
671    ///     # use maplit::hashset;
672    ///
673    ///     let tags = hashset!["my-tag-1", "my-tag-2"];
674    ///
675    ///     let mut b = ammonia::Builder::default();
676    ///     b.clean_content_tags(Clone::clone(&tags));
677    ///     assert_eq!(tags, b.clone_clean_content_tags());
678    pub fn clone_clean_content_tags(&self) -> HashSet<&'a str> {
679        self.clean_content_tags.clone()
680    }
681
682    /// Sets the HTML attributes that are allowed on specific tags.
683    ///
684    /// The value is structured as a map from tag names to a set of attribute names.
685    ///
686    /// If a tag is not itself whitelisted, adding entries to this map will do nothing.
687    ///
688    /// # Examples
689    ///
690    ///     use ammonia::Builder;
691    ///     use maplit::{hashmap, hashset};
692    ///
693    ///     # fn main() {
694    ///     let tags = hashset!["my-tag"];
695    ///     let tag_attributes = hashmap![
696    ///         "my-tag" => hashset!["val"]
697    ///     ];
698    ///     let a = Builder::new().tags(tags).tag_attributes(tag_attributes)
699    ///         .clean("<my-tag val=1>")
700    ///         .to_string();
701    ///     assert_eq!(a, "<my-tag val=\"1\"></my-tag>");
702    ///     # }
703    ///
704    /// # Defaults
705    ///
706    /// ```notest
707    /// a =>
708    ///     href, hreflang
709    /// bdo =>
710    ///     dir
711    /// blockquote =>
712    ///     cite
713    /// col =>
714    ///     align, char, charoff, span
715    /// colgroup =>
716    ///     align, char, charoff, span
717    /// del =>
718    ///     cite, datetime
719    /// hr =>
720    ///     align, size, width
721    /// img =>
722    ///     align, alt, height, src, width
723    /// ins =>
724    ///     cite, datetime
725    /// ol =>
726    ///     start
727    /// q =>
728    ///     cite
729    /// table =>
730    ///     align, char, charoff, summary
731    /// tbody =>
732    ///     align, char, charoff
733    /// td =>
734    ///     align, char, charoff, colspan, headers, rowspan
735    /// tfoot =>
736    ///     align, char, charoff
737    /// th =>
738    ///     align, char, charoff, colspan, headers, rowspan, scope
739    /// thead =>
740    ///     align, char, charoff
741    /// tr =>
742    ///     align, char, charoff
743    /// ```
744    pub fn tag_attributes(&mut self, value: HashMap<&'a str, HashSet<&'a str>>) -> &mut Self {
745        self.tag_attributes = value;
746        self
747    }
748
    /// Add additional whitelisted tag-specific attributes without overwriting old ones.
750    ///
751    /// # Examples
752    ///
753    ///     let a = ammonia::Builder::default()
754    ///         .add_tags(&["my-tag"])
755    ///         .add_tag_attributes("my-tag", &["my-attr"])
756    ///         .clean("<my-tag my-attr>test</my-tag> <span>mess</span>").to_string();
757    ///     assert_eq!("<my-tag my-attr=\"\">test</my-tag> <span>mess</span>", a);
758    pub fn add_tag_attributes<
759        T: 'a + ?Sized + Borrow<str>,
760        U: 'a + ?Sized + Borrow<str>,
761        I: IntoIter<Item = &'a T>,
762    >(
763        &mut self,
764        tag: &'a U,
765        it: I,
766    ) -> &mut Self {
767        self.tag_attributes
768            .entry(tag.borrow())
769            .or_default()
770            .extend(it.into_iter().map(Borrow::borrow));
771        self
772    }
773
774    /// Remove already-whitelisted tag-specific attributes.
775    ///
776    /// Does nothing if the attribute is already gone.
777    ///
778    /// # Examples
779    ///
780    ///     let a = ammonia::Builder::default()
781    ///         .rm_tag_attributes("a", &["href"])
782    ///         .clean("<a href=\"/\"></a>").to_string();
783    ///     assert_eq!("<a rel=\"noopener noreferrer\"></a>", a);
784    pub fn rm_tag_attributes<
785        'b,
786        'c,
787        T: 'b + ?Sized + Borrow<str>,
788        U: 'c + ?Sized + Borrow<str>,
789        I: IntoIter<Item = &'b T>,
790    >(
791        &mut self,
792        tag: &'c U,
793        it: I,
794    ) -> &mut Self {
795        if let Some(tag) = self.tag_attributes.get_mut(tag.borrow()) {
796            for i in it {
797                tag.remove(i.borrow());
798            }
799        }
800        self
801    }
802
803    /// Returns a copy of the set of whitelisted tag-specific attributes.
804    ///
805    /// # Examples
806    ///     use maplit::{hashmap, hashset};
807    ///
808    ///     let tag_attributes = hashmap![
809    ///         "my-tag" => hashset!["my-attr-1", "my-attr-2"]
810    ///     ];
811    ///
812    ///     let mut b = ammonia::Builder::default();
813    ///     b.tag_attributes(Clone::clone(&tag_attributes));
814    ///     assert_eq!(tag_attributes, b.clone_tag_attributes());
815    pub fn clone_tag_attributes(&self) -> HashMap<&'a str, HashSet<&'a str>> {
816        self.tag_attributes.clone()
817    }
818
819    /// Sets the values of HTML attributes that are allowed on specific tags.
820    ///
821    /// The value is structured as a map from tag names to a map from attribute names to a set of
822    /// attribute values.
823    ///
824    /// If a tag is not itself whitelisted, adding entries to this map will do nothing.
825    ///
826    /// # Examples
827    ///
828    ///     use ammonia::Builder;
829    ///     use maplit::{hashmap, hashset};
830    ///
831    ///     # fn main() {
832    ///     let tags = hashset!["my-tag"];
833    ///     let tag_attribute_values = hashmap![
834    ///         "my-tag" => hashmap![
835    ///             "my-attr" => hashset!["val"],
836    ///         ],
837    ///     ];
838    ///     let a = Builder::new().tags(tags).tag_attribute_values(tag_attribute_values)
839    ///         .clean("<my-tag my-attr=val>")
840    ///         .to_string();
841    ///     assert_eq!(a, "<my-tag my-attr=\"val\"></my-tag>");
842    ///     # }
843    ///
844    /// # Defaults
845    ///
846    /// None.
847    pub fn tag_attribute_values(
848        &mut self,
849        value: HashMap<&'a str, HashMap<&'a str, HashSet<&'a str>>>,
850    ) -> &mut Self {
851        self.tag_attribute_values = value;
852        self
853    }
854
    /// Add additional whitelisted tag-specific attribute values without overwriting old ones.
856    ///
857    /// # Examples
858    ///
859    ///     let a = ammonia::Builder::default()
860    ///         .add_tags(&["my-tag"])
861    ///         .add_tag_attribute_values("my-tag", "my-attr", &[""])
862    ///         .clean("<my-tag my-attr>test</my-tag> <span>mess</span>").to_string();
863    ///     assert_eq!("<my-tag my-attr=\"\">test</my-tag> <span>mess</span>", a);
864    pub fn add_tag_attribute_values<
865        T: 'a + ?Sized + Borrow<str>,
866        U: 'a + ?Sized + Borrow<str>,
867        V: 'a + ?Sized + Borrow<str>,
868        I: IntoIter<Item = &'a T>,
869    >(
870        &mut self,
871        tag: &'a U,
872        attribute: &'a V,
873        it: I,
874    ) -> &mut Self {
875        self.tag_attribute_values
876            .entry(tag.borrow())
877            .or_default()
878            .entry(attribute.borrow())
879            .or_default()
880            .extend(it.into_iter().map(Borrow::borrow));
881
882        self
883    }
884
885    /// Remove already-whitelisted tag-specific attribute values.
886    ///
887    /// Does nothing if the attribute or the value is already gone.
888    ///
889    /// # Examples
890    ///
891    ///     let a = ammonia::Builder::default()
892    ///         .rm_tag_attributes("a", &["href"])
893    ///         .add_tag_attribute_values("a", "href", &["/"])
894    ///         .rm_tag_attribute_values("a", "href", &["/"])
895    ///         .clean("<a href=\"/\"></a>").to_string();
896    ///     assert_eq!("<a rel=\"noopener noreferrer\"></a>", a);
897    pub fn rm_tag_attribute_values<
898        'b,
899        'c,
900        T: 'b + ?Sized + Borrow<str>,
901        U: 'c + ?Sized + Borrow<str>,
902        V: 'c + ?Sized + Borrow<str>,
903        I: IntoIter<Item = &'b T>,
904    >(
905        &mut self,
906        tag: &'c U,
907        attribute: &'c V,
908        it: I,
909    ) -> &mut Self {
910        if let Some(attrs) = self
911            .tag_attribute_values
912            .get_mut(tag.borrow())
913            .and_then(|map| map.get_mut(attribute.borrow()))
914        {
915            for i in it {
916                attrs.remove(i.borrow());
917            }
918        }
919        self
920    }
921
922    /// Returns a copy of the set of whitelisted tag-specific attribute values.
923    ///
924    /// # Examples
925    ///
926    ///     use maplit::{hashmap, hashset};
927    ///
928    ///     let attribute_values = hashmap![
929    ///         "my-attr-1" => hashset!["foo"],
930    ///         "my-attr-2" => hashset!["baz", "bar"],
931    ///     ];
932    ///     let tag_attribute_values = hashmap![
933    ///         "my-tag" => attribute_values
934    ///     ];
935    ///
936    ///     let mut b = ammonia::Builder::default();
937    ///     b.tag_attribute_values(Clone::clone(&tag_attribute_values));
938    ///     assert_eq!(tag_attribute_values, b.clone_tag_attribute_values());
939    pub fn clone_tag_attribute_values(
940        &self,
941    ) -> HashMap<&'a str, HashMap<&'a str, HashSet<&'a str>>> {
942        self.tag_attribute_values.clone()
943    }
944
945    /// Sets the values of HTML attributes that are to be set on specific tags.
946    ///
947    /// The value is structured as a map from tag names to a map from attribute names to an
948    /// attribute value.
949    ///
950    /// If a tag is not itself whitelisted, adding entries to this map will do nothing.
951    ///
952    /// # Examples
953    ///
954    ///     use ammonia::Builder;
955    ///     use maplit::{hashmap, hashset};
956    ///
957    ///     # fn main() {
958    ///     let tags = hashset!["my-tag"];
959    ///     let set_tag_attribute_values = hashmap![
960    ///         "my-tag" => hashmap![
961    ///             "my-attr" => "val",
962    ///         ],
963    ///     ];
964    ///     let a = Builder::new().tags(tags).set_tag_attribute_values(set_tag_attribute_values)
965    ///         .clean("<my-tag>")
966    ///         .to_string();
967    ///     assert_eq!(a, "<my-tag my-attr=\"val\"></my-tag>");
968    ///     # }
969    ///
970    /// # Defaults
971    ///
972    /// None.
973    pub fn set_tag_attribute_values(
974        &mut self,
975        value: HashMap<&'a str, HashMap<&'a str, &'a str>>,
976    ) -> &mut Self {
977        self.set_tag_attribute_values = value;
978        self
979    }
980
981    /// Add an attribute value to set on a specific element.
982    ///
983    /// # Examples
984    ///
985    ///     let a = ammonia::Builder::default()
986    ///         .add_tags(&["my-tag"])
987    ///         .set_tag_attribute_value("my-tag", "my-attr", "val")
988    ///         .clean("<my-tag>test</my-tag> <span>mess</span>").to_string();
989    ///     assert_eq!("<my-tag my-attr=\"val\">test</my-tag> <span>mess</span>", a);
990    pub fn set_tag_attribute_value<
991        T: 'a + ?Sized + Borrow<str>,
992        A: 'a + ?Sized + Borrow<str>,
993        V: 'a + ?Sized + Borrow<str>,
994    >(
995        &mut self,
996        tag: &'a T,
997        attribute: &'a A,
998        value: &'a V,
999    ) -> &mut Self {
1000        self.set_tag_attribute_values
1001            .entry(tag.borrow())
1002            .or_default()
1003            .insert(attribute.borrow(), value.borrow());
1004        self
1005    }
1006
1007    /// Remove existing tag-specific attribute values to be set.
1008    ///
1009    /// Does nothing if the attribute is already gone.
1010    ///
1011    /// # Examples
1012    ///
1013    ///     let a = ammonia::Builder::default()
1014    ///         // this does nothing, since no value is set for this tag attribute yet
1015    ///         .rm_set_tag_attribute_value("a", "target")
1016    ///         .set_tag_attribute_value("a", "target", "_blank")
1017    ///         .rm_set_tag_attribute_value("a", "target")
1018    ///         .clean("<a href=\"/\"></a>").to_string();
1019    ///     assert_eq!("<a href=\"/\" rel=\"noopener noreferrer\"></a>", a);
1020    pub fn rm_set_tag_attribute_value<
1021        T: 'a + ?Sized + Borrow<str>,
1022        A: 'a + ?Sized + Borrow<str>,
1023    >(
1024        &mut self,
1025        tag: &'a T,
1026        attribute: &'a A,
1027    ) -> &mut Self {
1028        if let Some(attributes) = self.set_tag_attribute_values.get_mut(tag.borrow()) {
1029            attributes.remove(attribute.borrow());
1030        }
1031        self
1032    }
1033
1034    /// Returns the value that will be set for the attribute on the element, if any.
1035    ///
1036    /// # Examples
1037    ///
1038    ///     let mut b = ammonia::Builder::default();
1039    ///     b.set_tag_attribute_value("a", "target", "_blank");
1040    ///     let value = b.get_set_tag_attribute_value("a", "target");
1041    ///     assert_eq!(value, Some("_blank"));
1042    pub fn get_set_tag_attribute_value<
1043        T: 'a + ?Sized + Borrow<str>,
1044        A: 'a + ?Sized + Borrow<str>,
1045    >(
1046        &self,
1047        tag: &'a T,
1048        attribute: &'a A,
1049    ) -> Option<&'a str> {
1050        self.set_tag_attribute_values
1051            .get(tag.borrow())
1052            .and_then(|map| map.get(attribute.borrow()))
1053            .copied()
1054    }
1055
1056    /// Returns a copy of the set of tag-specific attribute values to be set.
1057    ///
1058    /// # Examples
1059    ///
1060    ///     use maplit::{hashmap, hashset};
1061    ///
1062    ///     let attribute_values = hashmap![
1063    ///         "my-attr-1" => "foo",
1064    ///         "my-attr-2" => "bar",
1065    ///     ];
1066    ///     let set_tag_attribute_values = hashmap![
1067    ///         "my-tag" => attribute_values,
1068    ///     ];
1069    ///
1070    ///     let mut b = ammonia::Builder::default();
1071    ///     b.set_tag_attribute_values(Clone::clone(&set_tag_attribute_values));
1072    ///     assert_eq!(set_tag_attribute_values, b.clone_set_tag_attribute_values());
1073    pub fn clone_set_tag_attribute_values(&self) -> HashMap<&'a str, HashMap<&'a str, &'a str>> {
1074        self.set_tag_attribute_values.clone()
1075    }
1076
1077    /// Sets the prefix of attributes that are allowed on any tag.
1078    ///
1079    /// # Examples
1080    ///
1081    ///     use ammonia::Builder;
1082    ///     use maplit::hashset;
1083    ///
1084    ///     # fn main() {
1085    ///     let prefixes = hashset!["data-"];
1086    ///     let a = Builder::new()
1087    ///         .generic_attribute_prefixes(prefixes)
1088    ///         .clean("<b data-val=1>")
1089    ///         .to_string();
1090    ///     assert_eq!(a, "<b data-val=\"1\"></b>");
1091    ///     # }
1092    ///
1093    /// # Defaults
1094    ///
1095    /// No attribute prefixes are allowed by default.
1096    pub fn generic_attribute_prefixes(&mut self, value: HashSet<&'a str>) -> &mut Self {
1097        self.generic_attribute_prefixes = Some(value);
1098        self
1099    }
1100
1101    /// Add additional whitelisted attribute prefix without overwriting old ones.
1102    ///
1103    /// # Examples
1104    ///
1105    ///     let a = ammonia::Builder::default()
1106    ///         .add_generic_attribute_prefixes(&["my-"])
1107    ///         .clean("<span my-attr>mess</span>").to_string();
1108    ///     assert_eq!("<span my-attr=\"\">mess</span>", a);
1109    pub fn add_generic_attribute_prefixes<
1110        T: 'a + ?Sized + Borrow<str>,
1111        I: IntoIter<Item = &'a T>,
1112    >(
1113        &mut self,
1114        it: I,
1115    ) -> &mut Self {
1116        self.generic_attribute_prefixes
1117            .get_or_insert_with(HashSet::new)
1118            .extend(it.into_iter().map(Borrow::borrow));
1119        self
1120    }
1121
1122    /// Remove already-whitelisted attribute prefixes.
1123    ///
1124    /// Does nothing if the attribute prefix is already gone.
1125    ///
1126    /// # Examples
1127    ///
1128    ///     let a = ammonia::Builder::default()
1129    ///         .add_generic_attribute_prefixes(&["data-", "code-"])
1130    ///         .rm_generic_attribute_prefixes(&["data-"])
1131    ///         .clean("<span code-test=\"foo\" data-test=\"cool\"></span>").to_string();
1132    ///     assert_eq!("<span code-test=\"foo\"></span>", a);
1133    pub fn rm_generic_attribute_prefixes<
1134        'b,
1135        T: 'b + ?Sized + Borrow<str>,
1136        I: IntoIter<Item = &'b T>,
1137    >(
1138        &mut self,
1139        it: I,
1140    ) -> &mut Self {
1141        if let Some(true) = self.generic_attribute_prefixes.as_mut().map(|prefixes| {
1142            for i in it {
1143                let _ = prefixes.remove(i.borrow());
1144            }
1145            prefixes.is_empty()
1146        }) {
1147            self.generic_attribute_prefixes = None;
1148        }
1149        self
1150    }
1151
1152    /// Returns a copy of the set of whitelisted attribute prefixes.
1153    ///
1154    /// # Examples
1155    ///
1156    ///     use maplit::hashset;
1157    ///
1158    ///     let generic_attribute_prefixes = hashset!["my-prfx-1-", "my-prfx-2-"];
1159    ///
1160    ///     let mut b = ammonia::Builder::default();
1161    ///     b.generic_attribute_prefixes(Clone::clone(&generic_attribute_prefixes));
1162    ///     assert_eq!(Some(generic_attribute_prefixes), b.clone_generic_attribute_prefixes());
1163    pub fn clone_generic_attribute_prefixes(&self) -> Option<HashSet<&'a str>> {
1164        self.generic_attribute_prefixes.clone()
1165    }
1166
1167    /// Sets the attributes that are allowed on any tag.
1168    ///
1169    /// # Examples
1170    ///
1171    ///     use ammonia::Builder;
1172    ///     use maplit::hashset;
1173    ///
1174    ///     # fn main() {
1175    ///     let attributes = hashset!["data-val"];
1176    ///     let a = Builder::new()
1177    ///         .generic_attributes(attributes)
1178    ///         .clean("<b data-val=1>")
1179    ///         .to_string();
1180    ///     assert_eq!(a, "<b data-val=\"1\"></b>");
1181    ///     # }
1182    ///
1183    /// # Defaults
1184    ///
1185    /// ```notest
1186    /// lang, title
1187    /// ```
1188    pub fn generic_attributes(&mut self, value: HashSet<&'a str>) -> &mut Self {
1189        self.generic_attributes = value;
1190        self
1191    }
1192
1193    /// Add additional whitelisted attributes without overwriting old ones.
1194    ///
1195    /// # Examples
1196    ///
1197    ///     let a = ammonia::Builder::default()
1198    ///         .add_generic_attributes(&["my-attr"])
1199    ///         .clean("<span my-attr>mess</span>").to_string();
1200    ///     assert_eq!("<span my-attr=\"\">mess</span>", a);
1201    pub fn add_generic_attributes<T: 'a + ?Sized + Borrow<str>, I: IntoIter<Item = &'a T>>(
1202        &mut self,
1203        it: I,
1204    ) -> &mut Self {
1205        self.generic_attributes
1206            .extend(it.into_iter().map(Borrow::borrow));
1207        self
1208    }
1209
1210    /// Remove already-whitelisted attributes.
1211    ///
1212    /// Does nothing if the attribute is already gone.
1213    ///
1214    /// # Examples
1215    ///
1216    ///     let a = ammonia::Builder::default()
1217    ///         .rm_generic_attributes(&["title"])
1218    ///         .clean("<span title=\"cool\"></span>").to_string();
1219    ///     assert_eq!("<span></span>", a);
1220    pub fn rm_generic_attributes<'b, T: 'b + ?Sized + Borrow<str>, I: IntoIter<Item = &'b T>>(
1221        &mut self,
1222        it: I,
1223    ) -> &mut Self {
1224        for i in it {
1225            self.generic_attributes.remove(i.borrow());
1226        }
1227        self
1228    }
1229
1230    /// Returns a copy of the set of whitelisted attributes.
1231    ///
1232    /// # Examples
1233    ///
1234    ///     use maplit::hashset;
1235    ///
1236    ///     let generic_attributes = hashset!["my-attr-1", "my-attr-2"];
1237    ///
1238    ///     let mut b = ammonia::Builder::default();
1239    ///     b.generic_attributes(Clone::clone(&generic_attributes));
1240    ///     assert_eq!(generic_attributes, b.clone_generic_attributes());
1241    pub fn clone_generic_attributes(&self) -> HashSet<&'a str> {
1242        self.generic_attributes.clone()
1243    }
1244
1245    /// Sets the URL schemes permitted on `href` and `src` attributes.
1246    ///
1247    /// # Examples
1248    ///
1249    ///     use ammonia::Builder;
1250    ///     use maplit::hashset;
1251    ///
1252    ///     # fn main() {
1253    ///     let url_schemes = hashset![
1254    ///         "http", "https", "mailto", "magnet"
1255    ///     ];
1256    ///     let a = Builder::new().url_schemes(url_schemes)
1257    ///         .clean("<a href=\"magnet:?xt=urn:ed2k:31D6CFE0D16AE931B73C59D7E0C089C0&xl=0&dn=zero_len.fil&xt=urn:bitprint:3I42H3S6NNFQ2MSVX7XZKYAYSCX5QBYJ.LWPNACQDBZRYXW3VHJVCJ64QBZNGHOHHHZWCLNQ&xt=urn:md5:D41D8CD98F00B204E9800998ECF8427E\">zero-length file</a>")
1258    ///         .to_string();
1259    ///
1260    ///     // See `link_rel` for information on the rel="noopener noreferrer" attribute
1261    ///     // in the cleaned HTML.
1262    ///     assert_eq!(a,
1263    ///       "<a href=\"magnet:?xt=urn:ed2k:31D6CFE0D16AE931B73C59D7E0C089C0&amp;xl=0&amp;dn=zero_len.fil&amp;xt=urn:bitprint:3I42H3S6NNFQ2MSVX7XZKYAYSCX5QBYJ.LWPNACQDBZRYXW3VHJVCJ64QBZNGHOHHHZWCLNQ&amp;xt=urn:md5:D41D8CD98F00B204E9800998ECF8427E\" rel=\"noopener noreferrer\">zero-length file</a>");
1264    ///     # }
1265    ///
1266    /// # Defaults
1267    ///
1268    /// ```notest
1269    /// bitcoin, ftp, ftps, geo, http, https, im, irc,
1270    /// ircs, magnet, mailto, mms, mx, news, nntp,
1271    /// openpgp4fpr, sip, sms, smsto, ssh, tel, url,
1272    /// webcal, wtai, xmpp
1273    /// ```
1274    pub fn url_schemes(&mut self, value: HashSet<&'a str>) -> &mut Self {
1275        self.url_schemes = value;
1276        self
1277    }
1278
1279    /// Add additional whitelisted URL schemes without overwriting old ones.
1280    ///
1281    /// # Examples
1282    ///
1283    ///     let a = ammonia::Builder::default()
1284    ///         .add_url_schemes(&["my-scheme"])
1285    ///         .clean("<a href=my-scheme:home>mess</span>").to_string();
1286    ///     assert_eq!("<a href=\"my-scheme:home\" rel=\"noopener noreferrer\">mess</a>", a);
1287    pub fn add_url_schemes<T: 'a + ?Sized + Borrow<str>, I: IntoIter<Item = &'a T>>(
1288        &mut self,
1289        it: I,
1290    ) -> &mut Self {
1291        self.url_schemes.extend(it.into_iter().map(Borrow::borrow));
1292        self
1293    }
1294
1295    /// Remove already-whitelisted URL schemes.
1296    ///
1297    /// Does nothing if the scheme is already gone.
1298    ///
1299    /// # Examples
1300    ///
1301    ///     let a = ammonia::Builder::default()
1302    ///         .rm_url_schemes(&["ftp"])
1303    ///         .clean("<a href=\"ftp://ftp.mozilla.org/\"></a>").to_string();
1304    ///     assert_eq!("<a rel=\"noopener noreferrer\"></a>", a);
1305    pub fn rm_url_schemes<'b, T: 'b + ?Sized + Borrow<str>, I: IntoIter<Item = &'b T>>(
1306        &mut self,
1307        it: I,
1308    ) -> &mut Self {
1309        for i in it {
1310            self.url_schemes.remove(i.borrow());
1311        }
1312        self
1313    }
1314
1315    /// Returns a copy of the set of whitelisted URL schemes.
1316    ///
1317    /// # Examples
1318    ///     use maplit::hashset;
1319    ///
1320    ///     let url_schemes = hashset!["my-scheme-1", "my-scheme-2"];
1321    ///
1322    ///     let mut b = ammonia::Builder::default();
1323    ///     b.url_schemes(Clone::clone(&url_schemes));
1324    ///     assert_eq!(url_schemes, b.clone_url_schemes());
1325    pub fn clone_url_schemes(&self) -> HashSet<&'a str> {
1326        self.url_schemes.clone()
1327    }
1328
1329    /// Configures the behavior for relative URLs: pass-through, resolve-with-base, or deny.
1330    ///
1331    /// # Examples
1332    ///
1333    ///     use ammonia::{Builder, UrlRelative};
1334    ///
1335    ///     let a = Builder::new().url_relative(UrlRelative::PassThrough)
1336    ///         .clean("<a href=/>Home</a>")
1337    ///         .to_string();
1338    ///
1339    ///     // See `link_rel` for information on the rel="noopener noreferrer" attribute
1340    ///     // in the cleaned HTML.
1341    ///     assert_eq!(
1342    ///       a,
1343    ///       "<a href=\"/\" rel=\"noopener noreferrer\">Home</a>");
1344    ///
1345    /// # Defaults
1346    ///
1347    /// ```notest
1348    /// UrlRelative::PassThrough
1349    /// ```
1350    pub fn url_relative(&mut self, value: UrlRelative<'a>) -> &mut Self {
1351        self.url_relative = value;
1352        self
1353    }
1354
1355    /// Allows rewriting of all attributes using a callback.
1356    ///
1357    /// The callback takes the name of the element, the name of the attribute, and its value.
1358    /// Returns `None` to remove the attribute, or a value to use.
1359    ///
1360    /// Rewriting of attributes with URLs is done before `url_relative()`.
1361    ///
1362    /// # Panics
1363    ///
1364    /// If more than one callback is set.
1365    ///
1366    /// # Examples
1367    ///
1368    /// ```rust
1369    /// use ammonia::Builder;
1370    /// let a = Builder::new()
1371    ///     .attribute_filter(|element, attribute, value| {
1372    ///         match (element, attribute) {
1373    ///             ("img", "src") => None,
1374    ///             _ => Some(value.into())
1375    ///         }
1376    ///     })
1377    ///     .link_rel(None)
1378    ///     .clean("<a href=/><img alt=Home src=foo></a>")
1379    ///     .to_string();
1380    /// assert_eq!(a,
1381    ///     r#"<a href="/"><img alt="Home"></a>"#);
1382    /// ```
1383    pub fn attribute_filter<'cb, CallbackFn>(&mut self, callback: CallbackFn) -> &mut Self
1384    where
1385        CallbackFn: for<'u> Fn(&str, &str, &'u str) -> Option<Cow<'u, str>> + Send + Sync + 'static,
1386    {
1387        assert!(
1388            self.attribute_filter.is_none(),
1389            "attribute_filter can be set only once"
1390        );
1391        self.attribute_filter = Some(Box::new(callback));
1392        self
1393    }
1394
1395    /// Returns `true` if the relative URL resolver is set to `Deny`.
1396    ///
1397    /// # Examples
1398    ///
1399    ///     use ammonia::{Builder, UrlRelative};
1400    ///     let mut a = Builder::default();
1401    ///     a.url_relative(UrlRelative::Deny);
1402    ///     assert!(a.is_url_relative_deny());
1403    ///     a.url_relative(UrlRelative::PassThrough);
1404    ///     assert!(!a.is_url_relative_deny());
1405    pub fn is_url_relative_deny(&self) -> bool {
1406        matches!(self.url_relative, UrlRelative::Deny)
1407    }
1408
1409    /// Returns `true` if the relative URL resolver is set to `PassThrough`.
1410    ///
1411    /// # Examples
1412    ///
1413    ///     use ammonia::{Builder, UrlRelative};
1414    ///     let mut a = Builder::default();
1415    ///     a.url_relative(UrlRelative::Deny);
1416    ///     assert!(!a.is_url_relative_pass_through());
1417    ///     a.url_relative(UrlRelative::PassThrough);
1418    ///     assert!(a.is_url_relative_pass_through());
1419    pub fn is_url_relative_pass_through(&self) -> bool {
1420        matches!(self.url_relative, UrlRelative::PassThrough)
1421    }
1422
1423    /// Returns `true` if the relative URL resolver is set to `Custom`.
1424    ///
1425    /// # Examples
1426    ///
1427    ///     use ammonia::{Builder, UrlRelative};
1428    ///     use std::borrow::Cow;
1429    ///     fn test(a: &str) -> Option<Cow<str>> { None }
1430    ///     # fn main() {
1431    ///     let mut a = Builder::default();
1432    ///     a.url_relative(UrlRelative::Custom(Box::new(test)));
1433    ///     assert!(a.is_url_relative_custom());
1434    ///     a.url_relative(UrlRelative::PassThrough);
1435    ///     assert!(!a.is_url_relative_custom());
1436    ///     a.url_relative(UrlRelative::Deny);
1437    ///     assert!(!a.is_url_relative_custom());
1438    ///     # }
1439    pub fn is_url_relative_custom(&self) -> bool {
1440        matches!(self.url_relative, UrlRelative::Custom(_))
1441    }
1442
1443    /// Configures a `rel` attribute that will be added on links.
1444    ///
1445    /// If `rel` is in the generic or tag attributes, this must be set to `None`.
1446    /// Common `rel` values to include:
1447    ///
1448    /// * `noopener`: This prevents [a particular type of XSS attack],
1449    ///   and should usually be turned on for untrusted HTML.
1450    /// * `noreferrer`: This prevents the browser from [sending the source URL]
1451    ///   to the website that is linked to.
1452    /// * `nofollow`: This prevents search engines from [using this link for
1453    ///   ranking], which disincentivizes spammers.
1454    ///
1455    /// To turn on rel-insertion, call this function with a space-separated list.
1456    /// Ammonia does not parse rel-attributes;
1457    /// it just puts the given string into the attribute directly.
1458    ///
1459    /// [a particular type of XSS attack]: https://mathiasbynens.github.io/rel-noopener/
1460    /// [sending the source URL]: https://en.wikipedia.org/wiki/HTTP_referer
1461    /// [using this link for ranking]: https://en.wikipedia.org/wiki/Nofollow
1462    ///
1463    /// # Examples
1464    ///
1465    ///     use ammonia::Builder;
1466    ///
1467    ///     let a = Builder::new().link_rel(None)
1468    ///         .clean("<a href=https://rust-lang.org/>Rust</a>")
1469    ///         .to_string();
1470    ///     assert_eq!(
1471    ///       a,
1472    ///       "<a href=\"https://rust-lang.org/\">Rust</a>");
1473    ///
1474    /// # Defaults
1475    ///
1476    /// ```notest
1477    /// Some("noopener noreferrer")
1478    /// ```
1479    pub fn link_rel(&mut self, value: Option<&'a str>) -> &mut Self {
1480        self.link_rel = value;
1481        self
1482    }
1483
1484    /// Returns the settings for links' `rel` attribute, if one is set.
1485    ///
1486    /// # Examples
1487    ///
1488    ///     use ammonia::{Builder, UrlRelative};
1489    ///     let mut a = Builder::default();
1490    ///     a.link_rel(Some("a b"));
1491    ///     assert_eq!(a.get_link_rel(), Some("a b"));
1492    pub fn get_link_rel(&self) -> Option<&str> {
1493        self.link_rel
1494    }
1495
1496    /// Sets the CSS classes that are allowed on specific tags.
1497    ///
1498    /// The value is structured as a map from tag names to a set of class names.
1499    ///
1500    /// If the `class` attribute is itself whitelisted for a tag, then adding entries to
1501    /// this map will cause a panic.
1502    ///
1503    /// # Examples
1504    ///
1505    ///     use ammonia::Builder;
1506    ///     use maplit::{hashmap, hashset};
1507    ///
1508    ///     # fn main() {
1509    ///     let allowed_classes = hashmap![
1510    ///         "code" => hashset!["rs", "ex", "c", "cxx", "js"]
1511    ///     ];
1512    ///     let a = Builder::new()
1513    ///         .allowed_classes(allowed_classes)
1514    ///         .clean("<code class=rs>fn main() {}</code>")
1515    ///         .to_string();
1516    ///     assert_eq!(
1517    ///       a,
1518    ///       "<code class=\"rs\">fn main() {}</code>");
1519    ///     # }
1520    ///
1521    /// # Defaults
1522    ///
1523    /// The set of allowed classes is empty by default.
1524    pub fn allowed_classes(&mut self, value: HashMap<&'a str, HashSet<&'a str>>) -> &mut Self {
1525        self.allowed_classes = value;
1526        self
1527    }
1528
1529    /// Add additional whitelisted classes without overwriting old ones.
1530    ///
1531    /// # Examples
1532    ///
1533    ///     let a = ammonia::Builder::default()
1534    ///         .add_allowed_classes("a", &["onebox"])
1535    ///         .clean("<a href=/ class=onebox>mess</span>").to_string();
1536    ///     assert_eq!("<a href=\"/\" class=\"onebox\" rel=\"noopener noreferrer\">mess</a>", a);
1537    pub fn add_allowed_classes<
1538        T: 'a + ?Sized + Borrow<str>,
1539        U: 'a + ?Sized + Borrow<str>,
1540        I: IntoIter<Item = &'a T>,
1541    >(
1542        &mut self,
1543        tag: &'a U,
1544        it: I,
1545    ) -> &mut Self {
1546        self.allowed_classes
1547            .entry(tag.borrow())
1548            .or_default()
1549            .extend(it.into_iter().map(Borrow::borrow));
1550        self
1551    }
1552
1553    /// Remove already-whitelisted classes from a tag.
1554    ///
1555    /// Does nothing if the tag or the class is already gone.
1556    ///
1557    /// # Examples
1558    ///
1559    ///     let a = ammonia::Builder::default()
1560    ///         .add_allowed_classes("span", &["active"])
1561    ///         .rm_allowed_classes("span", &["active"])
1562    ///         .clean("<span class=active>").to_string();
1563    ///     assert_eq!("<span class=\"\"></span>", a);
1564    pub fn rm_allowed_classes<
1565        'b,
1566        'c,
1567        T: 'b + ?Sized + Borrow<str>,
1568        U: 'c + ?Sized + Borrow<str>,
1569        I: IntoIter<Item = &'b T>,
1570    >(
1571        &mut self,
1572        tag: &'c U,
1573        it: I,
1574    ) -> &mut Self {
1575        if let Some(tag) = self.allowed_classes.get_mut(tag.borrow()) {
1576            for i in it {
1577                tag.remove(i.borrow());
1578            }
1579        }
1580        self
1581    }
1582
1583    /// Returns a copy of the set of whitelisted class attributes.
1584    ///
1585    /// # Examples
1586    ///
1587    ///     use maplit::{hashmap, hashset};
1588    ///
1589    ///     let allowed_classes = hashmap![
1590    ///         "my-tag" => hashset!["my-class-1", "my-class-2"]
1591    ///     ];
1592    ///
1593    ///     let mut b = ammonia::Builder::default();
1594    ///     b.allowed_classes(Clone::clone(&allowed_classes));
1595    ///     assert_eq!(allowed_classes, b.clone_allowed_classes());
1596    pub fn clone_allowed_classes(&self) -> HashMap<&'a str, HashSet<&'a str>> {
1597        self.allowed_classes.clone()
1598    }
1599
1600    /// Configures the handling of HTML comments.
1601    ///
1602    /// If this option is false, comments will be preserved.
1603    ///
1604    /// # Examples
1605    ///
1606    ///     use ammonia::Builder;
1607    ///
1608    ///     let a = Builder::new().strip_comments(false)
1609    ///         .clean("<!-- yes -->")
1610    ///         .to_string();
1611    ///     assert_eq!(
1612    ///       a,
1613    ///       "<!-- yes -->");
1614    ///
1615    /// # Defaults
1616    ///
1617    /// `true`
1618    pub fn strip_comments(&mut self, value: bool) -> &mut Self {
1619        self.strip_comments = value;
1620        self
1621    }
1622
1623    /// Returns `true` if comment stripping is turned on.
1624    ///
1625    /// # Examples
1626    ///
1627    ///     let mut a = ammonia::Builder::new();
1628    ///     a.strip_comments(true);
1629    ///     assert!(a.will_strip_comments());
1630    ///     a.strip_comments(false);
1631    ///     assert!(!a.will_strip_comments());
1632    pub fn will_strip_comments(&self) -> bool {
1633        self.strip_comments
1634    }
1635
1636    /// Prefixes all "id" attribute values with a given string.  Note that the tag and
1637    /// attribute themselves must still be whitelisted.
1638    ///
1639    /// # Examples
1640    ///
1641    ///     use ammonia::Builder;
1642    ///     use maplit::hashset;
1643    ///
1644    ///     # fn main() {
1645    ///     let attributes = hashset!["id"];
1646    ///     let a = Builder::new()
1647    ///         .generic_attributes(attributes)
1648    ///         .id_prefix(Some("safe-"))
1649    ///         .clean("<b id=42>")
1650    ///         .to_string();
1651    ///     assert_eq!(a, "<b id=\"safe-42\"></b>");
1652    ///     # }
1653    ///
1654    ///
1655    /// # Defaults
1656    ///
1657    /// `None`
1658    pub fn id_prefix(&mut self, value: Option<&'a str>) -> &mut Self {
1659        self.id_prefix = value;
1660        self
1661    }
1662
1663    /// Only allows the specified properties in `style` attributes.
1664    ///
1665    /// Irrelevant if `style` is not an allowed attribute.
1666    ///
1667    /// Note that if style filtering is enabled style properties will be normalised e.g.
1668    /// invalid declarations and @rules will be removed, with only syntactically valid
1669    /// declarations kept.
1670    ///
1671    /// # Examples
1672    ///
1673    ///     use ammonia::Builder;
1674    ///     use maplit::hashset;
1675    ///
1676    ///     # fn main() {
1677    ///     let attributes = hashset!["style"];
1678    ///     let properties = hashset!["color"];
1679    ///     let a = Builder::new()
1680    ///         .generic_attributes(attributes)
1681    ///         .filter_style_properties(properties)
1682    ///         .clean("<p style=\"font-weight: heavy; color: red\">my html</p>")
1683    ///         .to_string();
1684    ///     assert_eq!(a, "<p style=\"color:red\">my html</p>");
1685    ///     # }
1686    pub fn filter_style_properties(&mut self, value: HashSet<&'a str>) -> &mut Self {
1687        self.style_properties = Some(value);
1688        self
1689    }
1690
1691    /// Constructs a [`Builder`] instance configured with the [default options].
1692    ///
1693    /// # Examples
1694    ///
1695    ///     use ammonia::{Builder, Url, UrlRelative};
1696    ///     # use std::error::Error;
1697    ///
1698    ///     # fn do_main() -> Result<(), Box<dyn Error>> {
1699    ///     let input = "<!-- comments will be stripped -->This is an <a href=.>Ammonia</a> example using <a href=struct.Builder.html#method.new onclick=xss>the <code onmouseover=xss>new()</code> function</a>.";
1700    ///     let output = "This is an <a href=\"https://docs.rs/ammonia/1.0/ammonia/\" rel=\"noopener noreferrer\">Ammonia</a> example using <a href=\"https://docs.rs/ammonia/1.0/ammonia/struct.Builder.html#method.new\" rel=\"noopener noreferrer\">the <code>new()</code> function</a>.";
1701    ///
1702    ///     let result = Builder::new() // <--
1703    ///         .url_relative(UrlRelative::RewriteWithBase(Url::parse("https://docs.rs/ammonia/1.0/ammonia/")?))
1704    ///         .clean(input)
1705    ///         .to_string();
1706    ///     assert_eq!(result, output);
1707    ///     # Ok(())
1708    ///     # }
1709    ///     # fn main() { do_main().unwrap() }
1710    ///
1711    /// [default options]: fn.clean.html
1712    /// [`Builder`]: struct.Builder.html
1713    pub fn new() -> Self {
1714        Self::default()
1715    }
1716
1717    /// Constructs a [`Builder`] instance configured with no allowed tags.
1718    ///
1719    /// # Examples
1720    ///
1721    ///     use ammonia::{Builder, Url, UrlRelative};
1722    ///     # use std::error::Error;
1723    ///
1724    ///     # fn do_main() -> Result<(), Box<dyn Error>> {
1725    ///     let input = "<!-- comments will be stripped -->This is an <a href=.>Ammonia</a> example using <a href=struct.Builder.html#method.new onclick=xss>the <code onmouseover=xss>empty()</code> function</a>.";
1726    ///     let output = "This is an Ammonia example using the empty() function.";
1727    ///
1728    ///     let result = Builder::empty() // <--
1729    ///         .url_relative(UrlRelative::RewriteWithBase(Url::parse("https://docs.rs/ammonia/1.0/ammonia/")?))
1730    ///         .clean(input)
1731    ///         .to_string();
1732    ///     assert_eq!(result, output);
1733    ///     # Ok(())
1734    ///     # }
1735    ///     # fn main() { do_main().unwrap() }
1736    ///
1737    /// [default options]: fn.clean.html
1738    /// [`Builder`]: struct.Builder.html
1739    pub fn empty() -> Self {
1740        Self {
1741            tags: hashset![],
1742            ..Self::default()
1743        }
1744    }
1745
1746    /// Sanitizes an HTML fragment in a string according to the configured options.
1747    ///
1748    /// # Examples
1749    ///
1750    ///     use ammonia::{Builder, Url, UrlRelative};
1751    ///     # use std::error::Error;
1752    ///
1753    ///     # fn do_main() -> Result<(), Box<dyn Error>> {
1754    ///     let input = "<!-- comments will be stripped -->This is an <a href=.>Ammonia</a> example using <a href=struct.Builder.html#method.new onclick=xss>the <code onmouseover=xss>new()</code> function</a>.";
1755    ///     let output = "This is an <a href=\"https://docs.rs/ammonia/1.0/ammonia/\" rel=\"noopener noreferrer\">Ammonia</a> example using <a href=\"https://docs.rs/ammonia/1.0/ammonia/struct.Builder.html#method.new\" rel=\"noopener noreferrer\">the <code>new()</code> function</a>.";
1756    ///
1757    ///     let result = Builder::new()
1758    ///         .url_relative(UrlRelative::RewriteWithBase(Url::parse("https://docs.rs/ammonia/1.0/ammonia/")?))
1759    ///         .clean(input)
1760    ///         .to_string(); // <--
1761    ///     assert_eq!(result, output);
1762    ///     # Ok(())
1763    ///     # }
1764    ///     # fn main() { do_main().unwrap() }
1765    pub fn clean(&self, src: &str) -> Document {
1766        let parser = Self::make_parser();
1767        let dom = parser.one(src);
1768        self.clean_dom(dom)
1769    }
1770
1771    /// Sanitizes an HTML fragment from a reader according to the configured options.
1772    ///
1773    /// The input should be in UTF-8 encoding, otherwise the decoding is lossy, just
1774    /// like when using [`String::from_utf8_lossy`].
1775    ///
1776    /// To avoid consuming the reader, a mutable reference can be passed to this method.
1777    ///
1778    /// # Examples
1779    ///
1780    ///     use ammonia::Builder;
1781    ///     # use std::error::Error;
1782    ///
1783    ///     # fn do_main() -> Result<(), Box<dyn Error>> {
1784    ///     let a = Builder::new()
1785    ///         .clean_from_reader(&b"<!-- no -->"[..])? // notice the `b`
1786    ///         .to_string();
1787    ///     assert_eq!(a, "");
1788    ///     # Ok(()) }
1789    ///     # fn main() { do_main().unwrap() }
1790    ///
1791    /// [`String::from_utf8_lossy`]: https://doc.rust-lang.org/std/string/struct.String.html#method.from_utf8_lossy
1792    pub fn clean_from_reader<R>(&self, mut src: R) -> io::Result<Document>
1793    where
1794        R: io::Read,
1795    {
1796        let parser = Self::make_parser().from_utf8();
1797        let dom = parser.read_from(&mut src)?;
1798        Ok(self.clean_dom(dom))
1799    }
1800
1801    /// Clean a post-parsing DOM.
1802    ///
1803    /// This is not a public API because RcDom isn't really stable.
1804    /// We want to be able to take breaking changes to html5ever itself
1805    /// without having to break Ammonia's API.
1806    fn clean_dom(&self, dom: RcDom) -> Document {
1807        let mut stack = Vec::new();
1808        let mut removed = Vec::new();
1809        let link_rel = self
1810            .link_rel
1811            .map(|link_rel| format_tendril!("{}", link_rel));
1812        if link_rel.is_some() {
1813            assert!(self.generic_attributes.get("rel").is_none());
1814            assert!(self
1815                .tag_attributes
1816                .get("a")
1817                .and_then(|a| a.get("rel"))
1818                .is_none());
1819        }
1820        assert!(self.allowed_classes.is_empty() || !self.generic_attributes.contains("class"));
1821        for tag_name in self.allowed_classes.keys() {
1822            assert!(self
1823                .tag_attributes
1824                .get(tag_name)
1825                .and_then(|a| a.get("class"))
1826                .is_none());
1827        }
1828        for tag_name in &self.clean_content_tags {
1829            assert!(!self.tags.contains(tag_name), "`{tag_name}` appears in `clean_content_tags` and in `tags` at the same time");
1830            assert!(!self.tag_attributes.contains_key(tag_name), "`{tag_name}` appears in `clean_content_tags` and in `tag_attributes` at the same time");
1831        }
1832        let body = {
1833            let children = dom.document.children.borrow();
1834            children[0].clone()
1835        };
1836        stack.extend(
1837            mem::take(&mut *body.children.borrow_mut())
1838                .into_iter()
1839                .rev(),
1840        );
1841        // This design approach is used to prevent pathological content from producing
1842        // a stack overflow. The `stack` contains to-be-cleaned nodes, while `remove`,
1843        // of course, contains nodes that need to be dropped (we can't just drop them,
1844        // because they could have a very deep child tree).
1845        while let Some(mut node) = stack.pop() {
1846            if matches!(node.data, NodeData::Element { ref name, .. } if &*name.local == "selectedcontent" && name.ns == ns!(html)) &&
1847                self.is_within(node.clone(), ns!(html), "select")
1848            {
1849                for sub in node.children.borrow_mut().iter_mut() {
1850                    sub.parent.replace(None);
1851                }
1852                *node.children.borrow_mut() = Vec::new();
1853            }
1854            let parent = node.parent
1855                .replace(None).expect("a node in the DOM will have a parent, except the root, which is not processed")
1856                .upgrade().expect("a node's parent will be pointed to by its parent (or the root pointer), and will not be dropped");
1857            let pass = self.clean_child(&mut node);
1858            self.adjust_node_attributes(&mut node, &link_rel, self.id_prefix);
1859            if self.clean_node_content(&node) || !self.check_expected_namespace(&parent, &node) {
1860                removed.push(node);
1861                continue;
1862            }
1863            if pass {
1864                dom.append(&parent.clone(), NodeOrText::AppendNode(node.clone()));
1865            } else {
1866                for sub in node.children.borrow_mut().iter_mut() {
1867                    sub.parent.replace(Some(Rc::downgrade(&parent)));
1868                }
1869            }
1870            stack.extend(
1871                mem::take(&mut *node.children.borrow_mut())
1872                    .into_iter()
1873                    .rev(),
1874            );
1875            if !pass {
1876                removed.push(node);
1877            }
1878        }
1879        // Now, imperatively clean up all of the child nodes.
1880        // Otherwise, we could wind up with a DoS, either caused by a memory leak,
1881        // or caused by a stack overflow.
1882        while let Some(node) = removed.pop() {
1883            removed.extend_from_slice(&mem::take(&mut *node.children.borrow_mut())[..]);
1884        }
1885        Document(dom)
1886    }
1887
1888    fn is_within(&self, mut child: Handle, ns: Namespace, tag: &str) -> bool {
1889        while let Some(parent) = child.parent.take() {
1890            child.parent.set(Some(parent.clone()));
1891            match child.data {
1892                NodeData::Element { ref name, .. } if name.ns == ns && &*name.local == tag => return true,
1893                _ => {
1894                    if let Some(parent) = parent.upgrade() {
1895                        child = parent;
1896                    } else {
1897                        return false;
1898                    }
1899                }
1900            }
1901        }
1902        false
1903    }
1904
1905    /// Returns `true` if a node and all its content should be removed.
1906    fn clean_node_content(&self, node: &Handle) -> bool {
1907        match node.data {
1908            NodeData::Text { .. }
1909            | NodeData::Comment { .. }
1910            | NodeData::Doctype { .. }
1911            | NodeData::Document
1912            | NodeData::ProcessingInstruction { .. } => false,
1913            NodeData::Element { ref name, .. } => self.clean_content_tags.contains(&*name.local),
1914        }
1915    }
1916
1917    /// Remove unwanted attributes, and check if the node should be kept or not.
1918    ///
1919    /// The root node doesn't need cleaning because we create the root node ourselves,
1920    /// and it doesn't get serialized, and ... it just exists to give the parser
1921    /// a context (in this case, a div-like block context).
1922    fn clean_child(&self, child: &mut Handle) -> bool {
1923        match child.data {
1924            NodeData::Text { .. } => true,
1925            NodeData::Comment { .. } => !self.strip_comments,
1926            NodeData::Doctype { .. }
1927            | NodeData::Document
1928            | NodeData::ProcessingInstruction { .. } => false,
1929            NodeData::Element {
1930                ref name,
1931                ref attrs,
1932                ..
1933            } => {
1934                if self.tags.contains(&*name.local) {
1935                    let attr_filter = |attr: &html5ever::Attribute| {
1936                        let whitelisted = self.generic_attributes.contains(&*attr.name.local)
1937                            || self.generic_attribute_prefixes.as_ref().map(|prefixes| {
1938                                prefixes.iter().any(|&p| attr.name.local.starts_with(p))
1939                            }) == Some(true)
1940                            || self
1941                                .tag_attributes
1942                                .get(&*name.local)
1943                                .map(|ta| ta.contains(&*attr.name.local))
1944                                == Some(true)
1945                            || self
1946                                .tag_attribute_values
1947                                .get(&*name.local)
1948                                .and_then(|tav| tav.get(&*attr.name.local))
1949                                .map(|vs| {
1950                                    let attr_val = attr.value.to_lowercase();
1951                                    vs.iter().any(|v| v.to_lowercase() == attr_val)
1952                                })
1953                                == Some(true);
1954                        if !whitelisted {
1955                            // If the class attribute is not whitelisted,
1956                            // but there is a whitelisted set of allowed_classes,
1957                            // do not strip out the class attribute.
1958                            // Banned classes will be filtered later.
1959                            &*attr.name.local == "class"
1960                                && self.allowed_classes.contains_key(&*name.local)
1961                        } else if is_url_attr(&name.local, &attr.name.local) {
1962                            let url = Url::parse(&attr.value);
1963                            if let Ok(url) = url {
1964                                self.url_schemes.contains(url.scheme())
1965                            } else if url == Err(url::ParseError::RelativeUrlWithoutBase) {
1966                                !matches!(self.url_relative, UrlRelative::Deny)
1967                            } else {
1968                                false
1969                            }
1970                        } else {
1971                            true
1972                        }
1973                    };
1974                    attrs.borrow_mut().retain(attr_filter);
1975                    true
1976                } else {
1977                    false
1978                }
1979            }
1980        }
1981    }
1982
1983    // Check for unexpected namespace changes.
1984    //
1985    // The issue happens if developers added to the list of allowed tags any
1986    // tag which is parsed in RCDATA state, PLAINTEXT state or RAWTEXT state,
1987    // that is:
1988    //
1989    // * title
1990    // * textarea
1991    // * xmp
1992    // * iframe
1993    // * noembed
1994    // * noframes
1995    // * plaintext
1996    // * noscript
1997    // * style
1998    // * script
1999    //
2000    // An example in the wild is Plume, that allows iframe [1].  So in next
2001    // examples I'll assume the following policy:
2002    //
2003    //     Builder::new()
2004    //        .add_tags(&["iframe"])
2005    //
2006    // In HTML namespace `<iframe>` is parsed specially; that is, its content is
2007    // treated as text. For instance, the following html:
2008    //
2009    //     <iframe><a>test
2010    //
2011    // Is parsed into the following DOM tree:
2012    //
2013    //     iframe
2014    //     └─ #text: <a>test
2015    //
2016    // So iframe cannot have any children other than a text node.
2017    //
2018    // The same is not true, though, in "foreign content"; that is, within
2019    // <svg> or <math> tags. The following html:
2020    //
2021    //     <svg><iframe><a>test
2022    //
2023    // is parsed differently:
2024    //
2025    //    svg
2026    //    └─ iframe
2027    //       └─ a
2028    //          └─ #text: test
2029    //
2030    // So in SVG namespace iframe can have children.
2031    //
2032    // Ammonia disallows <svg> but it keeps its content after deleting it. And
2033    // the parser internally keeps track of the namespace of the element. So
2034    // assume we have the following snippet:
2035    //
2036    //     <svg><iframe><a title="</iframe><img src onerror=alert(1)>">test
2037    //
2038    // It is parsed into:
2039    //
2040    //     svg
2041    //     └─ iframe
2042    //        └─ a title="</iframe><img src onerror=alert(1)>"
2043    //           └─ #text: test
2044    //
2045    // This DOM tree is harmless from ammonia point of view because the piece
2046    // of code that looks like XSS is in a title attribute. Hence, the
2047    // resulting "safe" HTML from ammonia would be:
2048    //
2049    //     <iframe><a title="</iframe><img src onerror=alert(1)>" rel="noopener
2050    // noreferrer">test</a></iframe>
2051    //
2052    // However, at this point, the information about namespace is lost, which
2053    // means that the browser will parse this snippet into:
2054    //
2055    //     ├─ iframe
2056    //     │  └─ #text: <a title="
2057    //     ├─ img src="" onerror="alert(1)"
2058    //     └─ #text: " rel="noopener noreferrer">test
2059    //
2060    // Leading to XSS.
2061    //
2062    // To solve this issue, check for unexpected namespace switches after cleanup.
2063    // Elements which change namespace at an unexpected point are removed.
2064    // This function returns `true` if `child` should be kept, and `false` if it
2065    // should be removed.
2066    //
2067    // [1]: https://github.com/Plume-org/Plume/blob/main/plume-models/src/safe_string.rs#L21
2068    fn check_expected_namespace(&self, parent: &Handle, child: &Handle) -> bool {
2069        let (parent, parent_attr, child) = match (&parent.data, &child.data) {
2070            (NodeData::Element { name: pn, attrs, .. }, NodeData::Element { name: cn, .. }) => (pn, attrs, cn),
2071            _ => return true,
2072        };
2073        // The only way to switch from html to svg is with the <svg> tag
2074        if parent.ns == ns!(html) && child.ns == ns!(svg) {
2075            child.local == local_name!("svg")
2076        // The only way to switch from html to mathml is with the <math> tag
2077        } else if parent.ns == ns!(html) && child.ns == ns!(mathml) {
2078            child.local == local_name!("math")
2079        // The only way to switch from mathml to svg/html is with a text integration point
2080        } else if parent.ns == ns!(mathml) && child.ns != ns!(mathml) {
2081            // https://html.spec.whatwg.org/#mathml
2082            if &*parent.local == "annotation-xml" {
2083                let parent_attr = parent_attr.borrow();
2084                // https://html.spec.whatwg.org/#tree-construction
2085                if child.ns == ns!(html)
2086                    && parent_attr
2087                        .iter()
2088                        .filter(|attr| attr.name.local == local_name!("encoding"))
2089                        .all(|attr| {
2090                            &*attr.value == "text/html" || &*attr.value == "application/xhtml+xml"
2091                        })
2092                {
2093                    is_html_tag(&child.local)
2094                    && parent_attr
2095                        .iter()
2096                        .filter(|attr| attr.name.local == local_name!("encoding"))
2097                        .count()
2098                        == 1
2099                } else {
2100                    child.local == local_name!("svg") && child.ns == ns!(svg)
2101                }
2102            } else {
2103                matches!(&*parent.local, "mi" | "mo" | "mn" | "ms" | "mtext")
2104                    && if child.ns == ns!(html) {
2105                        is_html_tag(&child.local)
2106                    } else {
2107                        true
2108                    }
2109            }
2110
2111        // The only way to switch from svg to mathml/html is with an html integration point
2112        } else if parent.ns == ns!(svg) && child.ns != ns!(svg) {
2113            // https://html.spec.whatwg.org/#svg-0
2114            matches!(&*parent.local, "foreignObject")
2115                && if child.ns == ns!(html) { is_html_tag(&child.local) } else { true }
2116        } else if child.ns == ns!(svg) {
2117            is_svg_tag(&child.local)
2118        } else if child.ns == ns!(mathml) {
2119            is_mathml_tag(&child.local)
2120        } else if child.ns == ns!(html) {
2121            is_html_tag(&child.local)
2122        } else {
2123            // There are no other supported ways to switch namespace
2124            parent.ns == child.ns
2125        }
2126    }
2127
2128    /// Add and transform special-cased attributes and elements.
2129    ///
2130    /// This function handles:
2131    ///
2132    /// * relative URL rewriting
2133    /// * adding `<a rel>` attributes
2134    /// * filtering out banned style properties
2135    /// * filtering out banned classes
2136    fn adjust_node_attributes(
2137        &self,
2138        child: &mut Handle,
2139        link_rel: &Option<StrTendril>,
2140        id_prefix: Option<&'a str>,
2141    ) {
2142        if let NodeData::Element {
2143            ref name,
2144            ref attrs,
2145            ..
2146        } = child.data
2147        {
2148            if let Some(set_attrs) = self.set_tag_attribute_values.get(&*name.local) {
2149                let mut attrs = attrs.borrow_mut();
2150                for (&set_name, &set_value) in set_attrs {
2151                    // set the value of the attribute if the attribute is already present
2152                    if let Some(attr) = attrs.iter_mut().find(|attr| &*attr.name.local == set_name)
2153                    {
2154                        if &*attr.value != set_value {
2155                            attr.value = set_value.into();
2156                        }
2157                    } else {
2158                        // otherwise, add the attribute
2159                        let attr = Attribute {
2160                            name: QualName::new(None, ns!(), set_name.into()),
2161                            value: set_value.into(),
2162                        };
2163                        attrs.push(attr);
2164                    }
2165                }
2166            }
2167            if let Some(ref link_rel) = *link_rel {
2168                if &*name.local == "a" {
2169                    attrs.borrow_mut().push(Attribute {
2170                        name: QualName::new(None, ns!(), local_name!("rel")),
2171                        value: link_rel.clone(),
2172                    })
2173                }
2174            }
2175            if let Some(ref id_prefix) = id_prefix {
2176                for attr in &mut *attrs.borrow_mut() {
2177                    if &attr.name.local == "id" && !attr.value.starts_with(id_prefix) {
2178                        attr.value = format_tendril!("{}{}", id_prefix, attr.value);
2179                    }
2180                }
2181            }
2182            if let Some(ref attr_filter) = self.attribute_filter {
2183                let mut drop_attrs = Vec::new();
2184                let mut attrs = attrs.borrow_mut();
2185                for (i, attr) in &mut attrs.iter_mut().enumerate() {
2186                    let replace_with = if let Some(new) =
2187                        attr_filter.filter(&name.local, &attr.name.local, &attr.value)
2188                    {
2189                        if *new != *attr.value {
2190                            Some(format_tendril!("{}", new))
2191                        } else {
2192                            None // no need to replace the attr if filter returned the same value
2193                        }
2194                    } else {
2195                        drop_attrs.push(i);
2196                        None
2197                    };
2198                    if let Some(replace_with) = replace_with {
2199                        attr.value = replace_with;
2200                    }
2201                }
2202                for i in drop_attrs.into_iter().rev() {
2203                    attrs.swap_remove(i);
2204                }
2205            }
2206            {
2207                let mut drop_attrs = Vec::new();
2208                let mut attrs = attrs.borrow_mut();
2209                for (i, attr) in attrs.iter_mut().enumerate() {
2210                    if is_url_attr(&name.local, &attr.name.local) && is_url_relative(&attr.value) {
2211                        let new_value = self.url_relative.evaluate(&attr.value);
2212                        if let Some(new_value) = new_value {
2213                            attr.value = new_value;
2214                        } else {
2215                            drop_attrs.push(i);
2216                        }
2217                    }
2218                }
2219                // Swap remove scrambles the vector after the current point.
2220                // We will not do anything except with items before the current point.
2221                // The `rev()` is, as such, necessary for correctness.
2222                // We could use regular `remove(usize)` and a forward iterator,
2223                // but that's slower.
2224                for i in drop_attrs.into_iter().rev() {
2225                    attrs.swap_remove(i);
2226                }
2227            }
2228            if let Some(allowed_values) = &self.style_properties {
2229                for attr in &mut *attrs.borrow_mut() {
2230                    if &attr.name.local == "style" {
2231                        attr.value = style::filter_style_attribute(&attr.value, allowed_values).into();
2232                    }
2233                }
2234            }
2235            if let Some(allowed_values) = self.allowed_classes.get(&*name.local) {
2236                for attr in &mut *attrs.borrow_mut() {
2237                    if &attr.name.local == "class" {
2238                        let mut classes = vec![];
2239                        // https://html.spec.whatwg.org/#global-attributes:classes-2
2240                        for class in attr.value.split_ascii_whitespace() {
2241                            if allowed_values.contains(class) {
2242                                classes.push(class.to_owned());
2243                            }
2244                        }
2245                        attr.value = format_tendril!("{}", classes.join(" "));
2246                    }
2247                }
2248            }
2249        }
2250    }
2251
2252    /// Initializes an HTML fragment parser.
2253    ///
2254    /// Ammonia conforms to the HTML5 fragment parsing rules,
2255    /// by parsing the given fragment as if it were included in a <div> tag.
2256    fn make_parser() -> html::Parser<RcDom> {
2257        html::parse_fragment(
2258            RcDom::default(),
2259            html::ParseOpts::default(),
2260            QualName::new(None, ns!(html), local_name!("div")),
2261            vec![],
2262            false,
2263        )
2264    }
2265}
2266
/// Tells whether attribute `attr` on element `element` holds a URL.
///
/// `href` and `src` count on every element; the remaining attributes only
/// count on the specific elements listed below.
fn is_url_attr(element: &str, attr: &str) -> bool {
    matches!(
        (element, attr),
        (_, "href" | "src")
            | ("form", "action")
            | ("object", "data")
            | ("button" | "input", "formaction")
            | ("a", "ping")
            | ("video", "poster")
    )
}
2277
2278fn is_html_tag(element: &str) -> bool {
2279    (!is_svg_tag(element) && !is_mathml_tag(element))
2280        || matches!(
2281            element,
2282            "title" | "style" | "font" | "a" | "script" | "span"
2283        )
2284}
2285
/// Given an element name, check if it's SVG.
///
/// The comparison is case-sensitive, so `foreignObject` matches
/// while `foreignobject` does not.
fn is_svg_tag(element: &str) -> bool {
    // https://svgwg.org/svg2-draft/eltindex.html
    const SVG_TAGS: &[&str] = &[
        "a",
        "animate",
        "animateMotion",
        "animateTransform",
        "circle",
        "clipPath",
        "defs",
        "desc",
        "discard",
        "ellipse",
        "feBlend",
        "feColorMatrix",
        "feComponentTransfer",
        "feComposite",
        "feConvolveMatrix",
        "feDiffuseLighting",
        "feDisplacementMap",
        "feDistantLight",
        "feDropShadow",
        "feFlood",
        "feFuncA",
        "feFuncB",
        "feFuncG",
        "feFuncR",
        "feGaussianBlur",
        "feImage",
        "feMerge",
        "feMergeNode",
        "feMorphology",
        "feOffset",
        "fePointLight",
        "feSpecularLighting",
        "feSpotLight",
        "feTile",
        "feTurbulence",
        "filter",
        "foreignObject",
        "g",
        "image",
        "line",
        "linearGradient",
        "marker",
        "mask",
        "metadata",
        "mpath",
        "path",
        "pattern",
        "polygon",
        "polyline",
        "radialGradient",
        "rect",
        "script",
        "set",
        "stop",
        "style",
        "svg",
        "switch",
        "symbol",
        "text",
        "textPath",
        "title",
        "tspan",
        "use",
        "view",
    ];
    SVG_TAGS.contains(&element)
}
2356
/// Given an element name, check if it's MathML.
///
/// The comparison is case-sensitive.
fn is_mathml_tag(element: &str) -> bool {
    // NOTE(review): this table used to cite https://svgwg.org/svg2-draft/eltindex.html,
    // which is the SVG element index; the intended MathML source should be confirmed.
    const MATHML_TAGS: &[&str] = &[
        "abs", "and", "annotation", "annotation-xml", "apply", "approx",
        "arccos", "arccosh", "arccot", "arccoth", "arccsc", "arccsch",
        "arcsec", "arcsech", "arcsin", "arcsinh", "arctan", "arctanh", "arg",
        "bind", "bvar",
        "card", "cartesianproduct", "cbytes", "ceiling", "cerror", "ci", "cn",
        "codomain", "complexes", "compose", "condition", "conjugate",
        "cos", "cosh", "cot", "coth", "cs", "csc", "csch", "csymbol", "curl",
        "declare", "degree", "determinant", "diff", "divergence", "divide",
        "domain", "domainofapplication",
        "emptyset", "eq", "equivalent", "eulergamma", "exists", "exp", "exponentiale",
        "factorial", "factorof", "false", "floor", "fn", "forall",
        "gcd", "geq", "grad", "gt",
        "ident", "image", "imaginary", "imaginaryi", "implies", "in", "infinity",
        "int", "integers", "intersect", "interval", "inverse",
        "lambda", "laplacian", "lcm", "leq", "limit", "list", "ln", "log",
        "logbase", "lowlimit", "lt",
        "maction", "maligngroup", "malignmark", "math", "matrix", "matrixrow",
        "max", "mean", "median", "menclose", "merror", "mfenced", "mfrac", "mglyph",
        "mi", "min", "minus", "mlabeledtr", "mlongdiv", "mmultiscripts",
        "mn", "mo", "mode", "moment", "momentabout", "mover",
        "mpadded", "mphantom", "mprescripts", "mroot", "mrow",
        "ms", "mscarries", "mscarry", "msgroup", "msline", "mspace", "msqrt",
        "msrow", "mstack", "mstyle", "msub", "msubsup", "msup",
        "mtable", "mtd", "mtext", "mtr", "munder", "munderover",
        "naturalnumbers", "neq", "none", "not", "notanumber", "notin",
        "notprsubset", "notsubset",
        "or", "otherwise", "outerproduct",
        "partialdiff", "pi", "piece", "piecewise", "plus", "power", "primes",
        "product", "prsubset",
        "quotient",
        "rationals", "real", "reals", "reln", "rem", "root",
        "scalarproduct", "sdev", "sec", "sech", "selector", "semantics", "sep",
        "set", "setdiff", "share", "sin", "sinh", "span", "subset", "sum",
        "tan", "tanh", "tendsto", "times", "transpose", "true",
        "union", "uplimit",
        "variance", "vector", "vectorproduct",
        "xor",
    ];
    MATHML_TAGS.contains(&element)
}
2558
2559fn is_url_relative(url: &str) -> bool {
2560    matches!(
2561        Url::parse(url),
2562        Err(url::ParseError::RelativeUrlWithoutBase)
2563    )
2564}
2565
/// Policy for [relative URLs], that is, URLs that do not specify the scheme in full.
///
/// This policy kicks in, if set, for any attribute named `src` or `href`,
/// as well as the `data` attribute of an `object` tag.
///
/// [relative URLs]: struct.Builder.html#method.url_relative
///
/// # Examples
///
/// ## `Deny`
///
/// * `<a href="test">` is a file-relative URL, and will be removed
/// * `<a href="/test">` is a domain-relative URL, and will be removed
/// * `<a href="//example.com/test">` is a scheme-relative URL, and will be removed
/// * `<a href="http://example.com/test">` is an absolute URL, and will be kept
///
/// ## `PassThrough`
///
/// No changes will be made to any URLs, except if a disallowed scheme is used.
///
/// ## `RewriteWithBase`
///
/// If the base is set to `http://notriddle.com/some-directory/some-file`
///
/// * `<a href="test">` will be rewritten to `<a href="http://notriddle.com/some-directory/test">`
/// * `<a href="/test">` will be rewritten to `<a href="http://notriddle.com/test">`
/// * `<a href="//example.com/test">` will be rewritten to `<a href="http://example.com/test">`
/// * `<a href="http://example.com/test">` is an absolute URL, so it will be kept as-is
///
/// ## `Custom`
///
/// Pass the relative URL to a function.
/// If it returns `Some(string)`, then that one gets used.
/// Otherwise, it will remove the attribute (like `Deny` does).
///
///     use std::borrow::Cow;
///     fn is_absolute_path(url: &str) -> bool {
///         let u = url.as_bytes();
///         // `//a/b/c` is "protocol-relative", meaning "a" is a hostname
///         // `/a/b/c` is an absolute path, and what we want to do stuff to.
///         u.get(0) == Some(&b'/') && u.get(1) != Some(&b'/')
///     }
///     fn evaluate(url: &str) -> Option<Cow<str>> {
///         if is_absolute_path(url) {
///             Some(Cow::Owned(String::from("/root") + url))
///         } else {
///             Some(Cow::Borrowed(url))
///         }
///     }
///     fn main() {
///         let a = ammonia::Builder::new()
///             .url_relative(ammonia::UrlRelative::Custom(Box::new(evaluate)))
///             .clean("<a href=/test/path>fixed</a><a href=path>passed</a><a href=http://google.com/>skipped</a>")
///             .to_string();
///         assert_eq!(a, "<a href=\"/root/test/path\" rel=\"noopener noreferrer\">fixed</a><a href=\"path\" rel=\"noopener noreferrer\">passed</a><a href=\"http://google.com/\" rel=\"noopener noreferrer\">skipped</a>");
///     }
///
/// This function is only applied to relative URLs.
/// To filter all of the URLs,
/// use the not-yet-implemented Content Security Policy.
#[non_exhaustive]
pub enum UrlRelative<'a> {
    /// Relative URLs will be completely stripped from the document.
    Deny,
    /// Relative URLs will be passed through unchanged.
    PassThrough,
    /// Relative URLs will be changed into absolute URLs, based on this base URL.
    ///
    /// The relative URL is joined onto the base; if the join fails, no replacement
    /// value is produced, which is the same outcome `Deny` gives.
    RewriteWithBase(Url),
    /// Force absolute and relative paths into a particular directory.
    ///
    /// Since the resolver does not affect fully-qualified URLs, it doesn't
    /// prevent users from linking wherever they want. This feature only
    /// serves to make content more portable.
    ///
    /// The URL is resolved according to its leading characters:
    ///
    /// * a URL starting with `//` (scheme-relative) is joined onto `root` unchanged;
    /// * the URL `/` resolves to `.` relative to `root`;
    /// * any other URL starting with `/` (path-absolute) has that leading slash
    ///   removed and the remainder is joined onto `root`;
    /// * every other URL (path-relative) is joined onto the result of joining
    ///   `path` onto `root`.
    ///
    /// If any of these joins fails, no replacement value is produced, which is
    /// the same outcome `Deny` gives.
    ///
    /// # Examples
    ///
    /// <table>
    /// <thead>
    /// <tr>
    ///     <th>root</th>
    ///     <th>path</th>
    ///     <th>url</th>
    ///     <th>result</th>
    /// </tr>
    /// </thead>
    /// <tbody>
    /// <tr>
    ///     <td>https://github.com/rust-ammonia/ammonia/blob/master/</td>
    ///     <td>README.md</td>
    ///     <td></td>
    ///     <td>https://github.com/rust-ammonia/ammonia/blob/master/README.md</td>
    /// </tr><tr>
    ///     <td>https://github.com/rust-ammonia/ammonia/blob/master/</td>
    ///     <td>README.md</td>
    ///     <td>/</td>
    ///     <td>https://github.com/rust-ammonia/ammonia/blob/master/</td>
    /// </tr><tr>
    ///     <td>https://github.com/rust-ammonia/ammonia/blob/master/</td>
    ///     <td>README.md</td>
    ///     <td>/CONTRIBUTING.md</td>
    ///     <td>https://github.com/rust-ammonia/ammonia/blob/master/CONTRIBUTING.md</td>
    /// </tr><tr>
    ///     <td>https://github.com/rust-ammonia/ammonia/blob/master</td>
    ///     <td>README.md</td>
    ///     <td></td>
    ///     <td>https://github.com/rust-ammonia/ammonia/blob/README.md</td>
    /// </tr><tr>
    ///     <td>https://github.com/rust-ammonia/ammonia/blob/master</td>
    ///     <td>README.md</td>
    ///     <td>/</td>
    ///     <td>https://github.com/rust-ammonia/ammonia/blob/</td>
    /// </tr><tr>
    ///     <td>https://github.com/rust-ammonia/ammonia/blob/master</td>
    ///     <td>README.md</td>
    ///     <td>/CONTRIBUTING.md</td>
    ///     <td>https://github.com/rust-ammonia/ammonia/blob/CONTRIBUTING.md</td>
    /// </tr><tr>
    ///     <td>https://github.com/rust-ammonia/ammonia/blob/master/</td>
    ///     <td></td>
    ///     <td></td>
    ///     <td>https://github.com/rust-ammonia/ammonia/blob/master/</td>
    /// </tr><tr>
    ///     <td>https://github.com/rust-ammonia/ammonia/blob/master/</td>
    ///     <td></td>
    ///     <td>/</td>
    ///     <td>https://github.com/rust-ammonia/ammonia/blob/master/</td>
    /// </tr><tr>
    ///     <td>https://github.com/rust-ammonia/ammonia/blob/master/</td>
    ///     <td></td>
    ///     <td>/CONTRIBUTING.md</td>
    ///     <td>https://github.com/rust-ammonia/ammonia/blob/master/CONTRIBUTING.md</td>
    /// </tr><tr>
    ///     <td>https://github.com/</td>
    ///     <td>rust-ammonia/ammonia/blob/master/README.md</td>
    ///     <td></td>
    ///     <td>https://github.com/rust-ammonia/ammonia/blob/master/README.md</td>
    /// </tr><tr>
    ///     <td>https://github.com/</td>
    ///     <td>rust-ammonia/ammonia/blob/master/README.md</td>
    ///     <td>/</td>
    ///     <td>https://github.com/</td>
    /// </tr><tr>
    ///     <td>https://github.com/</td>
    ///     <td>rust-ammonia/ammonia/blob/master/README.md</td>
    ///     <td>CONTRIBUTING.md</td>
    ///     <td>https://github.com/rust-ammonia/ammonia/blob/master/CONTRIBUTING.md</td>
    /// </tr><tr>
    ///     <td>https://github.com/</td>
    ///     <td>rust-ammonia/ammonia/blob/master/README.md</td>
    ///     <td>/CONTRIBUTING.md</td>
    ///     <td>https://github.com/CONTRIBUTING.md</td>
    /// </tr>
    /// </tbody>
    /// </table>
    RewriteWithRoot {
        /// The URL that is treated as the root by the resolver.
        root: Url,
        /// The "current path" used to resolve relative paths.
        path: String,
    },
    /// Rewrite URLs with a custom function.
    ///
    /// The boxed evaluator returns `Some(string)` to substitute a new value, or
    /// `None` to produce no replacement value.
    Custom(Box<dyn UrlRelativeEvaluate<'a>>),
}
2729
2730impl<'a> UrlRelative<'a> {
2731    fn evaluate(&self, url: &str) -> Option<html5ever::tendril::StrTendril> {
2732        match self {
2733            UrlRelative::RewriteWithBase(ref url_base) => url_base
2734                .join(url)
2735                .ok()
2736                .and_then(|x| StrTendril::from_str(x.as_str()).ok()),
2737            UrlRelative::RewriteWithRoot { ref root, ref path } => {
2738                (match url.as_bytes() {
2739                    // Scheme-relative URL
2740                    [b'/', b'/', ..] => root.join(url),
2741                    // Path-absolute URL
2742                    b"/" => root.join("."),
2743                    [b'/', ..] => root.join(&url[1..]),
2744                    // Path-relative URL
2745                    _ => root.join(path).and_then(|r| r.join(url)),
2746                })
2747                .ok()
2748                .and_then(|x| StrTendril::from_str(x.as_str()).ok())
2749            }
2750            UrlRelative::Custom(ref evaluate) => evaluate
2751                .evaluate(url)
2752                .as_ref()
2753                .map(Cow::as_ref)
2754                .map(StrTendril::from_str)
2755                .and_then(Result::ok),
2756            UrlRelative::PassThrough => StrTendril::from_str(url).ok(),
2757            UrlRelative::Deny => None,
2758        }
2759    }
2760}
2761
2762impl<'a> fmt::Debug for UrlRelative<'a> {
2763    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
2764        match *self {
2765            UrlRelative::Deny => write!(f, "UrlRelative::Deny"),
2766            UrlRelative::PassThrough => write!(f, "UrlRelative::PassThrough"),
2767            UrlRelative::RewriteWithBase(ref base) => {
2768                write!(f, "UrlRelative::RewriteWithBase({})", base)
2769            }
2770            UrlRelative::RewriteWithRoot { ref root, ref path } => {
2771                write!(
2772                    f,
2773                    "UrlRelative::RewriteWithRoot {{ root: {root}, path: {path} }}"
2774                )
2775            }
2776            UrlRelative::Custom(_) => write!(f, "UrlRelative::Custom"),
2777        }
2778    }
2779}
2780
/// A strategy for turning a relative URL into the value that should replace it.
///
/// The evaluator is only consulted for relative URLs; absolute URLs never reach it.
///
/// See [`url_relative`][url_relative] for more details.
///
/// [url_relative]: struct.Builder.html#method.url_relative
pub trait UrlRelativeEvaluate<'a>: Send + Sync + 'a {
    /// Decides what happens to `url`.
    ///
    /// Return `Some(str)` to replace the attribute value with a new string, or
    /// `None` to remove the attribute.
    fn evaluate<'url>(&self, url: &'url str) -> Option<Cow<'url, str>>;
}
2792impl<'a, T> UrlRelativeEvaluate<'a> for T
2793where
2794    T: Fn(&str) -> Option<Cow<'_, str>> + Send + Sync + 'a,
2795{
2796    fn evaluate<'url>(&self, url: &'url str) -> Option<Cow<'url, str>> {
2797        self(url)
2798    }
2799}
2800
2801impl fmt::Debug for dyn AttributeFilter {
2802    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
2803        f.write_str("AttributeFilter")
2804    }
2805}
2806
/// A hook for removing or rewriting arbitrary attributes.
///
/// See [`attribute_filter`][attribute_filter] for more details.
///
/// [attribute_filter]: struct.Builder.html#method.attribute_filter
pub trait AttributeFilter: Send + Sync {
    /// Decides what happens to the attribute `attribute` on `element`, whose current
    /// value is `value`.
    ///
    /// Return `Some(str)` to replace the value with a new string, or `None` to remove
    /// the attribute.
    fn filter<'a>(&self, element: &str, attribute: &str, value: &'a str) -> Option<Cow<'a, str>>;
}
2816
2817impl<T> AttributeFilter for T
2818where
2819    T: for<'a> Fn(&str, &str, &'a str) -> Option<Cow<'a, str>> + Send + Sync + 'static,
2820{
2821    fn filter<'a>(&self, element: &str, attribute: &str, value: &'a str) -> Option<Cow<'a, str>> {
2822        self(element, attribute, value)
2823    }
2824}
2825
/// A sanitized HTML document.
///
/// The `Document` type is an opaque struct representing an HTML fragment that was sanitized by
/// `ammonia`. It can be converted to a [`String`] or written to a [`Write`] instance. This allows
/// users to avoid buffering the serialized representation to a [`String`] when desired.
///
/// This type is opaque to insulate the caller from breaking changes in the `html5ever` interface.
///
/// Note that this type wraps an `html5ever` DOM tree. `ammonia` does not support streaming, so
/// the complete fragment needs to be stored in memory during processing.
///
/// [`String`]: std::string::String
/// [`Write`]: std::io::Write
///
/// # Examples
///
///     use ammonia::Builder;
///
///     let input = "<!-- comments will be stripped -->This is an Ammonia example.";
///     let output = "This is an Ammonia example.";
///
///     let document = Builder::new()
///         .clean(input);
///     assert_eq!(document.to_string(), output);
// The wrapped DOM is private; it is reached only through the methods and trait
// impls on `Document`.
pub struct Document(RcDom);
2851
2852impl Document {
2853    /// Serializes a `Document` instance to a writer.
2854    ///
2855    /// This method writes the sanitized HTML to a [`Write`] instance, avoiding a buffering step.
2856    ///
2857    /// To avoid consuming the writer, a mutable reference can be passed, like in the example below.
2858    ///
2859    /// Note that the in-memory representation of `Document` is larger than the serialized
2860    /// `String`.
2861    ///
2862    /// [`Write`]: https://doc.rust-lang.org/nightly/std/io/trait.Write.html
2863    ///
2864    /// # Examples
2865    ///
2866    ///     use ammonia::Builder;
2867    ///
2868    ///     let input = "Some <style></style>HTML here";
2869    ///     let expected = b"Some HTML here";
2870    ///
2871    ///     let document = Builder::new()
2872    ///         .clean(input);
2873    ///
2874    ///     let mut sanitized = Vec::new();
2875    ///     document.write_to(&mut sanitized)
2876    ///         .expect("Writing to a string should not fail (except on OOM)");
2877    ///     assert_eq!(sanitized, expected);
2878    pub fn write_to<W>(&self, writer: W) -> io::Result<()>
2879    where
2880        W: io::Write,
2881    {
2882        let opts = Self::serialize_opts();
2883        let inner: SerializableHandle = self.0.document.children.borrow()[0].clone().into();
2884        serialize(writer, &inner, opts)
2885    }
2886
2887    /// Exposes the `Document` instance as an [`rcdom::Handle`].
2888    ///
2889    /// This method returns the inner object backing the `Document` instance. This allows
2890    /// making further changes to the DOM without introducing redundant serialization and
2891    /// parsing.
2892    ///
2893    /// Note that this method should be considered unstable and sits outside of the semver
2894    /// stability guarantees. It may change, break, or go away at any time, either because
2895    /// of `html5ever` changes or `ammonia` implementation changes.
2896    ///
2897    /// For this method to be accessible, a `cfg` flag is required. The easiest way is to
2898    /// use the `RUSTFLAGS` environment variable:
2899    ///
2900    /// ```text
2901    /// RUSTFLAGS='--cfg ammonia_unstable' cargo build
2902    /// ```
2903    ///
2904    /// on Unix-like platforms, or
2905    ///
2906    /// ```text
2907    /// set RUSTFLAGS=--cfg ammonia_unstable
2908    /// cargo build
2909    /// ```
2910    ///
2911    /// on Windows.
2912    ///
2913    /// This requirement also applies to crates that transitively depend on crates that use
2914    /// this flag.
2915    ///
2916    /// # Examples
2917    ///
2918    ///     use ammonia::Builder;
2919    ///     use ammonia::rcdom::SerializableHandle;
2920    ///     use maplit::hashset;
2921    ///     use html5ever::serialize::{serialize, SerializeOpts};
2922    ///
2923    ///     # use std::error::Error;
2924    ///     # fn do_main() -> Result<(), Box<dyn Error>> {
2925    ///     let input = "<a>one link</a> and <a>one more</a>";
2926    ///     let expected = "<a>one more</a> and <a>one link</a>";
2927    ///
2928    ///     let document = Builder::new()
2929    ///         .link_rel(None)
2930    ///         .clean(input);
2931    ///
2932    ///     let node = document.to_dom_node();
2933    ///     node.children.borrow_mut().reverse();
2934    ///
2935    ///     let mut buf = Vec::new();
2936    ///     let handle: SerializableHandle = node.into();
2937    ///     serialize(&mut buf, &handle, SerializeOpts::default())?;
2938    ///     let output = String::from_utf8(buf)?;
2939    ///
2940    ///     assert_eq!(output, expected);
2941    ///     # Ok(())
2942    ///     # }
2943    ///     # fn main() { do_main().unwrap() }
2944    #[cfg(ammonia_unstable)]
2945    pub fn to_dom_node(&self) -> Handle {
2946        self.0.document.children.borrow()[0].clone()
2947    }
2948
2949    fn serialize_opts() -> SerializeOpts {
2950        SerializeOpts::default()
2951    }
2952}
2953
2954impl Clone for Document {
2955    fn clone(&self) -> Self {
2956        let parser = Builder::make_parser();
2957        let dom = parser.one(&self.to_string()[..]);
2958        Document(dom)
2959    }
2960}
2961
2962/// Convert a `Document` to stringified HTML.
2963///
2964/// Since [`Document`] implements [`Display`], it can be converted to a [`String`] using the
2965/// standard [`ToString::to_string`] method. This is the simplest way to use `ammonia`.
2966///
2967/// [`Document`]: ammonia::Document
2968/// [`Display`]: std::fmt::Display
2969/// [`ToString::to_string`]: std::string::ToString
2970///
2971/// # Examples
2972///
2973///     use ammonia::Builder;
2974///
2975///     let input = "Some <style></style>HTML here";
2976///     let output = "Some HTML here";
2977///
2978///     let document = Builder::new()
2979///         .clean(input);
2980///     assert_eq!(document.to_string(), output);
2981impl Display for Document {
2982    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
2983        let opts = Self::serialize_opts();
2984        let mut ret_val = Vec::new();
2985        let inner: SerializableHandle = self.0.document.children.borrow()[0].clone().into();
2986        serialize(&mut ret_val, &inner, opts)
2987            .expect("Writing to a string shouldn't fail (expect on OOM)");
2988        String::from_utf8(ret_val)
2989            .expect("html5ever only supports UTF8")
2990            .fmt(f)
2991    }
2992}
2993
2994impl fmt::Debug for Document {
2995    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
2996        write!(f, "Document({})", self)
2997    }
2998}
2999
3000impl From<Document> for String {
3001    fn from(document: Document) -> Self {
3002        document.to_string()
3003    }
3004}
3005
3006#[cfg(test)]
3007mod test {
3008    use super::*;
3009    #[test]
3010    fn deeply_nested_whitelisted_does_not_cause_stack_overflow() {
3011        clean(&"<b>".repeat(60_000));
3012    }
3013    #[test]
3014    fn deeply_nested_blacklisted_does_not_cause_stack_overflow() {
3015        clean(&"<b-b>".repeat(60_000));
3016    }
3017    #[test]
3018    fn deeply_nested_alternating_does_not_cause_stack_overflow() {
3019        clean(&"<b-b>".repeat(35_000));
3020    }
3021    #[test]
3022    fn document_level_tags_cannot_be_whitelisted() {
3023        // Adding `html`, `head`, or `body` to the allowed tags has no effect
3024        // because the parser runs in fragment mode and strips them before
3025        // the sanitizer sees the tree. This test pins that documented
3026        // behavior; if it ever changes, the docs on `Builder::tags` need to
3027        // change too.
3028        let fragment =
3029            "<html><head>head content</head><body><div>test</div></body></html>";
3030        let result = Builder::default()
3031            .add_tags(["html", "head", "body"])
3032            .clean(fragment)
3033            .to_string();
3034        assert_eq!(result, "head content<div>test</div>");
3035    }
3036    #[test]
3037    fn included_angles() {
3038        let fragment = "1 < 2";
3039        let result = clean(fragment);
3040        assert_eq!(result, "1 &lt; 2");
3041    }
3042    #[test]
3043    fn remove_script() {
3044        let fragment = "an <script>evil()</script> example";
3045        let result = clean(fragment);
3046        assert_eq!(result, "an  example");
3047    }
3048    #[test]
3049    fn ignore_link() {
3050        let fragment = "a <a href=\"http://www.google.com\">good</a> example";
3051        let expected = "a <a href=\"http://www.google.com\" rel=\"noopener noreferrer\">\
3052                        good</a> example";
3053        let result = clean(fragment);
3054        assert_eq!(result, expected);
3055    }
3056    #[test]
3057    fn remove_unsafe_link() {
3058        let fragment = "an <a onclick=\"evil()\" href=\"http://www.google.com\">evil</a> example";
3059        let result = clean(fragment);
3060        assert_eq!(
3061            result,
3062            "an <a href=\"http://www.google.com\" rel=\"noopener noreferrer\">evil</a> example"
3063        );
3064    }
3065    #[test]
3066    fn remove_js_link() {
3067        let fragment = "an <a href=\"javascript:evil()\">evil</a> example";
3068        let result = clean(fragment);
3069        assert_eq!(result, "an <a rel=\"noopener noreferrer\">evil</a> example");
3070    }
3071    #[test]
3072    fn tag_rebalance() {
3073        let fragment = "<b>AWESOME!";
3074        let result = clean(fragment);
3075        assert_eq!(result, "<b>AWESOME!</b>");
3076    }
3077    #[test]
3078    fn allow_url_relative() {
3079        let fragment = "<a href=test>Test</a>";
3080        let result = Builder::new()
3081            .url_relative(UrlRelative::PassThrough)
3082            .clean(fragment)
3083            .to_string();
3084        assert_eq!(
3085            result,
3086            "<a href=\"test\" rel=\"noopener noreferrer\">Test</a>"
3087        );
3088    }
3089    #[test]
3090    fn rewrite_url_relative() {
3091        let fragment = "<a href=test>Test</a>";
3092        let result = Builder::new()
3093            .url_relative(UrlRelative::RewriteWithBase(
3094                Url::parse("http://example.com/").unwrap(),
3095            ))
3096            .clean(fragment)
3097            .to_string();
3098        assert_eq!(
3099            result,
3100            "<a href=\"http://example.com/test\" rel=\"noopener noreferrer\">Test</a>"
3101        );
3102    }
3103    #[test]
3104    fn rewrite_url_relative_with_invalid_url() {
3105        // Reduced from https://github.com/Bauke/ammonia-crash-test
3106        let fragment = r##"<a href="\\"https://example.com\\"">test</a>"##;
3107        let result = Builder::new()
3108            .url_relative(UrlRelative::RewriteWithBase(
3109                Url::parse("http://example.com/").unwrap(),
3110            ))
3111            .clean(fragment)
3112            .to_string();
3113        assert_eq!(result, r##"<a rel="noopener noreferrer">test</a>"##);
3114    }
3115    #[test]
3116    fn attribute_filter_nop() {
3117        let fragment = "<a href=test>Test</a>";
3118        let result = Builder::new()
3119            .attribute_filter(|elem, attr, value| {
3120                assert_eq!("a", elem);
3121                assert!(
3122                    matches!(
3123                        (attr, value),
3124                        ("href", "test") | ("rel", "noopener noreferrer")
3125                    ),
3126                    "{}",
3127                    value.to_string()
3128                );
3129                Some(value.into())
3130            })
3131            .clean(fragment)
3132            .to_string();
3133        assert_eq!(
3134            result,
3135            "<a href=\"test\" rel=\"noopener noreferrer\">Test</a>"
3136        );
3137    }
3138
3139    #[test]
3140    fn attribute_filter_drop() {
3141        let fragment = "Test<img alt=test src=imgtest>";
3142        let result = Builder::new()
3143            .attribute_filter(|elem, attr, value| {
3144                assert_eq!("img", elem);
3145                match (attr, value) {
3146                    ("src", "imgtest") => None,
3147                    ("alt", "test") => Some(value.into()),
3148                    _ => panic!("unexpected"),
3149                }
3150            })
3151            .clean(fragment)
3152            .to_string();
3153        assert_eq!(result, r#"Test<img alt="test">"#);
3154    }
3155
3156    #[test]
3157    fn url_filter_absolute() {
3158        let fragment = "Test<img alt=test src=imgtest>";
3159        let result = Builder::new()
3160            .attribute_filter(|elem, attr, value| {
3161                assert_eq!("img", elem);
3162                match (attr, value) {
3163                    ("src", "imgtest") => {
3164                        Some(format!("https://example.com/images/{}", value).into())
3165                    }
3166                    ("alt", "test") => None,
3167                    _ => panic!("unexpected"),
3168                }
3169            })
3170            .url_relative(UrlRelative::RewriteWithBase(
3171                Url::parse("http://wrong.invalid/").unwrap(),
3172            ))
3173            .clean(fragment)
3174            .to_string();
3175        assert_eq!(
3176            result,
3177            r#"Test<img src="https://example.com/images/imgtest">"#
3178        );
3179    }
3180
3181    #[test]
3182    fn url_filter_relative() {
3183        let fragment = "Test<img alt=test src=imgtest>";
3184        let result = Builder::new()
3185            .attribute_filter(|elem, attr, value| {
3186                assert_eq!("img", elem);
3187                match (attr, value) {
3188                    ("src", "imgtest") => Some("rewrite".into()),
3189                    ("alt", "test") => Some("altalt".into()),
3190                    _ => panic!("unexpected"),
3191                }
3192            })
3193            .url_relative(UrlRelative::RewriteWithBase(
3194                Url::parse("https://example.com/base/#").unwrap(),
3195            ))
3196            .clean(fragment)
3197            .to_string();
3198        assert_eq!(
3199            result,
3200            r#"Test<img alt="altalt" src="https://example.com/base/rewrite">"#
3201        );
3202    }
3203
3204    #[test]
3205    fn rewrite_url_relative_no_rel() {
3206        let fragment = "<a href=test>Test</a>";
3207        let result = Builder::new()
3208            .url_relative(UrlRelative::RewriteWithBase(
3209                Url::parse("http://example.com/").unwrap(),
3210            ))
3211            .link_rel(None)
3212            .clean(fragment)
3213            .to_string();
3214        assert_eq!(result, "<a href=\"http://example.com/test\">Test</a>");
3215    }
3216    #[test]
3217    fn deny_url_relative() {
3218        let fragment = "<a href=test>Test</a>";
3219        let result = Builder::new()
3220            .url_relative(UrlRelative::Deny)
3221            .clean(fragment)
3222            .to_string();
3223        assert_eq!(result, "<a rel=\"noopener noreferrer\">Test</a>");
3224    }
3225    #[test]
3226    fn replace_rel() {
3227        let fragment = "<a href=test rel=\"garbage\">Test</a>";
3228        let result = Builder::new()
3229            .url_relative(UrlRelative::PassThrough)
3230            .clean(fragment)
3231            .to_string();
3232        assert_eq!(
3233            result,
3234            "<a href=\"test\" rel=\"noopener noreferrer\">Test</a>"
3235        );
3236    }
3237    #[test]
3238    fn consider_rel_still_banned() {
3239        let fragment = "<a href=test rel=\"garbage\">Test</a>";
3240        let result = Builder::new()
3241            .url_relative(UrlRelative::PassThrough)
3242            .link_rel(None)
3243            .clean(fragment)
3244            .to_string();
3245        assert_eq!(result, "<a href=\"test\">Test</a>");
3246    }
3247    #[test]
3248    fn object_data() {
3249        let fragment = "<span data=\"javascript:evil()\">Test</span>\
3250                        <object data=\"javascript:evil()\"></object>M";
3251        let expected = r#"<span data="javascript:evil()">Test</span><object></object>M"#;
3252        let result = Builder::new()
3253            .tags(hashset!["span", "object"])
3254            .generic_attributes(hashset!["data"])
3255            .clean(fragment)
3256            .to_string();
3257        assert_eq!(result, expected);
3258    }
3259    #[test]
3260    fn remove_attributes() {
3261        let fragment = "<table border=\"1\"><tr></tr></table>";
3262        let result = Builder::new().clean(fragment);
3263        assert_eq!(
3264            result.to_string(),
3265            "<table><tbody><tr></tr></tbody></table>"
3266        );
3267    }
3268    #[test]
3269    fn quotes_in_attrs() {
3270        let fragment = "<b title='\"'>contents</b>";
3271        let result = clean(fragment);
3272        assert_eq!(result, "<b title=\"&quot;\">contents</b>");
3273    }
3274    #[test]
3275    #[should_panic]
3276    fn panic_if_rel_is_allowed_and_replaced_generic() {
3277        Builder::new()
3278            .link_rel(Some("noopener noreferrer"))
3279            .generic_attributes(hashset!["rel"])
3280            .clean("something");
3281    }
3282    #[test]
3283    #[should_panic]
3284    fn panic_if_rel_is_allowed_and_replaced_a() {
3285        Builder::new()
3286            .link_rel(Some("noopener noreferrer"))
3287            .tag_attributes(hashmap![
3288                "a" => hashset!["rel"],
3289            ])
3290            .clean("something");
3291    }
3292    #[test]
3293    fn no_panic_if_rel_is_allowed_and_replaced_span() {
3294        Builder::new()
3295            .link_rel(Some("noopener noreferrer"))
3296            .tag_attributes(hashmap![
3297                "span" => hashset!["rel"],
3298            ])
3299            .clean("<span rel=\"what\">s</span>");
3300    }
3301    #[test]
3302    fn no_panic_if_rel_is_allowed_and_not_replaced_generic() {
3303        Builder::new()
3304            .link_rel(None)
3305            .generic_attributes(hashset!["rel"])
3306            .clean("<a rel=\"what\">s</a>");
3307    }
3308    #[test]
3309    fn no_panic_if_rel_is_allowed_and_not_replaced_a() {
3310        Builder::new()
3311            .link_rel(None)
3312            .tag_attributes(hashmap![
3313                "a" => hashset!["rel"],
3314            ])
3315            .clean("<a rel=\"what\">s</a>");
3316    }
3317    #[test]
3318    fn dont_close_void_elements() {
3319        let fragment = "<br>";
3320        let result = clean(fragment);
3321        assert_eq!(result.to_string(), "<br>");
3322    }
// Listing `class` in `tag_attributes` together with `allowed_classes` for the
// same tags is expected to panic.
#[test]
#[should_panic]
fn panic_on_allowed_classes_tag_attributes() {
    let html = "<p class=\"foo bar\"><a class=\"baz bleh\">Hey</a></p>";
    let attrs = hashmap![
        "p" => hashset!["class"],
        "a" => hashset!["class"],
    ];
    let classes = hashmap![
        "p" => hashset!["foo", "bar"],
        "a" => hashset!["baz"],
    ];
    let mut builder = Builder::new();
    builder.link_rel(None);
    builder.tag_attributes(attrs);
    builder.allowed_classes(classes);
    builder.clean(html);
}
// Listing `class` as a generic attribute together with `allowed_classes` is
// expected to panic.
#[test]
#[should_panic]
fn panic_on_allowed_classes_generic_attributes() {
    let html = "<p class=\"foo bar\"><a class=\"baz bleh\">Hey</a></p>";
    let classes = hashmap![
        "p" => hashset!["foo", "bar"],
        "a" => hashset!["baz"],
    ];
    let mut builder = Builder::new();
    builder.link_rel(None);
    builder.generic_attributes(hashset!["class", "href", "some-foo"]);
    builder.allowed_classes(classes);
    builder.clean(html);
}
#[test]
fn remove_non_allowed_classes() {
    // `bleh` is not in the allow-list for `<a>`, so only `baz` is expected to survive.
    let html = "<p class=\"foo bar\"><a class=\"baz bleh\">Hey</a></p>";
    let classes = hashmap![
        "p" => hashset!["foo", "bar"],
        "a" => hashset!["baz"],
    ];
    let mut builder = Builder::new();
    builder.link_rel(None);
    builder.allowed_classes(classes);
    let cleaned = builder.clean(html);
    assert_eq!(
        cleaned.to_string(),
        "<p class=\"foo bar\"><a class=\"baz\">Hey</a></p>"
    );
}
#[test]
fn remove_non_allowed_classes_with_tag_class() {
    // `class` is allowed wholesale on `<div>` only; `<p>` and `<a>` still go
    // through the per-class allow-list.
    let html = "<p class=\"foo bar\"><a class=\"baz bleh\">Hey</a></p>";
    let attrs = hashmap![
        "div" => hashset!["class"],
    ];
    let classes = hashmap![
        "p" => hashset!["foo", "bar"],
        "a" => hashset!["baz"],
    ];
    let mut builder = Builder::new();
    builder.link_rel(None);
    builder.tag_attributes(attrs);
    builder.allowed_classes(classes);
    let cleaned = builder.clean(html);
    assert_eq!(
        cleaned.to_string(),
        "<p class=\"foo bar\"><a class=\"baz\">Hey</a></p>"
    );
}
#[test]
fn allowed_classes_ascii_whitespace() {
    // https://infra.spec.whatwg.org/#ascii-whitespace defines ASCII whitespace
    // as TAB (\t), LF (\n), FF (\x0C), CR (\x0D) and SPACE (\x20). VT (\x0B)
    // and Unicode whitespace are not separators, so the tokens containing
    // them do not match the allowed `f` / `g` classes.
    let html = "<p class=\"a\tb\nc\x0Cd\re f\x0B g\u{2000}\">";
    let classes = hashmap![
        "p" => hashset!["a", "b", "c", "d", "e", "f", "g"],
    ];
    let mut builder = Builder::new();
    builder.allowed_classes(classes);
    let cleaned = builder.clean(html);
    assert_eq!(cleaned.to_string(), r#"<p class="a b c d e"></p>"#);
}
#[test]
fn remove_non_allowed_attributes_with_tag_attribute_values() {
    // `data-label="baz"` is not one of the allowed values, so the attribute is dropped.
    let html = "<p data-label=\"baz\" name=\"foo\"></p>";
    let allowed_values = hashmap![
        "p" => hashmap![
            "data-label" => hashset!["bar"],
        ],
    ];
    let allowed_attrs = hashmap![
        "p" => hashset!["name"],
    ];
    let mut builder = Builder::new();
    builder.tag_attribute_values(allowed_values);
    builder.tag_attributes(allowed_attrs);
    let cleaned = builder.clean(html);
    assert_eq!(cleaned.to_string(), "<p name=\"foo\"></p>",);
}
#[test]
fn keep_allowed_attributes_with_tag_attribute_values() {
    // `data-label="bar"` matches the allowed value, so the attribute is kept.
    let html = "<p data-label=\"bar\" name=\"foo\"></p>";
    let allowed_values = hashmap![
        "p" => hashmap![
            "data-label" => hashset!["bar"],
        ],
    ];
    let allowed_attrs = hashmap![
        "p" => hashset!["name"],
    ];
    let mut builder = Builder::new();
    builder.tag_attribute_values(allowed_values);
    builder.tag_attributes(allowed_attrs);
    let cleaned = builder.clean(html);
    assert_eq!(
        cleaned.to_string(),
        "<p data-label=\"bar\" name=\"foo\"></p>",
    );
}
#[test]
fn tag_attribute_values_case_insensitive() {
    // The allowed value is lowercase `checkbox`; the uppercase input value is
    // expected to match and to be emitted unchanged.
    let html = "<input type=\"CHECKBOX\" name=\"foo\">";
    let allowed_values = hashmap![
        "input" => hashmap![
            "type" => hashset!["checkbox"],
        ],
    ];
    let allowed_attrs = hashmap![
        "input" => hashset!["name"],
    ];
    let mut builder = Builder::new();
    builder.tags(hashset!["input"]);
    builder.tag_attribute_values(allowed_values);
    builder.tag_attributes(allowed_attrs);
    let cleaned = builder.clean(html);
    assert_eq!(cleaned.to_string(), "<input type=\"CHECKBOX\" name=\"foo\">",);
}
#[test]
fn set_tag_attribute_values() {
    // The input has no `target`; the configured value is expected to be added.
    let html = "<a href=\"https://example.com/\">Link</a>";
    let extra_attrs = ["target"];
    let mut builder = Builder::new();
    builder.link_rel(None);
    builder.add_tag_attributes("a", &extra_attrs);
    builder.set_tag_attribute_value("a", "target", "_blank");
    let cleaned = builder.clean(html);
    assert_eq!(
        cleaned.to_string(),
        "<a href=\"https://example.com/\" target=\"_blank\">Link</a>",
    );
}
#[test]
fn update_existing_set_tag_attribute_values() {
    // An existing `target="bad"` is expected to be overwritten in place,
    // keeping its position ahead of `href`.
    let html = "<a target=\"bad\" href=\"https://example.com/\">Link</a>";
    let extra_attrs = ["target"];
    let mut builder = Builder::new();
    builder.link_rel(None);
    builder.add_tag_attributes("a", &extra_attrs);
    builder.set_tag_attribute_value("a", "target", "_blank");
    let cleaned = builder.clean(html);
    assert_eq!(
        cleaned.to_string(),
        "<a target=\"_blank\" href=\"https://example.com/\">Link</a>",
    );
}
#[test]
fn unwhitelisted_set_tag_attribute_values() {
    // Setting an attribute value for `my-elem` does not make the tag itself
    // allowed: the expected output contains only the `<span>`.
    let html = "<span>hi</span><my-elem>";
    let mut builder = Builder::new();
    builder.set_tag_attribute_value("my-elem", "my-attr", "val");
    let cleaned = builder.clean(html);
    assert_eq!(cleaned.to_string(), "<span>hi</span>",);
}
#[test]
fn remove_entity_link() {
    // The `href` is written entirely as hex character references; the
    // expected output has the `href` removed altogether.
    let html = "<a href=\"&#x6A&#x61&#x76&#x61&#x73&#x63&#x72&#x69&#x70&#x74&#x3A&#x61\
                &#x6C&#x65&#x72&#x74&#x28&#x27&#x58&#x53&#x53&#x27&#x29\">Click me!</a>";
    let cleaned = clean(html).to_string();
    assert_eq!(cleaned, "<a rel=\"noopener noreferrer\">Click me!</a>");
}
#[test]
fn remove_relative_url_evaluate() {
    // Custom evaluator: prefixes absolute paths with `/root`, rejects URLs
    // starting with `ba`, and passes everything else through untouched.
    fn evaluate(url: &str) -> Option<Cow<'_, str>> {
        // `/a/b/c` is an absolute path, and what we want to do stuff to.
        // `//a/b/c` is "protocol-relative", meaning "a" is a hostname.
        let absolute_path = url.starts_with('/') && !url.starts_with("//");
        if absolute_path {
            return Some(Cow::Owned(format!("/root{url}")));
        }
        if url.starts_with("ba") {
            return None;
        }
        Some(Cow::Borrowed(url))
    }
    let mut builder = Builder::new();
    builder.url_relative(UrlRelative::Custom(Box::new(evaluate)));
    let cleaned = builder
        .clean("<a href=banned>banned</a><a href=/test/path>fixed</a><a href=path>passed</a><a href=http://google.com/>skipped</a>")
        .to_string();
    assert_eq!(cleaned, "<a rel=\"noopener noreferrer\">banned</a><a href=\"/root/test/path\" rel=\"noopener noreferrer\">fixed</a><a href=\"path\" rel=\"noopener noreferrer\">passed</a><a href=\"http://google.com/\" rel=\"noopener noreferrer\">skipped</a>");
}
#[test]
fn remove_relative_url_evaluate_b() {
    // Same evaluator rules as the sibling test, expressed as one match on
    // the leading bytes of the URL.
    fn evaluate(url: &str) -> Option<Cow<'_, str>> {
        match url.as_bytes() {
            // `//a/b/c` is "protocol-relative", meaning "a" is a hostname: pass through.
            [b'/', b'/', ..] => Some(Cow::Borrowed(url)),
            // `/a/b/c` is an absolute path, and what we want to do stuff to.
            [b'/', ..] => Some(Cow::Owned(format!("/root{url}"))),
            // Anything starting with "ba" is banned.
            [b'b', b'a', ..] => None,
            _ => Some(Cow::Borrowed(url)),
        }
    }
    let mut builder = Builder::new();
    builder.url_relative(UrlRelative::Custom(Box::new(evaluate)));
    let cleaned = builder
        .clean("<a href=banned>banned</a><a href=banned title=test>banned</a><a title=test href=banned>banned</a>")
        .to_string();
    assert_eq!(cleaned, "<a rel=\"noopener noreferrer\">banned</a><a rel=\"noopener noreferrer\" title=\"test\">banned</a><a title=\"test\" rel=\"noopener noreferrer\">banned</a>");
}
#[test]
fn remove_relative_url_evaluate_c() {
    // Don't run on absolute URLs: the evaluator would rewrite every URL it
    // sees to "invalid", so an unchanged `href` in the output shows it was
    // never consulted for this absolute URL.
    fn evaluate(_: &str) -> Option<Cow<'_, str>> {
        Some(Cow::Owned(String::from("invalid")))
    }
    let a = Builder::new()
        .url_relative(UrlRelative::Custom(Box::new(evaluate)))
        .clean("<a href=\"https://www.google.com/\">google</a>")
        .to_string();
    assert_eq!(
        a,
        "<a href=\"https://www.google.com/\" rel=\"noopener noreferrer\">google</a>"
    );
}
#[test]
fn clean_children_of_bad_element() {
    // Both unknown elements are removed, while their text content is kept.
    let cleaned = Builder::new().clean("<bad><evil>a</evil>b</bad>");
    assert_eq!(cleaned.to_string(), "ab");
}
#[test]
fn reader_input() {
    // Feeds the sanitizer from a byte slice acting as the reader.
    let input: &[u8] = b"an <script>evil()</script> example";
    let outcome = Builder::new().clean_from_reader(input);
    assert!(outcome.is_ok());
    let cleaned = outcome.unwrap().to_string();
    assert_eq!(cleaned, "an  example");
}
#[test]
fn reader_non_utf8() {
    // The truncated byte sequence is expected to come out as a single U+FFFD
    // rather than causing an error.
    let input: &[u8] = b"non-utf8 \xF0\x90\x80string";
    let outcome = Builder::new().clean_from_reader(input);
    assert!(outcome.is_ok());
    let cleaned = outcome.unwrap().to_string();
    assert_eq!(cleaned, "non-utf8 \u{fffd}string");
}
#[test]
fn display_impl() {
    // Exercises the `Display` formatting of the cleaned document.
    let mut builder = Builder::new();
    builder.link_rel(None);
    let document = builder.clean(r#"a <a>link</a>"#);
    let shown = format!("{document}");
    assert_eq!(shown, "a <a>link</a>");
}
#[test]
fn debug_impl() {
    // Exercises the `Debug` formatting, which wraps the markup in `Document(...)`.
    let mut builder = Builder::new();
    builder.link_rel(None);
    let document = builder.clean(r#"a <a>link</a>"#);
    let shown = format!("{document:?}");
    assert_eq!(shown, "Document(a <a>link</a>)");
}
#[cfg(ammonia_unstable)]
#[test]
fn to_dom_node() {
    // Smoke test: only checks that `to_dom_node` can be called on a cleaned document.
    let mut builder = Builder::new();
    builder.link_rel(None);
    let document = builder.clean(r#"a <a>link</a>"#);
    let _node = document.to_dom_node();
}
#[test]
fn string_from_document() {
    // Converts the cleaned document with `String::from`; the unclosed `<a>`
    // in the input comes back closed in the expected output.
    let fragment = r#"a <a>link"#;
    let result = String::from(Builder::new().link_rel(None).clean(fragment));
    // `result` is already a `String`, so compare it directly instead of
    // re-formatting it through `format!`.
    assert_eq!(result, "a <a>link</a>");
}
// Compile-time probe: accepts any value whose type is `Sync`.
fn require_sync<T>(_: T)
where
    T: Sync,
{
}
// Compile-time probe: accepts any value whose type is `Send`.
fn require_send<T>(_: T)
where
    T: Send,
{
}
#[test]
fn require_sync_and_send() {
    // Nothing is asserted at runtime: this fails to compile if `Builder`
    // stops being `Send` or `Sync`.
    let for_send = Builder::new();
    let for_sync = Builder::new();
    require_send(for_send);
    require_sync(for_sync);
}
#[test]
fn id_prefixed() {
    // Only `<a>` may keep `id`: the expected output prefixes it there and
    // drops it from `<b>`.
    // NOTE(review): the input closes `<b>` with `</a>`; that looks like a
    // typo, but it is kept as-is because the expected output was written against it.
    let html = "<a id=\"hello\"></a><b id=\"hello\"></a>";
    let id_on_anchor = hashmap![
        "a" => hashset!["id"],
    ];
    let mut builder = Builder::new();
    builder.tag_attributes(id_on_anchor);
    builder.id_prefix(Some("prefix-"));
    let cleaned = String::from(builder.clean(html));
    assert_eq!(
        cleaned,
        "<a id=\"prefix-hello\" rel=\"noopener noreferrer\"></a><b></b>"
    );
}
#[test]
fn id_already_prefixed() {
    // An `id` that already starts with the prefix is expected to be left
    // alone, not prefixed a second time.
    let html = "<a id=\"prefix-hello\"></a>";
    let id_on_anchor = hashmap![
        "a" => hashset!["id"],
    ];
    let mut builder = Builder::new();
    builder.tag_attributes(id_on_anchor);
    builder.id_prefix(Some("prefix-"));
    let cleaned = String::from(builder.clean(html));
    assert_eq!(
        cleaned,
        "<a id=\"prefix-hello\" rel=\"noopener noreferrer\"></a>"
    );
}
#[test]
fn clean_content_tags() {
    // `script` is a clean-content tag, so the element and everything inside
    // it are expected to vanish.
    let html = "<script type=\"text/javascript\"><a>Hello!</a></script>";
    let mut builder = Builder::new();
    builder.clean_content_tags(hashset!["script"]);
    let cleaned = String::from(builder.clean(html));
    assert_eq!(cleaned, "");
}
#[test]
fn only_clean_content_tags() {
    // Only the `<script>` subtree is removed; its siblings are kept.
    let html = "<em>This is</em><script><a>Hello!</a></script><p>still here!</p>";
    let mut builder = Builder::new();
    builder.clean_content_tags(hashset!["script"]);
    let cleaned = String::from(builder.clean(html));
    assert_eq!(cleaned, "<em>This is</em><p>still here!</p>");
}
#[test]
fn clean_removed_default_tag() {
    // Removes `a` and two of its attributes from the configuration before
    // cleaning; the output still only loses the `<script>` subtree.
    let html = "<em>This is</em><script><a>Hello!</a></script><p>still here!</p>";
    let mut builder = Builder::new();
    builder.rm_tags(hashset!["a"]);
    builder.rm_tag_attributes("a", hashset!["href", "hreflang"]);
    builder.clean_content_tags(hashset!["script"]);
    let cleaned = String::from(builder.clean(html));
    assert_eq!(cleaned, "<em>This is</em><p>still here!</p>");
}
// Removes `a` from the allowed tags, marks it as a clean-content tag, and
// expects a panic.
#[should_panic]
#[test]
fn panic_on_clean_content_tag_attribute() {
    let mut builder = Builder::new();
    builder.rm_tags(std::iter::once("a"));
    builder.clean_content_tags(hashset!["a"]);
    builder.clean("");
}
// Marks `a` as a clean-content tag without removing it from the default
// configuration first, and expects a panic.
#[should_panic]
#[test]
fn panic_on_clean_content_tag() {
    let mut builder = Builder::new();
    builder.clean_content_tags(hashset!["a"]);
    builder.clean("");
}
3690
#[test]
fn clean_text_test() {
    // Angle brackets and spaces are both expected to come out as entities.
    let escaped = clean_text("<this> is <a test function");
    let expected = "&lt;this&gt;&#32;is&#32;&lt;a&#32;test&#32;function";
    assert_eq!(escaped, expected);
}
3698
#[test]
fn clean_text_spaces_test() {
    // TAB, LF, FF and SPACE are each expected as a decimal character reference.
    let escaped = clean_text("\x09\x0a\x0c\x20");
    assert_eq!(escaped, "&#9;&#10;&#12;&#32;");
}
3703
#[test]
fn ns_svg() {
    // https://github.com/cure53/DOMPurify/pull/495
    // Each case is (extra allowed tags, input fragment, expected output).
    let cases: [(&[&str], &str, &str); 4] = [
        (
            &["iframe"],
            r##"<svg><iframe><a title="</iframe><img src onerror=alert(1)>">test"##,
            "",
        ),
        (
            &["iframe"],
            "<svg><iframe>remove me</iframe></svg><iframe>keep me</iframe>",
            "<iframe>keep me</iframe>",
        ),
        (
            &["iframe"],
            "<svg><a>remove me</a></svg><iframe>keep me</iframe>",
            "<iframe>keep me</iframe>",
        ),
        (
            &["iframe", "svg"],
            "<svg><a>keep me</a></svg><iframe>keep me</iframe>",
            "<svg><a rel=\"noopener noreferrer\">keep me</a></svg><iframe>keep me</iframe>",
        ),
    ];
    for (extra_tags, fragment, expected) in cases {
        let cleaned = String::from(Builder::new().add_tags(extra_tags).clean(fragment));
        assert_eq!(cleaned, expected);
    }
}
3726
#[test]
fn ns_svg_2() {
    // NOTE(review): the `<img>` attribute is written `title'...` with no `=`,
    // unlike the MathML sibling test; possibly a typo, kept because the
    // expected output was written against this exact input.
    let html = "<svg><foreignObject><table><path><xmp><!--</xmp><img title'--&gt;&lt;img src=1 onerror=alert(1)&gt;'>";
    let extra_tags = ["svg", "foreignObject", "table", "path", "xmp"];
    let mut builder = Builder::default();
    builder.strip_comments(false);
    builder.add_tags(&extra_tags);
    let cleaned = builder.clean(html);
    assert_eq!(
        cleaned.to_string(),
        "<svg><foreignObject><table></table></foreignObject></svg>"
    );
}
3739
#[test]
fn ns_mathml() {
    // https://github.com/cure53/DOMPurify/pull/495
    // Each case is (input fragment, expected output); all of them allow the
    // same three MathML tags.
    let cases = [
        ("<mglyph></mglyph>", ""),
        (
            "<math><mtext><div><mglyph>",
            "<math><mtext><div></div></mtext></math>",
        ),
        (
            "<math><mtext><mglyph>",
            "<math><mtext><mglyph></mglyph></mtext></math>",
        ),
    ];
    for (fragment, expected) in cases {
        let cleaned = String::from(
            Builder::new()
                .add_tags(&["math", "mtext", "mglyph"])
                .clean(fragment),
        );
        assert_eq!(cleaned, expected);
    }
}
3771
#[test]
fn ns_mathml_2() {
    // Comments are kept (`strip_comments(false)`), yet everything nested
    // under the `<table>` is expected to be gone from the output.
    let html = "<math><mtext><table><mglyph><xmp><!--</xmp><img title='--&gt;&lt;img src=1 onerror=alert(1)&gt;'>";
    let extra_tags = ["math", "mtext", "table", "mglyph", "xmp"];
    let mut builder = Builder::default();
    builder.strip_comments(false);
    builder.add_tags(&extra_tags);
    let cleaned = builder.clean(html);
    assert_eq!(
        cleaned.to_string(),
        "<math><mtext><table></table></mtext></math>"
    );
}
3784
#[test]
fn ns_mathml_3() {
    let html_payload = "<math><annotation-xml encoding='text/html'><xmp><!--</xmp><img title='--&gt;&lt;img src=1 onerror=alert(1)&gt;'>";
    let svg_payload = "<math><annotation-xml encoding='image/svg+xml'><xmp><!--</xmp><img title='--&gt;&lt;img src=1 onerror=alert(1)&gt;'>";
    let svg_child = "<math><annotation-xml encoding='image/svg+xml'><svg>";

    // `encoding` is present in the input but not allow-listed, so it is
    // stripped along with the nested content.
    let cleaned = Builder::default()
        .strip_comments(false)
        .add_tags(&["math","annotation-xml","table","mglyph","xmp"])
        .clean(html_payload)
        .to_string();
    assert_eq!(cleaned, "<math><annotation-xml></annotation-xml></math>");

    // now with `encoding="text/html"` allow-listed
    let cleaned = Builder::default()
        .strip_comments(false)
        .add_tags(&["math","annotation-xml","table","mglyph","xmp"])
        .add_tag_attribute_values("annotation-xml", "encoding", ["text/html"])
        .clean(html_payload)
        .to_string();
    assert_eq!(
        cleaned,
        // yes, I tried it in Firefox, and the script didn't run
        r#"<math><annotation-xml encoding="text/html"><xmp><!--</xmp><img title="--&gt;&lt;img src=1 onerror=alert(1)&gt;"></annotation-xml></math>"#
    );

    // now with a tweaked attr value
    let cleaned = Builder::default()
        .strip_comments(false)
        .add_tags(&["math","annotation-xml","table","mglyph","xmp"])
        .add_tag_attribute_values("annotation-xml", "encoding", ["image/svg+xml"])
        .clean(svg_payload)
        .to_string();
    assert_eq!(
        cleaned,
        // yes, I tried it in Firefox, and the script didn't run
        r#"<math><annotation-xml encoding="image/svg+xml"></annotation-xml></math>"#
    );

    // now with actual SVG
    let cleaned = Builder::default()
        .strip_comments(false)
        .add_tags(&["math","annotation-xml","svg"])
        .add_tag_attribute_values("annotation-xml", "encoding", ["image/svg+xml"])
        .clean(svg_child)
        .to_string();
    assert_eq!(
        cleaned,
        // yes, I tried it in Firefox, and the script didn't run
        r#"<math><annotation-xml encoding="image/svg+xml"><svg></svg></annotation-xml></math>"#
    );
}
3834
3835
#[test]
fn xml_processing_instruction() {
    // https://blog.slonser.info/posts/dompurify-node-type-confusion/
    let with_pi = r##"<svg><?xml-stylesheet src='slonser' ?></svg>"##;

    // With the default configuration nothing is left of the input.
    let cleaned = String::from(Builder::new().clean(with_pi));
    assert_eq!(cleaned, "");

    // With `svg` allowed: (input fragment, expected output).
    let svg_cases = [
        (with_pi, "<svg></svg>"),
        (
            r##"<svg><?xml-stylesheet ><img src=x onerror="alert('Ammonia bypassed!!!')"> ?></svg>"##,
            "<svg></svg><img src=\"x\"> ?&gt;",
        ),
    ];
    for (fragment, expected) in svg_cases {
        let cleaned = String::from(Builder::new().add_tags(&["svg"]).clean(fragment));
        assert_eq!(cleaned, expected);
    }
}
3851
#[test]
fn generic_attribute_prefixes() {
    // Number of configured prefixes; panics when no prefix set is present.
    fn prefix_count(builder: &Builder<'_>) -> usize {
        builder.generic_attribute_prefixes.as_ref().unwrap().len()
    }

    let data_prefix = ["data-"];
    let code_prefix = ["code-"];
    let mut builder = Builder::new();
    let mut initial: HashSet<&'_ str> = HashSet::new();
    initial.insert("data-");

    // Starts out unset; setting it stores exactly the one prefix.
    assert!(builder.generic_attribute_prefixes.is_none());
    builder.generic_attribute_prefixes(initial);
    assert!(builder.generic_attribute_prefixes.is_some());
    assert_eq!(prefix_count(&builder), 1);

    // Adding an existing prefix changes nothing; a new one grows the set.
    builder.add_generic_attribute_prefixes(&data_prefix);
    assert_eq!(prefix_count(&builder), 1);
    builder.add_generic_attribute_prefixes(&code_prefix);
    assert_eq!(prefix_count(&builder), 2);

    // Removing is idempotent, and removing the last prefix resets to `None`.
    builder.rm_generic_attribute_prefixes(&code_prefix);
    assert_eq!(prefix_count(&builder), 1);
    builder.rm_generic_attribute_prefixes(&code_prefix);
    assert_eq!(prefix_count(&builder), 1);
    builder.rm_generic_attribute_prefixes(&data_prefix);
    assert!(builder.generic_attribute_prefixes.is_none());
}
3874
#[test]
fn selectedcontent() {
    // https://github.com/servo/html5ever/issues/712
    // The unterminated and the fully closed input must serialize identically.
    let expected = r#"<select><selectedcontent></selectedcontent><option>X</option></select>"#;
    let fragments = [
        r#"<select><selectedcontent></selectedcontent><option>X"#,
        r#"<select><selectedcontent></selectedcontent><option>X</option></select>"#,
    ];
    for fragment in fragments {
        let cleaned = String::from(
            Builder::new()
                .add_tags(&["select", "selectedcontent", "option"])
                .clean(fragment),
        );
        assert_eq!(cleaned, expected);
    }
}
3884    
#[test]
fn new_select_parse() {
    // https://github.com/whatwg/html/issues/10310#issuecomment-2304377029
    // The whitespace inside both raw strings is part of the compared text.
    let html = r#"
<select><style></select><img src onerror=xss()></style></select>
        "#;
    let expected = r#"
<select></select>
        "#;
    let extra_tags = ["select", "new-select"];
    let mut builder = Builder::new();
    builder.add_tags(&extra_tags);
    builder.clean_content_tags(hashset!["style"]);
    let cleaned = String::from(builder.clean(html));
    assert_eq!(cleaned, expected);
}
3896
#[test]
fn selectedcontent_not_in_select() {
    // https://github.com/whatwg/html/issues/10310#issuecomment-2304377029
    // Text inside `<selectedcontent>` survives outside a `<select>`, but the
    // expected output drops it inside one.
    let html = r#"
<selectedcontent>first</selectedcontent>
<div><selectedcontent>second</selectedcontent></div>
<select><selectedcontent>third</selectedcontent></select>
        "#;
    let expected = r#"
<selectedcontent>first</selectedcontent>
<div><selectedcontent>second</selectedcontent></div>
<select><selectedcontent></selectedcontent></select>
        "#;
    let extra_tags = ["select", "selectedcontent"];
    let mut builder = Builder::new();
    builder.add_tags(&extra_tags);
    let cleaned = String::from(builder.clean(html));
    assert_eq!(cleaned, expected);
}
3912
#[test]
fn generic_attribute_prefixes_clean() {
    let html = r#"<a data-1 data-2 code-1 code-2><a>Hello!</a></a>"#;

    // No prefixes configured: only the explicitly allowed `data-1` survives.
    let explicit_only = String::from(
        Builder::new()
            .add_tag_attributes("a", &["data-1"])
            .clean(html),
    );
    let expected = r#"<a data-1="" rel="noopener noreferrer"></a><a rel="noopener noreferrer">Hello!</a>"#;
    assert_eq!(explicit_only, expected);

    // The `data-` prefix additionally lets `data-2` through.
    let with_data_prefix = String::from(
        Builder::new()
            .add_tag_attributes("a", &["data-1"])
            .add_generic_attribute_prefixes(&["data-"])
            .clean(html),
    );
    let expected = r#"<a data-1="" data-2="" rel="noopener noreferrer"></a><a rel="noopener noreferrer">Hello!</a>"#;
    assert_eq!(with_data_prefix, expected);

    // Both prefixes configured: all four attributes are kept.
    let with_both_prefixes = String::from(
        Builder::new()
            .add_tag_attributes("a", &["data-1", "code-1"])
            .add_generic_attribute_prefixes(&["data-", "code-"])
            .clean(html),
    );
    let expected = r#"<a data-1="" data-2="" code-1="" code-2="" rel="noopener noreferrer"></a><a rel="noopener noreferrer">Hello!</a>"#;
    assert_eq!(with_both_prefixes, expected);
}
#[test]
fn lesser_than_isnt_html() {
    // A spaced-out comparison must not be detected as markup.
    let detected = is_html("1 < 2");
    assert!(!detected);
}
#[test]
fn dense_lesser_than_isnt_html() {
    // Same comparison without spaces: still not markup.
    let detected = is_html("1<2");
    assert!(!detected);
}
#[test]
fn what_about_number_elements() {
    // `<2>` starts with a digit and must not be detected as markup.
    let detected = is_html("foo<2>bar");
    assert!(!detected);
}
#[test]
fn turbofish_is_html_sadly() {
    // Rust's turbofish contains `<u8>`, which is reported as markup.
    let detected = is_html("Vec::<u8>::new()");
    assert!(detected);
}
#[test]
fn stop_grinning() {
    // A trailing `<g>` is enough for the text to be reported as markup.
    let detected = is_html("did you really believe me? <g>");
    assert!(detected);
}
#[test]
fn dont_be_bold() {
    // A bare opening tag is reported as markup.
    let detected = is_html("<b>");
    assert!(detected);
}
3976
#[test]
fn rewrite_with_root() {
    // Each case is (root URL, document path, href in the input, expected href).
    let tests = [
        (
            "https://github.com/rust-ammonia/ammonia/blob/master/",
            "README.md",
            "",
            "https://github.com/rust-ammonia/ammonia/blob/master/README.md",
        ),
        (
            "https://github.com/rust-ammonia/ammonia/blob/master/",
            "README.md",
            "/",
            "https://github.com/rust-ammonia/ammonia/blob/master/",
        ),
        (
            "https://github.com/rust-ammonia/ammonia/blob/master/",
            "README.md",
            "/CONTRIBUTING.md",
            "https://github.com/rust-ammonia/ammonia/blob/master/CONTRIBUTING.md",
        ),
        (
            "https://github.com/rust-ammonia/ammonia/blob/master",
            "README.md",
            "",
            "https://github.com/rust-ammonia/ammonia/blob/README.md",
        ),
        (
            "https://github.com/rust-ammonia/ammonia/blob/master",
            "README.md",
            "/",
            "https://github.com/rust-ammonia/ammonia/blob/",
        ),
        (
            "https://github.com/rust-ammonia/ammonia/blob/master",
            "README.md",
            "/CONTRIBUTING.md",
            "https://github.com/rust-ammonia/ammonia/blob/CONTRIBUTING.md",
        ),
        (
            "https://github.com/rust-ammonia/ammonia/blob/master/",
            "",
            "",
            "https://github.com/rust-ammonia/ammonia/blob/master/",
        ),
        (
            "https://github.com/rust-ammonia/ammonia/blob/master/",
            "",
            "/",
            "https://github.com/rust-ammonia/ammonia/blob/master/",
        ),
        (
            "https://github.com/rust-ammonia/ammonia/blob/master/",
            "",
            "/CONTRIBUTING.md",
            "https://github.com/rust-ammonia/ammonia/blob/master/CONTRIBUTING.md",
        ),
        (
            "https://github.com/",
            "rust-ammonia/ammonia/blob/master/README.md",
            "",
            "https://github.com/rust-ammonia/ammonia/blob/master/README.md",
        ),
        (
            "https://github.com/",
            "rust-ammonia/ammonia/blob/master/README.md",
            "/",
            "https://github.com/",
        ),
        (
            "https://github.com/",
            "rust-ammonia/ammonia/blob/master/README.md",
            "CONTRIBUTING.md",
            "https://github.com/rust-ammonia/ammonia/blob/master/CONTRIBUTING.md",
        ),
        (
            "https://github.com/",
            "rust-ammonia/ammonia/blob/master/README.md",
            "/CONTRIBUTING.md",
            "https://github.com/CONTRIBUTING.md",
        ),
    ];
    for (root, path, url, result) in tests {
        let h = format!(r#"<a href="{url}">test</a>"#);
        let r = format!(r#"<a href="{result}" rel="noopener noreferrer">test</a>"#);
        let a = Builder::new()
            .url_relative(UrlRelative::RewriteWithRoot {
                root: Url::parse(root).unwrap(),
                path: path.to_string(),
            })
            .clean(&h)
            .to_string();
        // Carry the failing case in the assertion message itself, so it shows
        // up alongside the left/right values.
        assert_eq!(
            r, a,
            "failed to check ({root}, {path}, {url}, {result})"
        );
    }
}
4078}