ammonia/
lib.rs

1// Copyright (C) Michael Howell and others
2// this library is released under the same terms as Rust itself.
3
4#![deny(unsafe_code)]
5#![deny(missing_docs)]
6
7//! Ammonia is a whitelist-based HTML sanitization library. It is designed to
8//! prevent cross-site scripting, layout breaking, and clickjacking caused
9//! by untrusted user-provided HTML being mixed into a larger web page.
10//!
11//! Ammonia uses [html5ever] to parse and serialize document fragments the same way browsers do,
12//! so it is extremely resilient to syntactic obfuscation.
13//!
14//! Ammonia parses its input exactly according to the HTML5 specification;
15//! it will not linkify bare URLs, insert line or paragraph breaks, or convert `(C)` into ©.
16//! If you want that, use a markup processor before running the sanitizer, like [pulldown-cmark].
17//!
18//! # Examples
19//!
20//! ```
21//! let result = ammonia::clean(
22//!     "<b><img src='' onerror=alert('hax')>I'm not trying to XSS you</b>"
23//! );
24//! assert_eq!(result, "<b><img src=\"\">I'm not trying to XSS you</b>");
25//! ```
26//!
27//! [html5ever]: https://github.com/servo/html5ever "The HTML parser in Servo"
28//! [pulldown-cmark]: https://github.com/google/pulldown-cmark "CommonMark parser"
29
30#[cfg(ammonia_unstable)]
31pub mod rcdom;
32
33#[cfg(not(ammonia_unstable))]
34mod rcdom;
35
36mod style;
37
38use html5ever::interface::Attribute;
39use html5ever::serialize::{serialize, SerializeOpts};
40use html5ever::tree_builder::{NodeOrText, TreeSink};
41use html5ever::{driver as html, local_name, ns, QualName};
42use maplit::{hashmap, hashset};
43use std::sync::LazyLock;
44use rcdom::{Handle, NodeData, RcDom, SerializableHandle};
45use std::borrow::{Borrow, Cow};
46use std::cell::Cell;
47use std::cmp::max;
48use std::collections::{HashMap, HashSet};
49use std::fmt::{self, Display};
50use std::io;
51use std::iter::IntoIterator as IntoIter;
52use std::mem;
53use std::rc::Rc;
54use std::str::FromStr;
55use tendril::stream::TendrilSink;
56use tendril::StrTendril;
57use tendril::{format_tendril, ByteTendril};
58pub use url::Url;
59
60use html5ever::buffer_queue::BufferQueue;
61use html5ever::tokenizer::{Token, TokenSink, TokenSinkResult, Tokenizer};
62pub use url;
63
64static AMMONIA: LazyLock<Builder<'static>> = LazyLock::new(Builder::default);
65
66/// Clean HTML with a conservative set of defaults.
67///
68/// * [tags](struct.Builder.html#defaults)
69/// * [`script` and `style` have their contents stripped](struct.Builder.html#defaults-1)
70/// * [attributes on specific tags](struct.Builder.html#defaults-2)
71/// * [attributes on all tags](struct.Builder.html#defaults-6)
72/// * [url schemes](struct.Builder.html#defaults-7)
73/// * [relative URLs are passed through, unchanged, by default](struct.Builder.html#defaults-8)
74/// * [links are marked `noopener noreferrer` by default](struct.Builder.html#defaults-9)
75/// * all `class=""` settings are blocked by default
76/// * comments are stripped by default
77/// * no generic attribute prefixes are turned on by default
78/// * no specific tag-attribute-value settings are configured by default
79///
80/// [opener]: https://mathiasbynens.github.io/rel-noopener/
81/// [referrer]: https://en.wikipedia.org/wiki/HTTP_referer
82///
83/// # Examples
84///
85///     assert_eq!(ammonia::clean("XSS<script>attack</script>"), "XSS")
86pub fn clean(src: &str) -> String {
87    AMMONIA.clean(src).to_string()
88}
89
90/// Turn an arbitrary string into unformatted HTML.
91///
92/// This function is roughly equivalent to PHP's `htmlspecialchars` and `htmlentities`.
93/// It is as strict as possible, encoding every character that has special meaning to the
94/// HTML parser.
95///
96/// # Warnings
97///
98/// This function cannot be used to package strings into a `<script>` or `<style>` tag;
99/// you need a JavaScript or CSS escaper to do that.
100///
101///     // DO NOT DO THIS
102///     # use ammonia::clean_text;
103///     let untrusted = "Robert\"); abuse();//";
104///     let html = format!("<script>invoke(\"{}\")</script>", clean_text(untrusted));
105///
106/// `<textarea>` tags will strip the first newline, if present, even if that newline is encoded.
107/// If you want to build an editor that works the way most folks expect them to, you should put a
108/// newline at the beginning of the tag, like this:
109///
110///     # use ammonia::{Builder, clean_text};
111///     let untrusted = "\n\nhi!";
112///     let mut b = Builder::new();
113///     b.add_tags(&["textarea"]);
114///     // This is the bad version
115///     // The user put two newlines at the beginning, but the first one was removed
116///     let sanitized = b.clean(&format!("<textarea>{}</textarea>", clean_text(untrusted))).to_string();
117///     assert_eq!("<textarea>\nhi!</textarea>", sanitized);
118///     // This is a good version
119///     // The user put two newlines at the beginning, and we add a third one,
120///     // so the result still has two
121///     let sanitized = b.clean(&format!("<textarea>\n{}</textarea>", clean_text(untrusted))).to_string();
122///     assert_eq!("<textarea>\n\nhi!</textarea>", sanitized);
123///     // This version is also often considered good
124///     // For many applications, leading and trailing whitespace is probably unwanted
125///     let sanitized = b.clean(&format!("<textarea>{}</textarea>", clean_text(untrusted.trim()))).to_string();
126///     assert_eq!("<textarea>hi!</textarea>", sanitized);
127///
128/// It also does not make user text safe for HTML attribute microsyntaxes such as `class` or `id`.
129/// Only use this function for places where HTML accepts unrestricted text such as `title` attributes
130/// and paragraph contents.
pub fn clean_text(src: &str) -> String {
    // Looks up the entity for a character that is significant to the HTML
    // parser; returns `None` for characters that are emitted verbatim.
    fn entity_for(c: char) -> Option<&'static str> {
        let entity = match c {
            // starts a tag
            '<' => "&lt;",
            // ends the value of an unquoted attribute
            '>' => "&gt;",
            // ends the value of a double-quoted attribute
            '\"' => "&quot;",
            // ends the value of a single-quoted attribute
            '\'' => "&apos;",
            // a parse error inside an unquoted attribute in HTML5; in SGML/HTML
            // it ends an attribute value surrounded by backquotes
            '`' => "&grave;",
            // ends an unquoted attribute
            '/' => "&#47;",
            // starts an entity reference
            '&' => "&amp;",
            // ignored when it leads an unquoted attribute
            '=' => "&#61;",
            // whitespace ends an unquoted attribute
            ' ' => "&#32;",
            '\t' => "&#9;",
            '\n' => "&#10;",
            '\x0c' => "&#12;",
            '\r' => "&#13;",
            // spec-compliant browsers substitute this themselves, but
            // middleware in between might not
            '\0' => "&#65533;",
            // everything else needs no escaping
            _ => return None,
        };
        Some(entity)
    }

    // The output is at least as long as the input; reserve that up front.
    let mut escaped = String::with_capacity(max(4, src.len()));
    for c in src.chars() {
        match entity_for(c) {
            Some(entity) => escaped.push_str(entity),
            None => escaped.push(c),
        }
    }
    escaped
}
169
170/// Determine if a given string contains HTML
171///
/// This function parses the full string as HTML and checks if the input contained any
173/// HTML syntax.
174///
175/// # Note
176/// This function will return positively for strings that contain invalid HTML syntax like
177/// `<g>` and even `Vec::<u8>::new()`.
178pub fn is_html(input: &str) -> bool {
179    let santok = SanitizationTokenizer::new();
180    let mut chunk = ByteTendril::new();
181    chunk.push_slice(input.as_bytes());
182    let mut input = BufferQueue::default();
183    input.push_back(chunk.try_reinterpret().unwrap());
184
185    let tok = Tokenizer::new(santok, Default::default());
186    let _ = tok.feed(&mut input);
187    tok.end();
188    tok.sink.was_sanitized.get()
189}
190
/// Token sink used by `is_html` to record whether the tokenizer produced
/// anything besides character data, EOF, or parse errors.
#[derive(Clone)]
struct SanitizationTokenizer {
    /// Starts out `false`; flipped to `true` by `process_token` on the first
    /// token that is not character data, EOF, or a parse error.
    was_sanitized: Cell<bool>,
}
195
196impl SanitizationTokenizer {
197    pub fn new() -> SanitizationTokenizer {
198        SanitizationTokenizer {
199            was_sanitized: false.into(),
200        }
201    }
202}
203
204impl TokenSink for SanitizationTokenizer {
205    type Handle = ();
206    fn process_token(&self, token: Token, _line_number: u64) -> TokenSinkResult<()> {
207        match token {
208            Token::CharacterTokens(_) | Token::EOFToken | Token::ParseError(_) => {}
209            _ => {
210                self.was_sanitized.set(true);
211            }
212        }
213        TokenSinkResult::Continue
214    }
215    fn end(&self) {}
216}
217
218/// An HTML sanitizer.
219///
220/// Given a fragment of HTML, Ammonia will parse it according to the HTML5
221/// parsing algorithm and sanitize any disallowed tags or attributes. This
222/// algorithm also takes care of things like unclosed and (some) misnested
223/// tags.
224///
225/// # Examples
226///
227///     use ammonia::{Builder, UrlRelative};
228///
229///     let a = Builder::default()
230///         .link_rel(None)
231///         .url_relative(UrlRelative::PassThrough)
232///         .clean("<a href=/>test")
233///         .to_string();
234///     assert_eq!(
235///         a,
236///         "<a href=\"/\">test</a>");
237///
238/// # Panics
239///
240/// Running [`clean`] or [`clean_from_reader`] may cause a panic if the builder is
241/// configured with any of these (contradictory) settings:
242///
243///  * The `rel` attribute is added to [`generic_attributes`] or the
244///    [`tag_attributes`] for the `<a>` tag, and [`link_rel`] is not set to `None`.
245///
246///    For example, this is going to panic, since [`link_rel`] is set  to
247///    `Some("noopener noreferrer")` by default,
248///    and it makes no sense to simultaneously say that the user is allowed to
249///    set their own `rel` attribute while saying that every link shall be set to
250///    a particular value:
251///
252///    ```should_panic
253///    use ammonia::Builder;
254///    use maplit::hashset;
255///
256///    # fn main() {
257///    Builder::default()
258///        .generic_attributes(hashset!["rel"])
259///        .clean("");
260///    # }
261///    ```
262///
263///    This, however, is perfectly valid:
264///
265///    ```
266///    use ammonia::Builder;
267///    use maplit::hashset;
268///
269///    # fn main() {
270///    Builder::default()
271///        .generic_attributes(hashset!["rel"])
272///        .link_rel(None)
273///        .clean("");
274///    # }
275///    ```
276///
277///  * The `class` attribute is in [`allowed_classes`] and is in the
278///    corresponding [`tag_attributes`] or in [`generic_attributes`].
279///
280///    This is done both to line up with the treatment of `rel`,
281///    and to prevent people from accidentally allowing arbitrary
282///    classes on a particular element.
283///
284///    This will panic:
285///
286///    ```should_panic
287///    use ammonia::Builder;
288///    use maplit::{hashmap, hashset};
289///
290///    # fn main() {
291///    Builder::default()
292///        .generic_attributes(hashset!["class"])
293///        .allowed_classes(hashmap!["span" => hashset!["hidden"]])
294///        .clean("");
295///    # }
296///    ```
297///
298///    This, however, is perfectly valid:
299///
300///    ```
301///    use ammonia::Builder;
302///    use maplit::{hashmap, hashset};
303///
304///    # fn main() {
305///    Builder::default()
306///        .allowed_classes(hashmap!["span" => hashset!["hidden"]])
307///        .clean("");
308///    # }
309///    ```
310///
311///  * A tag is in either [`tags`] or [`tag_attributes`] while also
312///    being in [`clean_content_tags`].
313///
314///    Both [`tags`] and [`tag_attributes`] are whitelists but
315///    [`clean_content_tags`] is a blacklist, so it doesn't make sense
316///    to have the same tag in both.
317///
318///    For example, this will panic, since the `aside` tag is in
319///    [`tags`] by default:
320///
321///    ```should_panic
322///    use ammonia::Builder;
323///    use maplit::hashset;
324///
325///    # fn main() {
326///    Builder::default()
327///        .clean_content_tags(hashset!["aside"])
328///        .clean("");
329///    # }
330///    ```
331///
332///    This, however, is valid:
333///
334///    ```
335///    use ammonia::Builder;
336///    use maplit::hashset;
337///
338///    # fn main() {
339///    Builder::default()
340///        .rm_tags(&["aside"])
341///        .clean_content_tags(hashset!["aside"])
342///        .clean("");
343///    # }
344///    ```
345///
346/// [`clean`]: #method.clean
347/// [`clean_from_reader`]: #method.clean_from_reader
348/// [`generic_attributes`]: #method.generic_attributes
349/// [`tag_attributes`]: #method.tag_attributes
350/// [`generic_attributes`]: #method.generic_attributes
351/// [`link_rel`]: #method.link_rel
352/// [`allowed_classes`]: #method.allowed_classes
353/// [`id_prefix`]: #method.id_prefix
354/// [`tags`]: #method.tags
355/// [`clean_content_tags`]: #method.clean_content_tags
#[derive(Debug)]
pub struct Builder<'a> {
    /// Tag names that are allowed (see the `tags` setter).
    tags: HashSet<&'a str>,
    /// Tag names whose contents are completely removed from the output.
    clean_content_tags: HashSet<&'a str>,
    /// Map from tag name to the set of attribute names allowed on that tag.
    tag_attributes: HashMap<&'a str, HashSet<&'a str>>,
    /// Map from tag name to a map from attribute name to the set of
    /// attribute values allowed for it.
    tag_attribute_values: HashMap<&'a str, HashMap<&'a str, HashSet<&'a str>>>,
    /// Map from tag name to a map from attribute name to the value that is
    /// to be set on that tag.
    set_tag_attribute_values: HashMap<&'a str, HashMap<&'a str, &'a str>>,
    /// Attribute names allowed on all tags (`lang` and `title` by default).
    generic_attributes: HashSet<&'a str>,
    /// Whitelisted URL schemes.
    url_schemes: HashSet<&'a str>,
    /// Policy for relative URLs (`UrlRelative::PassThrough` by default).
    url_relative: UrlRelative<'a>,
    /// Optional `AttributeFilter`; `None` by default.
    /// NOTE(review): the filter's contract is defined outside this view.
    attribute_filter: Option<Box<dyn AttributeFilter>>,
    /// Value forced onto the `rel` attribute of links, or `None` to leave
    /// `rel` alone (`"noopener noreferrer"` by default).
    link_rel: Option<&'a str>,
    /// Map from tag name to the set of class names allowed on that tag
    /// (empty by default, so all `class=""` settings are blocked).
    allowed_classes: HashMap<&'a str, HashSet<&'a str>>,
    /// Whether comments are stripped (`true` by default).
    strip_comments: bool,
    /// `None` by default.
    /// NOTE(review): presumably a prefix applied to `id` values; confirm
    /// against the `id_prefix` setter, which is outside this view.
    id_prefix: Option<&'a str>,
    /// Generic attribute prefixes; none are turned on by default.
    generic_attribute_prefixes: Option<HashSet<&'a str>>,
    /// `None` by default.
    /// NOTE(review): presumably the allowed CSS properties for `style`
    /// attributes; confirm against the `style` module.
    style_properties: Option<HashSet<&'a str>>,
}
374
375impl<'a> Default for Builder<'a> {
376    fn default() -> Self {
377        #[rustfmt::skip]
378        let tags = hashset![
379            "a", "abbr", "acronym", "area", "article", "aside", "b", "bdi",
380            "bdo", "blockquote", "br", "caption", "center", "cite", "code",
381            "col", "colgroup", "data", "dd", "del", "details", "dfn", "div",
382            "dl", "dt", "em", "figcaption", "figure", "footer", "h1", "h2",
383            "h3", "h4", "h5", "h6", "header", "hgroup", "hr", "i", "img",
384            "ins", "kbd", "li", "map", "mark", "nav", "ol", "p", "pre",
385            "q", "rp", "rt", "rtc", "ruby", "s", "samp", "small", "span",
386            "strike", "strong", "sub", "summary", "sup", "table", "tbody",
387            "td", "th", "thead", "time", "tr", "tt", "u", "ul", "var", "wbr"
388        ];
389        let clean_content_tags = hashset!["script", "style"];
390        let generic_attributes = hashset!["lang", "title"];
391        let tag_attributes = hashmap![
392            "a" => hashset![
393                "href", "hreflang"
394            ],
395            "bdo" => hashset![
396                "dir"
397            ],
398            "blockquote" => hashset![
399                "cite"
400            ],
401            "col" => hashset![
402                "align", "char", "charoff", "span"
403            ],
404            "colgroup" => hashset![
405                "align", "char", "charoff", "span"
406            ],
407            "del" => hashset![
408                "cite", "datetime"
409            ],
410            "hr" => hashset![
411                "align", "size", "width"
412            ],
413            "img" => hashset![
414                "align", "alt", "height", "src", "width"
415            ],
416            "ins" => hashset![
417                "cite", "datetime"
418            ],
419            "ol" => hashset![
420                "start"
421            ],
422            "q" => hashset![
423                "cite"
424            ],
425            "table" => hashset![
426                "align", "char", "charoff", "summary"
427            ],
428            "tbody" => hashset![
429                "align", "char", "charoff"
430            ],
431            "td" => hashset![
432                "align", "char", "charoff", "colspan", "headers", "rowspan"
433            ],
434            "tfoot" => hashset![
435                "align", "char", "charoff"
436            ],
437            "th" => hashset![
438                "align", "char", "charoff", "colspan", "headers", "rowspan", "scope"
439            ],
440            "thead" => hashset![
441                "align", "char", "charoff"
442            ],
443            "tr" => hashset![
444                "align", "char", "charoff"
445            ],
446        ];
447        let tag_attribute_values = hashmap![];
448        let set_tag_attribute_values = hashmap![];
449        let url_schemes = hashset![
450            "bitcoin",
451            "ftp",
452            "ftps",
453            "geo",
454            "http",
455            "https",
456            "im",
457            "irc",
458            "ircs",
459            "magnet",
460            "mailto",
461            "mms",
462            "mx",
463            "news",
464            "nntp",
465            "openpgp4fpr",
466            "sip",
467            "sms",
468            "smsto",
469            "ssh",
470            "tel",
471            "url",
472            "webcal",
473            "wtai",
474            "xmpp"
475        ];
476        let allowed_classes = hashmap![];
477
478        Builder {
479            tags,
480            clean_content_tags,
481            tag_attributes,
482            tag_attribute_values,
483            set_tag_attribute_values,
484            generic_attributes,
485            url_schemes,
486            url_relative: UrlRelative::PassThrough,
487            attribute_filter: None,
488            link_rel: Some("noopener noreferrer"),
489            allowed_classes,
490            strip_comments: true,
491            id_prefix: None,
492            generic_attribute_prefixes: None,
493            style_properties: None,
494        }
495    }
496}
497
498impl<'a> Builder<'a> {
499    /// Sets the tags that are allowed.
500    ///
501    /// # Examples
502    ///
503    ///     use ammonia::Builder;
504    ///     use maplit::hashset;
505    ///
506    ///     # fn main() {
507    ///     let tags = hashset!["my-tag"];
508    ///     let a = Builder::new()
509    ///         .tags(tags)
510    ///         .clean("<my-tag>")
511    ///         .to_string();
512    ///     assert_eq!(a, "<my-tag></my-tag>");
513    ///     # }
514    ///
515    /// # Defaults
516    ///
517    /// ```notest
518    /// a, abbr, acronym, area, article, aside, b, bdi,
519    /// bdo, blockquote, br, caption, center, cite, code,
520    /// col, colgroup, data, dd, del, details, dfn, div,
521    /// dl, dt, em, figcaption, figure, footer, h1, h2,
522    /// h3, h4, h5, h6, header, hgroup, hr, i, img,
523    /// ins, kbd, li, map, mark, nav, ol, p, pre,
524    /// q, rp, rt, rtc, ruby, s, samp, small, span,
525    /// strike, strong, sub, summary, sup, table, tbody,
526    /// td, th, thead, time, tr, tt, u, ul, var, wbr
527    /// ```
    pub fn tags(&mut self, value: HashSet<&'a str>) -> &mut Self {
        // Overwrites the whole set; previously configured tags are discarded.
        self.tags = value;
        // Hand the builder back so calls can be chained.
        self
    }
532
    /// Add additional whitelisted tags without overwriting old ones.
534    ///
535    /// Does nothing if the tag is already there.
536    ///
537    /// # Examples
538    ///
539    ///     let a = ammonia::Builder::default()
540    ///         .add_tags(&["my-tag"])
541    ///         .clean("<my-tag>test</my-tag> <span>mess</span>").to_string();
542    ///     assert_eq!("<my-tag>test</my-tag> <span>mess</span>", a);
543    pub fn add_tags<T: 'a + ?Sized + Borrow<str>, I: IntoIter<Item = &'a T>>(
544        &mut self,
545        it: I,
546    ) -> &mut Self {
547        self.tags.extend(it.into_iter().map(Borrow::borrow));
548        self
549    }
550
551    /// Remove already-whitelisted tags.
552    ///
    /// Does nothing if the tags are already gone.
554    ///
555    /// # Examples
556    ///
557    ///     let a = ammonia::Builder::default()
558    ///         .rm_tags(&["span"])
559    ///         .clean("<span></span>").to_string();
560    ///     assert_eq!("", a);
561    pub fn rm_tags<'b, T: 'b + ?Sized + Borrow<str>, I: IntoIter<Item = &'b T>>(
562        &mut self,
563        it: I,
564    ) -> &mut Self {
565        for i in it {
566            self.tags.remove(i.borrow());
567        }
568        self
569    }
570
571    /// Returns a copy of the set of whitelisted tags.
572    ///
573    /// # Examples
574    ///
575    ///     use maplit::hashset;
576    ///
577    ///     let tags = hashset!["my-tag-1", "my-tag-2"];
578    ///
579    ///     let mut b = ammonia::Builder::default();
580    ///     b.tags(Clone::clone(&tags));
581    ///     assert_eq!(tags, b.clone_tags());
582    pub fn clone_tags(&self) -> HashSet<&'a str> {
583        self.tags.clone()
584    }
585
586    /// Sets the tags whose contents will be completely removed from the output.
587    ///
588    /// Adding tags which are whitelisted in `tags` or `tag_attributes` will cause
589    /// a panic.
590    ///
591    /// # Examples
592    ///
593    ///     use ammonia::Builder;
594    ///     use maplit::hashset;
595    ///
596    ///     # fn main() {
597    ///     let tag_blacklist = hashset!["script", "style"];
598    ///     let a = Builder::new()
599    ///         .clean_content_tags(tag_blacklist)
600    ///         .clean("<script>alert('hello')</script><style>a { background: #fff }</style>")
601    ///         .to_string();
602    ///     assert_eq!(a, "");
603    ///     # }
604    ///
605    /// # Defaults
606    ///
607    /// ```notest
608    /// script, style
609    /// ```
    pub fn clean_content_tags(&mut self, value: HashSet<&'a str>) -> &mut Self {
        // Overwrites the whole set; the previous clean-content tags are discarded.
        self.clean_content_tags = value;
        // Hand the builder back so calls can be chained.
        self
    }
614
    /// Add additional blacklisted clean-content tags without overwriting old ones.
616    ///
617    /// Does nothing if the tag is already there.
618    ///
619    /// Adding tags which are whitelisted in `tags` or `tag_attributes` will cause
620    /// a panic.
621    ///
622    /// # Examples
623    ///
624    ///     let a = ammonia::Builder::default()
625    ///         .add_clean_content_tags(&["my-tag"])
626    ///         .clean("<my-tag>test</my-tag><span>mess</span>").to_string();
627    ///     assert_eq!("<span>mess</span>", a);
628    pub fn add_clean_content_tags<T: 'a + ?Sized + Borrow<str>, I: IntoIter<Item = &'a T>>(
629        &mut self,
630        it: I,
631    ) -> &mut Self {
632        self.clean_content_tags
633            .extend(it.into_iter().map(Borrow::borrow));
634        self
635    }
636
637    /// Remove already-blacklisted clean-content tags.
638    ///
639    /// Does nothing if the tags aren't blacklisted.
640    ///
641    /// # Examples
642    ///     use ammonia::Builder;
643    ///     use maplit::hashset;
644    ///
645    ///     # fn main() {
646    ///     let tag_blacklist = hashset!["script"];
647    ///     let a = ammonia::Builder::default()
648    ///         .clean_content_tags(tag_blacklist)
649    ///         .rm_clean_content_tags(&["script"])
650    ///         .clean("<script>XSS</script>").to_string();
651    ///     assert_eq!("XSS", a);
652    ///     # }
653    pub fn rm_clean_content_tags<'b, T: 'b + ?Sized + Borrow<str>, I: IntoIter<Item = &'b T>>(
654        &mut self,
655        it: I,
656    ) -> &mut Self {
657        for i in it {
658            self.clean_content_tags.remove(i.borrow());
659        }
660        self
661    }
662
663    /// Returns a copy of the set of blacklisted clean-content tags.
664    ///
665    /// # Examples
666    ///     # use maplit::hashset;
667    ///
668    ///     let tags = hashset!["my-tag-1", "my-tag-2"];
669    ///
670    ///     let mut b = ammonia::Builder::default();
671    ///     b.clean_content_tags(Clone::clone(&tags));
672    ///     assert_eq!(tags, b.clone_clean_content_tags());
673    pub fn clone_clean_content_tags(&self) -> HashSet<&'a str> {
674        self.clean_content_tags.clone()
675    }
676
677    /// Sets the HTML attributes that are allowed on specific tags.
678    ///
679    /// The value is structured as a map from tag names to a set of attribute names.
680    ///
681    /// If a tag is not itself whitelisted, adding entries to this map will do nothing.
682    ///
683    /// # Examples
684    ///
685    ///     use ammonia::Builder;
686    ///     use maplit::{hashmap, hashset};
687    ///
688    ///     # fn main() {
689    ///     let tags = hashset!["my-tag"];
690    ///     let tag_attributes = hashmap![
691    ///         "my-tag" => hashset!["val"]
692    ///     ];
693    ///     let a = Builder::new().tags(tags).tag_attributes(tag_attributes)
694    ///         .clean("<my-tag val=1>")
695    ///         .to_string();
696    ///     assert_eq!(a, "<my-tag val=\"1\"></my-tag>");
697    ///     # }
698    ///
699    /// # Defaults
700    ///
701    /// ```notest
702    /// a =>
703    ///     href, hreflang
704    /// bdo =>
705    ///     dir
706    /// blockquote =>
707    ///     cite
708    /// col =>
709    ///     align, char, charoff, span
710    /// colgroup =>
711    ///     align, char, charoff, span
712    /// del =>
713    ///     cite, datetime
714    /// hr =>
715    ///     align, size, width
716    /// img =>
717    ///     align, alt, height, src, width
718    /// ins =>
719    ///     cite, datetime
720    /// ol =>
721    ///     start
722    /// q =>
723    ///     cite
724    /// table =>
725    ///     align, char, charoff, summary
726    /// tbody =>
727    ///     align, char, charoff
728    /// td =>
729    ///     align, char, charoff, colspan, headers, rowspan
730    /// tfoot =>
731    ///     align, char, charoff
732    /// th =>
733    ///     align, char, charoff, colspan, headers, rowspan, scope
734    /// thead =>
735    ///     align, char, charoff
736    /// tr =>
737    ///     align, char, charoff
738    /// ```
    pub fn tag_attributes(&mut self, value: HashMap<&'a str, HashSet<&'a str>>) -> &mut Self {
        // Overwrites the whole map; previously configured entries are discarded.
        self.tag_attributes = value;
        // Hand the builder back so calls can be chained.
        self
    }
743
    /// Add additional whitelisted tag-specific attributes without overwriting old ones.
745    ///
746    /// # Examples
747    ///
748    ///     let a = ammonia::Builder::default()
749    ///         .add_tags(&["my-tag"])
750    ///         .add_tag_attributes("my-tag", &["my-attr"])
751    ///         .clean("<my-tag my-attr>test</my-tag> <span>mess</span>").to_string();
752    ///     assert_eq!("<my-tag my-attr=\"\">test</my-tag> <span>mess</span>", a);
753    pub fn add_tag_attributes<
754        T: 'a + ?Sized + Borrow<str>,
755        U: 'a + ?Sized + Borrow<str>,
756        I: IntoIter<Item = &'a T>,
757    >(
758        &mut self,
759        tag: &'a U,
760        it: I,
761    ) -> &mut Self {
762        self.tag_attributes
763            .entry(tag.borrow())
764            .or_default()
765            .extend(it.into_iter().map(Borrow::borrow));
766        self
767    }
768
769    /// Remove already-whitelisted tag-specific attributes.
770    ///
771    /// Does nothing if the attribute is already gone.
772    ///
773    /// # Examples
774    ///
775    ///     let a = ammonia::Builder::default()
776    ///         .rm_tag_attributes("a", &["href"])
777    ///         .clean("<a href=\"/\"></a>").to_string();
778    ///     assert_eq!("<a rel=\"noopener noreferrer\"></a>", a);
779    pub fn rm_tag_attributes<
780        'b,
781        'c,
782        T: 'b + ?Sized + Borrow<str>,
783        U: 'c + ?Sized + Borrow<str>,
784        I: IntoIter<Item = &'b T>,
785    >(
786        &mut self,
787        tag: &'c U,
788        it: I,
789    ) -> &mut Self {
790        if let Some(tag) = self.tag_attributes.get_mut(tag.borrow()) {
791            for i in it {
792                tag.remove(i.borrow());
793            }
794        }
795        self
796    }
797
798    /// Returns a copy of the set of whitelisted tag-specific attributes.
799    ///
800    /// # Examples
801    ///     use maplit::{hashmap, hashset};
802    ///
803    ///     let tag_attributes = hashmap![
804    ///         "my-tag" => hashset!["my-attr-1", "my-attr-2"]
805    ///     ];
806    ///
807    ///     let mut b = ammonia::Builder::default();
808    ///     b.tag_attributes(Clone::clone(&tag_attributes));
809    ///     assert_eq!(tag_attributes, b.clone_tag_attributes());
810    pub fn clone_tag_attributes(&self) -> HashMap<&'a str, HashSet<&'a str>> {
811        self.tag_attributes.clone()
812    }
813
814    /// Sets the values of HTML attributes that are allowed on specific tags.
815    ///
816    /// The value is structured as a map from tag names to a map from attribute names to a set of
817    /// attribute values.
818    ///
819    /// If a tag is not itself whitelisted, adding entries to this map will do nothing.
820    ///
821    /// # Examples
822    ///
823    ///     use ammonia::Builder;
824    ///     use maplit::{hashmap, hashset};
825    ///
826    ///     # fn main() {
827    ///     let tags = hashset!["my-tag"];
828    ///     let tag_attribute_values = hashmap![
829    ///         "my-tag" => hashmap![
830    ///             "my-attr" => hashset!["val"],
831    ///         ],
832    ///     ];
833    ///     let a = Builder::new().tags(tags).tag_attribute_values(tag_attribute_values)
834    ///         .clean("<my-tag my-attr=val>")
835    ///         .to_string();
836    ///     assert_eq!(a, "<my-tag my-attr=\"val\"></my-tag>");
837    ///     # }
838    ///
839    /// # Defaults
840    ///
841    /// None.
    pub fn tag_attribute_values(
        &mut self,
        value: HashMap<&'a str, HashMap<&'a str, HashSet<&'a str>>>,
    ) -> &mut Self {
        // Overwrites the whole map; previously configured entries are discarded.
        self.tag_attribute_values = value;
        // Hand the builder back so calls can be chained.
        self
    }
849
    /// Add additional whitelisted tag-specific attribute values without overwriting old ones.
851    ///
852    /// # Examples
853    ///
854    ///     let a = ammonia::Builder::default()
855    ///         .add_tags(&["my-tag"])
856    ///         .add_tag_attribute_values("my-tag", "my-attr", &[""])
857    ///         .clean("<my-tag my-attr>test</my-tag> <span>mess</span>").to_string();
858    ///     assert_eq!("<my-tag my-attr=\"\">test</my-tag> <span>mess</span>", a);
859    pub fn add_tag_attribute_values<
860        T: 'a + ?Sized + Borrow<str>,
861        U: 'a + ?Sized + Borrow<str>,
862        V: 'a + ?Sized + Borrow<str>,
863        I: IntoIter<Item = &'a T>,
864    >(
865        &mut self,
866        tag: &'a U,
867        attribute: &'a V,
868        it: I,
869    ) -> &mut Self {
870        self.tag_attribute_values
871            .entry(tag.borrow())
872            .or_default()
873            .entry(attribute.borrow())
874            .or_default()
875            .extend(it.into_iter().map(Borrow::borrow));
876
877        self
878    }
879
880    /// Remove already-whitelisted tag-specific attribute values.
881    ///
882    /// Does nothing if the attribute or the value is already gone.
883    ///
884    /// # Examples
885    ///
886    ///     let a = ammonia::Builder::default()
887    ///         .rm_tag_attributes("a", &["href"])
888    ///         .add_tag_attribute_values("a", "href", &["/"])
889    ///         .rm_tag_attribute_values("a", "href", &["/"])
890    ///         .clean("<a href=\"/\"></a>").to_string();
891    ///     assert_eq!("<a rel=\"noopener noreferrer\"></a>", a);
892    pub fn rm_tag_attribute_values<
893        'b,
894        'c,
895        T: 'b + ?Sized + Borrow<str>,
896        U: 'c + ?Sized + Borrow<str>,
897        V: 'c + ?Sized + Borrow<str>,
898        I: IntoIter<Item = &'b T>,
899    >(
900        &mut self,
901        tag: &'c U,
902        attribute: &'c V,
903        it: I,
904    ) -> &mut Self {
905        if let Some(attrs) = self
906            .tag_attribute_values
907            .get_mut(tag.borrow())
908            .and_then(|map| map.get_mut(attribute.borrow()))
909        {
910            for i in it {
911                attrs.remove(i.borrow());
912            }
913        }
914        self
915    }
916
917    /// Returns a copy of the set of whitelisted tag-specific attribute values.
918    ///
919    /// # Examples
920    ///
921    ///     use maplit::{hashmap, hashset};
922    ///
923    ///     let attribute_values = hashmap![
924    ///         "my-attr-1" => hashset!["foo"],
925    ///         "my-attr-2" => hashset!["baz", "bar"],
926    ///     ];
927    ///     let tag_attribute_values = hashmap![
928    ///         "my-tag" => attribute_values
929    ///     ];
930    ///
931    ///     let mut b = ammonia::Builder::default();
932    ///     b.tag_attribute_values(Clone::clone(&tag_attribute_values));
933    ///     assert_eq!(tag_attribute_values, b.clone_tag_attribute_values());
934    pub fn clone_tag_attribute_values(
935        &self,
936    ) -> HashMap<&'a str, HashMap<&'a str, HashSet<&'a str>>> {
937        self.tag_attribute_values.clone()
938    }
939
940    /// Sets the values of HTML attributes that are to be set on specific tags.
941    ///
942    /// The value is structured as a map from tag names to a map from attribute names to an
943    /// attribute value.
944    ///
945    /// If a tag is not itself whitelisted, adding entries to this map will do nothing.
946    ///
947    /// # Examples
948    ///
949    ///     use ammonia::Builder;
950    ///     use maplit::{hashmap, hashset};
951    ///
952    ///     # fn main() {
953    ///     let tags = hashset!["my-tag"];
954    ///     let set_tag_attribute_values = hashmap![
955    ///         "my-tag" => hashmap![
956    ///             "my-attr" => "val",
957    ///         ],
958    ///     ];
959    ///     let a = Builder::new().tags(tags).set_tag_attribute_values(set_tag_attribute_values)
960    ///         .clean("<my-tag>")
961    ///         .to_string();
962    ///     assert_eq!(a, "<my-tag my-attr=\"val\"></my-tag>");
963    ///     # }
964    ///
965    /// # Defaults
966    ///
967    /// None.
968    pub fn set_tag_attribute_values(
969        &mut self,
970        value: HashMap<&'a str, HashMap<&'a str, &'a str>>,
971    ) -> &mut Self {
972        self.set_tag_attribute_values = value;
973        self
974    }
975
976    /// Add an attribute value to set on a specific element.
977    ///
978    /// # Examples
979    ///
980    ///     let a = ammonia::Builder::default()
981    ///         .add_tags(&["my-tag"])
982    ///         .set_tag_attribute_value("my-tag", "my-attr", "val")
983    ///         .clean("<my-tag>test</my-tag> <span>mess</span>").to_string();
984    ///     assert_eq!("<my-tag my-attr=\"val\">test</my-tag> <span>mess</span>", a);
985    pub fn set_tag_attribute_value<
986        T: 'a + ?Sized + Borrow<str>,
987        A: 'a + ?Sized + Borrow<str>,
988        V: 'a + ?Sized + Borrow<str>,
989    >(
990        &mut self,
991        tag: &'a T,
992        attribute: &'a A,
993        value: &'a V,
994    ) -> &mut Self {
995        self.set_tag_attribute_values
996            .entry(tag.borrow())
997            .or_default()
998            .insert(attribute.borrow(), value.borrow());
999        self
1000    }
1001
1002    /// Remove existing tag-specific attribute values to be set.
1003    ///
1004    /// Does nothing if the attribute is already gone.
1005    ///
1006    /// # Examples
1007    ///
1008    ///     let a = ammonia::Builder::default()
1009    ///         // this does nothing, since no value is set for this tag attribute yet
1010    ///         .rm_set_tag_attribute_value("a", "target")
1011    ///         .set_tag_attribute_value("a", "target", "_blank")
1012    ///         .rm_set_tag_attribute_value("a", "target")
1013    ///         .clean("<a href=\"/\"></a>").to_string();
1014    ///     assert_eq!("<a href=\"/\" rel=\"noopener noreferrer\"></a>", a);
1015    pub fn rm_set_tag_attribute_value<
1016        T: 'a + ?Sized + Borrow<str>,
1017        A: 'a + ?Sized + Borrow<str>,
1018    >(
1019        &mut self,
1020        tag: &'a T,
1021        attribute: &'a A,
1022    ) -> &mut Self {
1023        if let Some(attributes) = self.set_tag_attribute_values.get_mut(tag.borrow()) {
1024            attributes.remove(attribute.borrow());
1025        }
1026        self
1027    }
1028
1029    /// Returns the value that will be set for the attribute on the element, if any.
1030    ///
1031    /// # Examples
1032    ///
1033    ///     let mut b = ammonia::Builder::default();
1034    ///     b.set_tag_attribute_value("a", "target", "_blank");
1035    ///     let value = b.get_set_tag_attribute_value("a", "target");
1036    ///     assert_eq!(value, Some("_blank"));
1037    pub fn get_set_tag_attribute_value<
1038        T: 'a + ?Sized + Borrow<str>,
1039        A: 'a + ?Sized + Borrow<str>,
1040    >(
1041        &self,
1042        tag: &'a T,
1043        attribute: &'a A,
1044    ) -> Option<&'a str> {
1045        self.set_tag_attribute_values
1046            .get(tag.borrow())
1047            .and_then(|map| map.get(attribute.borrow()))
1048            .copied()
1049    }
1050
1051    /// Returns a copy of the set of tag-specific attribute values to be set.
1052    ///
1053    /// # Examples
1054    ///
1055    ///     use maplit::{hashmap, hashset};
1056    ///
1057    ///     let attribute_values = hashmap![
1058    ///         "my-attr-1" => "foo",
1059    ///         "my-attr-2" => "bar",
1060    ///     ];
1061    ///     let set_tag_attribute_values = hashmap![
1062    ///         "my-tag" => attribute_values,
1063    ///     ];
1064    ///
1065    ///     let mut b = ammonia::Builder::default();
1066    ///     b.set_tag_attribute_values(Clone::clone(&set_tag_attribute_values));
1067    ///     assert_eq!(set_tag_attribute_values, b.clone_set_tag_attribute_values());
1068    pub fn clone_set_tag_attribute_values(&self) -> HashMap<&'a str, HashMap<&'a str, &'a str>> {
1069        self.set_tag_attribute_values.clone()
1070    }
1071
1072    /// Sets the prefix of attributes that are allowed on any tag.
1073    ///
1074    /// # Examples
1075    ///
1076    ///     use ammonia::Builder;
1077    ///     use maplit::hashset;
1078    ///
1079    ///     # fn main() {
1080    ///     let prefixes = hashset!["data-"];
1081    ///     let a = Builder::new()
1082    ///         .generic_attribute_prefixes(prefixes)
1083    ///         .clean("<b data-val=1>")
1084    ///         .to_string();
1085    ///     assert_eq!(a, "<b data-val=\"1\"></b>");
1086    ///     # }
1087    ///
1088    /// # Defaults
1089    ///
1090    /// No attribute prefixes are allowed by default.
1091    pub fn generic_attribute_prefixes(&mut self, value: HashSet<&'a str>) -> &mut Self {
1092        self.generic_attribute_prefixes = Some(value);
1093        self
1094    }
1095
1096    /// Add additional whitelisted attribute prefix without overwriting old ones.
1097    ///
1098    /// # Examples
1099    ///
1100    ///     let a = ammonia::Builder::default()
1101    ///         .add_generic_attribute_prefixes(&["my-"])
1102    ///         .clean("<span my-attr>mess</span>").to_string();
1103    ///     assert_eq!("<span my-attr=\"\">mess</span>", a);
1104    pub fn add_generic_attribute_prefixes<
1105        T: 'a + ?Sized + Borrow<str>,
1106        I: IntoIter<Item = &'a T>,
1107    >(
1108        &mut self,
1109        it: I,
1110    ) -> &mut Self {
1111        self.generic_attribute_prefixes
1112            .get_or_insert_with(HashSet::new)
1113            .extend(it.into_iter().map(Borrow::borrow));
1114        self
1115    }
1116
1117    /// Remove already-whitelisted attribute prefixes.
1118    ///
1119    /// Does nothing if the attribute prefix is already gone.
1120    ///
1121    /// # Examples
1122    ///
1123    ///     let a = ammonia::Builder::default()
1124    ///         .add_generic_attribute_prefixes(&["data-", "code-"])
1125    ///         .rm_generic_attribute_prefixes(&["data-"])
1126    ///         .clean("<span code-test=\"foo\" data-test=\"cool\"></span>").to_string();
1127    ///     assert_eq!("<span code-test=\"foo\"></span>", a);
1128    pub fn rm_generic_attribute_prefixes<
1129        'b,
1130        T: 'b + ?Sized + Borrow<str>,
1131        I: IntoIter<Item = &'b T>,
1132    >(
1133        &mut self,
1134        it: I,
1135    ) -> &mut Self {
1136        if let Some(true) = self.generic_attribute_prefixes.as_mut().map(|prefixes| {
1137            for i in it {
1138                let _ = prefixes.remove(i.borrow());
1139            }
1140            prefixes.is_empty()
1141        }) {
1142            self.generic_attribute_prefixes = None;
1143        }
1144        self
1145    }
1146
1147    /// Returns a copy of the set of whitelisted attribute prefixes.
1148    ///
1149    /// # Examples
1150    ///
1151    ///     use maplit::hashset;
1152    ///
1153    ///     let generic_attribute_prefixes = hashset!["my-prfx-1-", "my-prfx-2-"];
1154    ///
1155    ///     let mut b = ammonia::Builder::default();
1156    ///     b.generic_attribute_prefixes(Clone::clone(&generic_attribute_prefixes));
1157    ///     assert_eq!(Some(generic_attribute_prefixes), b.clone_generic_attribute_prefixes());
1158    pub fn clone_generic_attribute_prefixes(&self) -> Option<HashSet<&'a str>> {
1159        self.generic_attribute_prefixes.clone()
1160    }
1161
1162    /// Sets the attributes that are allowed on any tag.
1163    ///
1164    /// # Examples
1165    ///
1166    ///     use ammonia::Builder;
1167    ///     use maplit::hashset;
1168    ///
1169    ///     # fn main() {
1170    ///     let attributes = hashset!["data-val"];
1171    ///     let a = Builder::new()
1172    ///         .generic_attributes(attributes)
1173    ///         .clean("<b data-val=1>")
1174    ///         .to_string();
1175    ///     assert_eq!(a, "<b data-val=\"1\"></b>");
1176    ///     # }
1177    ///
1178    /// # Defaults
1179    ///
1180    /// ```notest
1181    /// lang, title
1182    /// ```
1183    pub fn generic_attributes(&mut self, value: HashSet<&'a str>) -> &mut Self {
1184        self.generic_attributes = value;
1185        self
1186    }
1187
1188    /// Add additonal whitelisted attributes without overwriting old ones.
1189    ///
1190    /// # Examples
1191    ///
1192    ///     let a = ammonia::Builder::default()
1193    ///         .add_generic_attributes(&["my-attr"])
1194    ///         .clean("<span my-attr>mess</span>").to_string();
1195    ///     assert_eq!("<span my-attr=\"\">mess</span>", a);
1196    pub fn add_generic_attributes<T: 'a + ?Sized + Borrow<str>, I: IntoIter<Item = &'a T>>(
1197        &mut self,
1198        it: I,
1199    ) -> &mut Self {
1200        self.generic_attributes
1201            .extend(it.into_iter().map(Borrow::borrow));
1202        self
1203    }
1204
1205    /// Remove already-whitelisted attributes.
1206    ///
1207    /// Does nothing if the attribute is already gone.
1208    ///
1209    /// # Examples
1210    ///
1211    ///     let a = ammonia::Builder::default()
1212    ///         .rm_generic_attributes(&["title"])
1213    ///         .clean("<span title=\"cool\"></span>").to_string();
1214    ///     assert_eq!("<span></span>", a);
1215    pub fn rm_generic_attributes<'b, T: 'b + ?Sized + Borrow<str>, I: IntoIter<Item = &'b T>>(
1216        &mut self,
1217        it: I,
1218    ) -> &mut Self {
1219        for i in it {
1220            self.generic_attributes.remove(i.borrow());
1221        }
1222        self
1223    }
1224
1225    /// Returns a copy of the set of whitelisted attributes.
1226    ///
1227    /// # Examples
1228    ///
1229    ///     use maplit::hashset;
1230    ///
1231    ///     let generic_attributes = hashset!["my-attr-1", "my-attr-2"];
1232    ///
1233    ///     let mut b = ammonia::Builder::default();
1234    ///     b.generic_attributes(Clone::clone(&generic_attributes));
1235    ///     assert_eq!(generic_attributes, b.clone_generic_attributes());
1236    pub fn clone_generic_attributes(&self) -> HashSet<&'a str> {
1237        self.generic_attributes.clone()
1238    }
1239
1240    /// Sets the URL schemes permitted on `href` and `src` attributes.
1241    ///
1242    /// # Examples
1243    ///
1244    ///     use ammonia::Builder;
1245    ///     use maplit::hashset;
1246    ///
1247    ///     # fn main() {
1248    ///     let url_schemes = hashset![
1249    ///         "http", "https", "mailto", "magnet"
1250    ///     ];
1251    ///     let a = Builder::new().url_schemes(url_schemes)
1252    ///         .clean("<a href=\"magnet:?xt=urn:ed2k:31D6CFE0D16AE931B73C59D7E0C089C0&xl=0&dn=zero_len.fil&xt=urn:bitprint:3I42H3S6NNFQ2MSVX7XZKYAYSCX5QBYJ.LWPNACQDBZRYXW3VHJVCJ64QBZNGHOHHHZWCLNQ&xt=urn:md5:D41D8CD98F00B204E9800998ECF8427E\">zero-length file</a>")
1253    ///         .to_string();
1254    ///
1255    ///     // See `link_rel` for information on the rel="noopener noreferrer" attribute
1256    ///     // in the cleaned HTML.
1257    ///     assert_eq!(a,
1258    ///       "<a href=\"magnet:?xt=urn:ed2k:31D6CFE0D16AE931B73C59D7E0C089C0&amp;xl=0&amp;dn=zero_len.fil&amp;xt=urn:bitprint:3I42H3S6NNFQ2MSVX7XZKYAYSCX5QBYJ.LWPNACQDBZRYXW3VHJVCJ64QBZNGHOHHHZWCLNQ&amp;xt=urn:md5:D41D8CD98F00B204E9800998ECF8427E\" rel=\"noopener noreferrer\">zero-length file</a>");
1259    ///     # }
1260    ///
1261    /// # Defaults
1262    ///
1263    /// ```notest
1264    /// bitcoin, ftp, ftps, geo, http, https, im, irc,
1265    /// ircs, magnet, mailto, mms, mx, news, nntp,
1266    /// openpgp4fpr, sip, sms, smsto, ssh, tel, url,
1267    /// webcal, wtai, xmpp
1268    /// ```
1269    pub fn url_schemes(&mut self, value: HashSet<&'a str>) -> &mut Self {
1270        self.url_schemes = value;
1271        self
1272    }
1273
1274    /// Add additonal whitelisted URL schemes without overwriting old ones.
1275    ///
1276    /// # Examples
1277    ///
1278    ///     let a = ammonia::Builder::default()
1279    ///         .add_url_schemes(&["my-scheme"])
1280    ///         .clean("<a href=my-scheme:home>mess</span>").to_string();
1281    ///     assert_eq!("<a href=\"my-scheme:home\" rel=\"noopener noreferrer\">mess</a>", a);
1282    pub fn add_url_schemes<T: 'a + ?Sized + Borrow<str>, I: IntoIter<Item = &'a T>>(
1283        &mut self,
1284        it: I,
1285    ) -> &mut Self {
1286        self.url_schemes.extend(it.into_iter().map(Borrow::borrow));
1287        self
1288    }
1289
1290    /// Remove already-whitelisted attributes.
1291    ///
1292    /// Does nothing if the attribute is already gone.
1293    ///
1294    /// # Examples
1295    ///
1296    ///     let a = ammonia::Builder::default()
1297    ///         .rm_url_schemes(&["ftp"])
1298    ///         .clean("<a href=\"ftp://ftp.mozilla.org/\"></a>").to_string();
1299    ///     assert_eq!("<a rel=\"noopener noreferrer\"></a>", a);
1300    pub fn rm_url_schemes<'b, T: 'b + ?Sized + Borrow<str>, I: IntoIter<Item = &'b T>>(
1301        &mut self,
1302        it: I,
1303    ) -> &mut Self {
1304        for i in it {
1305            self.url_schemes.remove(i.borrow());
1306        }
1307        self
1308    }
1309
1310    /// Returns a copy of the set of whitelisted URL schemes.
1311    ///
1312    /// # Examples
1313    ///     use maplit::hashset;
1314    ///
1315    ///     let url_schemes = hashset!["my-scheme-1", "my-scheme-2"];
1316    ///
1317    ///     let mut b = ammonia::Builder::default();
1318    ///     b.url_schemes(Clone::clone(&url_schemes));
1319    ///     assert_eq!(url_schemes, b.clone_url_schemes());
1320    pub fn clone_url_schemes(&self) -> HashSet<&'a str> {
1321        self.url_schemes.clone()
1322    }
1323
1324    /// Configures the behavior for relative URLs: pass-through, resolve-with-base, or deny.
1325    ///
1326    /// # Examples
1327    ///
1328    ///     use ammonia::{Builder, UrlRelative};
1329    ///
1330    ///     let a = Builder::new().url_relative(UrlRelative::PassThrough)
1331    ///         .clean("<a href=/>Home</a>")
1332    ///         .to_string();
1333    ///
1334    ///     // See `link_rel` for information on the rel="noopener noreferrer" attribute
1335    ///     // in the cleaned HTML.
1336    ///     assert_eq!(
1337    ///       a,
1338    ///       "<a href=\"/\" rel=\"noopener noreferrer\">Home</a>");
1339    ///
1340    /// # Defaults
1341    ///
1342    /// ```notest
1343    /// UrlRelative::PassThrough
1344    /// ```
1345    pub fn url_relative(&mut self, value: UrlRelative<'a>) -> &mut Self {
1346        self.url_relative = value;
1347        self
1348    }
1349
1350    /// Allows rewriting of all attributes using a callback.
1351    ///
1352    /// The callback takes name of the element, attribute and its value.
1353    /// Returns `None` to remove the attribute, or a value to use.
1354    ///
1355    /// Rewriting of attributes with URLs is done before `url_relative()`.
1356    ///
1357    /// # Panics
1358    ///
1359    /// If more than one callback is set.
1360    ///
1361    /// # Examples
1362    ///
1363    /// ```rust
1364    /// use ammonia::Builder;
1365    /// let a = Builder::new()
1366    ///     .attribute_filter(|element, attribute, value| {
1367    ///         match (element, attribute) {
1368    ///             ("img", "src") => None,
1369    ///             _ => Some(value.into())
1370    ///         }
1371    ///     })
1372    ///     .link_rel(None)
1373    ///     .clean("<a href=/><img alt=Home src=foo></a>")
1374    ///     .to_string();
1375    /// assert_eq!(a,
1376    ///     r#"<a href="/"><img alt="Home"></a>"#);
1377    /// ```
1378    pub fn attribute_filter<'cb, CallbackFn>(&mut self, callback: CallbackFn) -> &mut Self
1379    where
1380        CallbackFn: for<'u> Fn(&str, &str, &'u str) -> Option<Cow<'u, str>> + Send + Sync + 'static,
1381    {
1382        assert!(
1383            self.attribute_filter.is_none(),
1384            "attribute_filter can be set only once"
1385        );
1386        self.attribute_filter = Some(Box::new(callback));
1387        self
1388    }
1389
1390    /// Returns `true` if the relative URL resolver is set to `Deny`.
1391    ///
1392    /// # Examples
1393    ///
1394    ///     use ammonia::{Builder, UrlRelative};
1395    ///     let mut a = Builder::default();
1396    ///     a.url_relative(UrlRelative::Deny);
1397    ///     assert!(a.is_url_relative_deny());
1398    ///     a.url_relative(UrlRelative::PassThrough);
1399    ///     assert!(!a.is_url_relative_deny());
1400    pub fn is_url_relative_deny(&self) -> bool {
1401        matches!(self.url_relative, UrlRelative::Deny)
1402    }
1403
1404    /// Returns `true` if the relative URL resolver is set to `PassThrough`.
1405    ///
1406    /// # Examples
1407    ///
1408    ///     use ammonia::{Builder, UrlRelative};
1409    ///     let mut a = Builder::default();
1410    ///     a.url_relative(UrlRelative::Deny);
1411    ///     assert!(!a.is_url_relative_pass_through());
1412    ///     a.url_relative(UrlRelative::PassThrough);
1413    ///     assert!(a.is_url_relative_pass_through());
1414    pub fn is_url_relative_pass_through(&self) -> bool {
1415        matches!(self.url_relative, UrlRelative::PassThrough)
1416    }
1417
1418    /// Returns `true` if the relative URL resolver is set to `Custom`.
1419    ///
1420    /// # Examples
1421    ///
1422    ///     use ammonia::{Builder, UrlRelative};
1423    ///     use std::borrow::Cow;
1424    ///     fn test(a: &str) -> Option<Cow<str>> { None }
1425    ///     # fn main() {
1426    ///     let mut a = Builder::default();
1427    ///     a.url_relative(UrlRelative::Custom(Box::new(test)));
1428    ///     assert!(a.is_url_relative_custom());
1429    ///     a.url_relative(UrlRelative::PassThrough);
1430    ///     assert!(!a.is_url_relative_custom());
1431    ///     a.url_relative(UrlRelative::Deny);
1432    ///     assert!(!a.is_url_relative_custom());
1433    ///     # }
1434    pub fn is_url_relative_custom(&self) -> bool {
1435        matches!(self.url_relative, UrlRelative::Custom(_))
1436    }
1437
1438    /// Configures a `rel` attribute that will be added on links.
1439    ///
1440    /// If `rel` is in the generic or tag attributes, this must be set to `None`.
1441    /// Common `rel` values to include:
1442    ///
1443    /// * `noopener`: This prevents [a particular type of XSS attack],
1444    ///   and should usually be turned on for untrusted HTML.
1445    /// * `noreferrer`: This prevents the browser from [sending the source URL]
1446    ///   to the website that is linked to.
1447    /// * `nofollow`: This prevents search engines from [using this link for
1448    ///   ranking], which disincentivizes spammers.
1449    ///
1450    /// To turn on rel-insertion, call this function with a space-separated list.
1451    /// Ammonia does not parse rel-attributes;
1452    /// it just puts the given string into the attribute directly.
1453    ///
1454    /// [a particular type of XSS attack]: https://mathiasbynens.github.io/rel-noopener/
1455    /// [sending the source URL]: https://en.wikipedia.org/wiki/HTTP_referer
1456    /// [using this link for ranking]: https://en.wikipedia.org/wiki/Nofollow
1457    ///
1458    /// # Examples
1459    ///
1460    ///     use ammonia::Builder;
1461    ///
1462    ///     let a = Builder::new().link_rel(None)
1463    ///         .clean("<a href=https://rust-lang.org/>Rust</a>")
1464    ///         .to_string();
1465    ///     assert_eq!(
1466    ///       a,
1467    ///       "<a href=\"https://rust-lang.org/\">Rust</a>");
1468    ///
1469    /// # Defaults
1470    ///
1471    /// ```notest
1472    /// Some("noopener noreferrer")
1473    /// ```
1474    pub fn link_rel(&mut self, value: Option<&'a str>) -> &mut Self {
1475        self.link_rel = value;
1476        self
1477    }
1478
1479    /// Returns the settings for links' `rel` attribute, if one is set.
1480    ///
1481    /// # Examples
1482    ///
1483    ///     use ammonia::{Builder, UrlRelative};
1484    ///     let mut a = Builder::default();
1485    ///     a.link_rel(Some("a b"));
1486    ///     assert_eq!(a.get_link_rel(), Some("a b"));
1487    pub fn get_link_rel(&self) -> Option<&str> {
1488        self.link_rel
1489    }
1490
1491    /// Sets the CSS classes that are allowed on specific tags.
1492    ///
1493    /// The values is structured as a map from tag names to a set of class names.
1494    ///
1495    /// If the `class` attribute is itself whitelisted for a tag, then adding entries to
1496    /// this map will cause a panic.
1497    ///
1498    /// # Examples
1499    ///
1500    ///     use ammonia::Builder;
1501    ///     use maplit::{hashmap, hashset};
1502    ///
1503    ///     # fn main() {
1504    ///     let allowed_classes = hashmap![
1505    ///         "code" => hashset!["rs", "ex", "c", "cxx", "js"]
1506    ///     ];
1507    ///     let a = Builder::new()
1508    ///         .allowed_classes(allowed_classes)
1509    ///         .clean("<code class=rs>fn main() {}</code>")
1510    ///         .to_string();
1511    ///     assert_eq!(
1512    ///       a,
1513    ///       "<code class=\"rs\">fn main() {}</code>");
1514    ///     # }
1515    ///
1516    /// # Defaults
1517    ///
1518    /// The set of allowed classes is empty by default.
1519    pub fn allowed_classes(&mut self, value: HashMap<&'a str, HashSet<&'a str>>) -> &mut Self {
1520        self.allowed_classes = value;
1521        self
1522    }
1523
1524    /// Add additonal whitelisted classes without overwriting old ones.
1525    ///
1526    /// # Examples
1527    ///
1528    ///     let a = ammonia::Builder::default()
1529    ///         .add_allowed_classes("a", &["onebox"])
1530    ///         .clean("<a href=/ class=onebox>mess</span>").to_string();
1531    ///     assert_eq!("<a href=\"/\" class=\"onebox\" rel=\"noopener noreferrer\">mess</a>", a);
1532    pub fn add_allowed_classes<
1533        T: 'a + ?Sized + Borrow<str>,
1534        U: 'a + ?Sized + Borrow<str>,
1535        I: IntoIter<Item = &'a T>,
1536    >(
1537        &mut self,
1538        tag: &'a U,
1539        it: I,
1540    ) -> &mut Self {
1541        self.allowed_classes
1542            .entry(tag.borrow())
1543            .or_default()
1544            .extend(it.into_iter().map(Borrow::borrow));
1545        self
1546    }
1547
1548    /// Remove already-whitelisted attributes.
1549    ///
1550    /// Does nothing if the attribute is already gone.
1551    ///
1552    /// # Examples
1553    ///
1554    ///     let a = ammonia::Builder::default()
1555    ///         .add_allowed_classes("span", &["active"])
1556    ///         .rm_allowed_classes("span", &["active"])
1557    ///         .clean("<span class=active>").to_string();
1558    ///     assert_eq!("<span class=\"\"></span>", a);
1559    pub fn rm_allowed_classes<
1560        'b,
1561        'c,
1562        T: 'b + ?Sized + Borrow<str>,
1563        U: 'c + ?Sized + Borrow<str>,
1564        I: IntoIter<Item = &'b T>,
1565    >(
1566        &mut self,
1567        tag: &'c U,
1568        it: I,
1569    ) -> &mut Self {
1570        if let Some(tag) = self.allowed_classes.get_mut(tag.borrow()) {
1571            for i in it {
1572                tag.remove(i.borrow());
1573            }
1574        }
1575        self
1576    }
1577
1578    /// Returns a copy of the set of whitelisted class attributes.
1579    ///
1580    /// # Examples
1581    ///
1582    ///     use maplit::{hashmap, hashset};
1583    ///
1584    ///     let allowed_classes = hashmap![
1585    ///         "my-tag" => hashset!["my-class-1", "my-class-2"]
1586    ///     ];
1587    ///
1588    ///     let mut b = ammonia::Builder::default();
1589    ///     b.allowed_classes(Clone::clone(&allowed_classes));
1590    ///     assert_eq!(allowed_classes, b.clone_allowed_classes());
1591    pub fn clone_allowed_classes(&self) -> HashMap<&'a str, HashSet<&'a str>> {
1592        self.allowed_classes.clone()
1593    }
1594
1595    /// Configures the handling of HTML comments.
1596    ///
1597    /// If this option is false, comments will be preserved.
1598    ///
1599    /// # Examples
1600    ///
1601    ///     use ammonia::Builder;
1602    ///
1603    ///     let a = Builder::new().strip_comments(false)
1604    ///         .clean("<!-- yes -->")
1605    ///         .to_string();
1606    ///     assert_eq!(
1607    ///       a,
1608    ///       "<!-- yes -->");
1609    ///
1610    /// # Defaults
1611    ///
1612    /// `true`
1613    pub fn strip_comments(&mut self, value: bool) -> &mut Self {
1614        self.strip_comments = value;
1615        self
1616    }
1617
1618    /// Returns `true` if comment stripping is turned on.
1619    ///
1620    /// # Examples
1621    ///
1622    ///     let mut a = ammonia::Builder::new();
1623    ///     a.strip_comments(true);
1624    ///     assert!(a.will_strip_comments());
1625    ///     a.strip_comments(false);
1626    ///     assert!(!a.will_strip_comments());
1627    pub fn will_strip_comments(&self) -> bool {
1628        self.strip_comments
1629    }
1630
1631    /// Prefixes all "id" attribute values with a given string.  Note that the tag and
1632    /// attribute themselves must still be whitelisted.
1633    ///
1634    /// # Examples
1635    ///
1636    ///     use ammonia::Builder;
1637    ///     use maplit::hashset;
1638    ///
1639    ///     # fn main() {
1640    ///     let attributes = hashset!["id"];
1641    ///     let a = Builder::new()
1642    ///         .generic_attributes(attributes)
1643    ///         .id_prefix(Some("safe-"))
1644    ///         .clean("<b id=42>")
1645    ///         .to_string();
1646    ///     assert_eq!(a, "<b id=\"safe-42\"></b>");
1647    ///     # }
1648
1649    ///
1650    /// # Defaults
1651    ///
1652    /// `None`
1653    pub fn id_prefix(&mut self, value: Option<&'a str>) -> &mut Self {
1654        self.id_prefix = value;
1655        self
1656    }
1657
1658    /// Only allows the specified properties in `style` attributes.
1659    ///
1660    /// Irrelevant if `style` is not an allowed attribute.
1661    ///
1662    /// Note that if style filtering is enabled style properties will be normalised e.g.
1663    /// invalid declarations and @rules will be removed, with only syntactically valid
1664    /// declarations kept.
1665    ///
1666    /// # Examples
1667    ///
1668    ///     use ammonia::Builder;
1669    ///     use maplit::hashset;
1670    ///
1671    ///     # fn main() {
1672    ///     let attributes = hashset!["style"];
1673    ///     let properties = hashset!["color"];
1674    ///     let a = Builder::new()
1675    ///         .generic_attributes(attributes)
1676    ///         .filter_style_properties(properties)
1677    ///         .clean("<p style=\"font-weight: heavy; color: red\">my html</p>")
1678    ///         .to_string();
1679    ///     assert_eq!(a, "<p style=\"color:red\">my html</p>");
1680    ///     # }
1681    pub fn filter_style_properties(&mut self, value: HashSet<&'a str>) -> &mut Self {
1682        self.style_properties = Some(value);
1683        self
1684    }
1685
1686    /// Constructs a [`Builder`] instance configured with the [default options].
1687    ///
1688    /// # Examples
1689    ///
1690    ///     use ammonia::{Builder, Url, UrlRelative};
1691    ///     # use std::error::Error;
1692    ///
1693    ///     # fn do_main() -> Result<(), Box<dyn Error>> {
1694    ///     let input = "<!-- comments will be stripped -->This is an <a href=.>Ammonia</a> example using <a href=struct.Builder.html#method.new onclick=xss>the <code onmouseover=xss>new()</code> function</a>.";
1695    ///     let output = "This is an <a href=\"https://docs.rs/ammonia/1.0/ammonia/\" rel=\"noopener noreferrer\">Ammonia</a> example using <a href=\"https://docs.rs/ammonia/1.0/ammonia/struct.Builder.html#method.new\" rel=\"noopener noreferrer\">the <code>new()</code> function</a>.";
1696    ///
1697    ///     let result = Builder::new() // <--
1698    ///         .url_relative(UrlRelative::RewriteWithBase(Url::parse("https://docs.rs/ammonia/1.0/ammonia/")?))
1699    ///         .clean(input)
1700    ///         .to_string();
1701    ///     assert_eq!(result, output);
1702    ///     # Ok(())
1703    ///     # }
1704    ///     # fn main() { do_main().unwrap() }
1705    ///
1706    /// [default options]: fn.clean.html
1707    /// [`Builder`]: struct.Builder.html
1708    pub fn new() -> Self {
1709        Self::default()
1710    }
1711
1712    /// Constructs a [`Builder`] instance configured with no allowed tags.
1713    ///
1714    /// # Examples
1715    ///
1716    ///     use ammonia::{Builder, Url, UrlRelative};
1717    ///     # use std::error::Error;
1718    ///
1719    ///     # fn do_main() -> Result<(), Box<dyn Error>> {
1720    ///     let input = "<!-- comments will be stripped -->This is an <a href=.>Ammonia</a> example using <a href=struct.Builder.html#method.new onclick=xss>the <code onmouseover=xss>empty()</code> function</a>.";
1721    ///     let output = "This is an Ammonia example using the empty() function.";
1722    ///
1723    ///     let result = Builder::empty() // <--
1724    ///         .url_relative(UrlRelative::RewriteWithBase(Url::parse("https://docs.rs/ammonia/1.0/ammonia/")?))
1725    ///         .clean(input)
1726    ///         .to_string();
1727    ///     assert_eq!(result, output);
1728    ///     # Ok(())
1729    ///     # }
1730    ///     # fn main() { do_main().unwrap() }
1731    ///
1732    /// [default options]: fn.clean.html
1733    /// [`Builder`]: struct.Builder.html
1734    pub fn empty() -> Self {
1735        Self {
1736            tags: hashset![],
1737            ..Self::default()
1738        }
1739    }
1740
1741    /// Sanitizes an HTML fragment in a string according to the configured options.
1742    ///
1743    /// # Examples
1744    ///
1745    ///     use ammonia::{Builder, Url, UrlRelative};
1746    ///     # use std::error::Error;
1747    ///
1748    ///     # fn do_main() -> Result<(), Box<dyn Error>> {
1749    ///     let input = "<!-- comments will be stripped -->This is an <a href=.>Ammonia</a> example using <a href=struct.Builder.html#method.new onclick=xss>the <code onmouseover=xss>new()</code> function</a>.";
1750    ///     let output = "This is an <a href=\"https://docs.rs/ammonia/1.0/ammonia/\" rel=\"noopener noreferrer\">Ammonia</a> example using <a href=\"https://docs.rs/ammonia/1.0/ammonia/struct.Builder.html#method.new\" rel=\"noopener noreferrer\">the <code>new()</code> function</a>.";
1751    ///
1752    ///     let result = Builder::new()
1753    ///         .url_relative(UrlRelative::RewriteWithBase(Url::parse("https://docs.rs/ammonia/1.0/ammonia/")?))
1754    ///         .clean(input)
1755    ///         .to_string(); // <--
1756    ///     assert_eq!(result, output);
1757    ///     # Ok(())
1758    ///     # }
1759    ///     # fn main() { do_main().unwrap() }
1760    pub fn clean(&self, src: &str) -> Document {
1761        let parser = Self::make_parser();
1762        let dom = parser.one(src);
1763        self.clean_dom(dom)
1764    }
1765
1766    /// Sanitizes an HTML fragment from a reader according to the configured options.
1767    ///
1768    /// The input should be in UTF-8 encoding, otherwise the decoding is lossy, just
1769    /// like when using [`String::from_utf8_lossy`].
1770    ///
1771    /// To avoid consuming the reader, a mutable reference can be passed to this method.
1772    ///
1773    /// # Examples
1774    ///
1775    ///     use ammonia::Builder;
1776    ///     # use std::error::Error;
1777    ///
1778    ///     # fn do_main() -> Result<(), Box<dyn Error>> {
1779    ///     let a = Builder::new()
1780    ///         .clean_from_reader(&b"<!-- no -->"[..])? // notice the `b`
1781    ///         .to_string();
1782    ///     assert_eq!(a, "");
1783    ///     # Ok(()) }
1784    ///     # fn main() { do_main().unwrap() }
1785    ///
1786    /// [`String::from_utf8_lossy`]: https://doc.rust-lang.org/std/string/struct.String.html#method.from_utf8_lossy
1787    pub fn clean_from_reader<R>(&self, mut src: R) -> io::Result<Document>
1788    where
1789        R: io::Read,
1790    {
1791        let parser = Self::make_parser().from_utf8();
1792        let dom = parser.read_from(&mut src)?;
1793        Ok(self.clean_dom(dom))
1794    }
1795
1796    /// Clean a post-parsing DOM.
1797    ///
1798    /// This is not a public API because RcDom isn't really stable.
1799    /// We want to be able to take breaking changes to html5ever itself
1800    /// without having to break Ammonia's API.
1801    fn clean_dom(&self, dom: RcDom) -> Document {
1802        let mut stack = Vec::new();
1803        let mut removed = Vec::new();
1804        let link_rel = self
1805            .link_rel
1806            .map(|link_rel| format_tendril!("{}", link_rel));
1807        if link_rel.is_some() {
1808            assert!(self.generic_attributes.get("rel").is_none());
1809            assert!(self
1810                .tag_attributes
1811                .get("a")
1812                .and_then(|a| a.get("rel"))
1813                .is_none());
1814        }
1815        assert!(self.allowed_classes.is_empty() || !self.generic_attributes.contains("class"));
1816        for tag_name in self.allowed_classes.keys() {
1817            assert!(self
1818                .tag_attributes
1819                .get(tag_name)
1820                .and_then(|a| a.get("class"))
1821                .is_none());
1822        }
1823        for tag_name in &self.clean_content_tags {
1824            assert!(!self.tags.contains(tag_name), "`{tag_name}` appears in `clean_content_tags` and in `tags` at the same time");
1825            assert!(!self.tag_attributes.contains_key(tag_name), "`{tag_name}` appears in `clean_content_tags` and in `tag_attributes` at the same time");
1826        }
1827        let body = {
1828            let children = dom.document.children.borrow();
1829            children[0].clone()
1830        };
1831        stack.extend(
1832            mem::take(&mut *body.children.borrow_mut())
1833                .into_iter()
1834                .rev(),
1835        );
1836        // This design approach is used to prevent pathological content from producing
1837        // a stack overflow. The `stack` contains to-be-cleaned nodes, while `remove`,
1838        // of course, contains nodes that need to be dropped (we can't just drop them,
1839        // because they could have a very deep child tree).
1840        while let Some(mut node) = stack.pop() {
1841            let parent = node.parent
1842                .replace(None).expect("a node in the DOM will have a parent, except the root, which is not processed")
1843                .upgrade().expect("a node's parent will be pointed to by its parent (or the root pointer), and will not be dropped");
1844            if self.clean_node_content(&node) || !self.check_expected_namespace(&parent, &node) {
1845                removed.push(node);
1846                continue;
1847            }
1848            let pass = self.clean_child(&mut node);
1849            if pass {
1850                self.adjust_node_attributes(&mut node, &link_rel, self.id_prefix);
1851                dom.append(&parent.clone(), NodeOrText::AppendNode(node.clone()));
1852            } else {
1853                for sub in node.children.borrow_mut().iter_mut() {
1854                    sub.parent.replace(Some(Rc::downgrade(&parent)));
1855                }
1856            }
1857            stack.extend(
1858                mem::take(&mut *node.children.borrow_mut())
1859                    .into_iter()
1860                    .rev(),
1861            );
1862            if !pass {
1863                removed.push(node);
1864            }
1865        }
1866        // Now, imperatively clean up all of the child nodes.
1867        // Otherwise, we could wind up with a DoS, either caused by a memory leak,
1868        // or caused by a stack overflow.
1869        while let Some(node) = removed.pop() {
1870            removed.extend_from_slice(&mem::take(&mut *node.children.borrow_mut())[..]);
1871        }
1872        Document(dom)
1873    }
1874
1875    /// Returns `true` if a node and all its content should be removed.
1876    fn clean_node_content(&self, node: &Handle) -> bool {
1877        match node.data {
1878            NodeData::Text { .. }
1879            | NodeData::Comment { .. }
1880            | NodeData::Doctype { .. }
1881            | NodeData::Document
1882            | NodeData::ProcessingInstruction { .. } => false,
1883            NodeData::Element { ref name, .. } => self.clean_content_tags.contains(&*name.local),
1884        }
1885    }
1886
1887    /// Remove unwanted attributes, and check if the node should be kept or not.
1888    ///
1889    /// The root node doesn't need cleaning because we create the root node ourselves,
1890    /// and it doesn't get serialized, and ... it just exists to give the parser
1891    /// a context (in this case, a div-like block context).
1892    fn clean_child(&self, child: &mut Handle) -> bool {
1893        match child.data {
1894            NodeData::Text { .. } => true,
1895            NodeData::Comment { .. } => !self.strip_comments,
1896            NodeData::Doctype { .. }
1897            | NodeData::Document
1898            | NodeData::ProcessingInstruction { .. } => false,
1899            NodeData::Element {
1900                ref name,
1901                ref attrs,
1902                ..
1903            } => {
1904                if self.tags.contains(&*name.local) {
1905                    let attr_filter = |attr: &html5ever::Attribute| {
1906                        let whitelisted = self.generic_attributes.contains(&*attr.name.local)
1907                            || self.generic_attribute_prefixes.as_ref().map(|prefixes| {
1908                                prefixes.iter().any(|&p| attr.name.local.starts_with(p))
1909                            }) == Some(true)
1910                            || self
1911                                .tag_attributes
1912                                .get(&*name.local)
1913                                .map(|ta| ta.contains(&*attr.name.local))
1914                                == Some(true)
1915                            || self
1916                                .tag_attribute_values
1917                                .get(&*name.local)
1918                                .and_then(|tav| tav.get(&*attr.name.local))
1919                                .map(|vs| {
1920                                    let attr_val = attr.value.to_lowercase();
1921                                    vs.iter().any(|v| v.to_lowercase() == attr_val)
1922                                })
1923                                == Some(true);
1924                        if !whitelisted {
1925                            // If the class attribute is not whitelisted,
1926                            // but there is a whitelisted set of allowed_classes,
1927                            // do not strip out the class attribute.
1928                            // Banned classes will be filtered later.
1929                            &*attr.name.local == "class"
1930                                && self.allowed_classes.contains_key(&*name.local)
1931                        } else if is_url_attr(&name.local, &attr.name.local) {
1932                            let url = Url::parse(&attr.value);
1933                            if let Ok(url) = url {
1934                                self.url_schemes.contains(url.scheme())
1935                            } else if url == Err(url::ParseError::RelativeUrlWithoutBase) {
1936                                !matches!(self.url_relative, UrlRelative::Deny)
1937                            } else {
1938                                false
1939                            }
1940                        } else {
1941                            true
1942                        }
1943                    };
1944                    attrs.borrow_mut().retain(attr_filter);
1945                    true
1946                } else {
1947                    false
1948                }
1949            }
1950        }
1951    }
1952
1953    // Check for unexpected namespace changes.
1954    //
1955    // The issue happens if developers added to the list of allowed tags any
1956    // tag which is parsed in RCDATA state, PLAINTEXT state or RAWTEXT state,
1957    // that is:
1958    //
1959    // * title
1960    // * textarea
1961    // * xmp
1962    // * iframe
1963    // * noembed
1964    // * noframes
1965    // * plaintext
1966    // * noscript
1967    // * style
1968    // * script
1969    //
// An example in the wild is Plume, which allows iframe [1]. The examples
// below assume the following policy:
1972    //
1973    //     Builder::new()
1974    //        .add_tags(&["iframe"])
1975    //
1976    // In HTML namespace `<iframe>` is parsed specially; that is, its content is
1977    // treated as text. For instance, the following html:
1978    //
1979    //     <iframe><a>test
1980    //
1981    // Is parsed into the following DOM tree:
1982    //
1983    //     iframe
1984    //     └─ #text: <a>test
1985    //
1986    // So iframe cannot have any children other than a text node.
1987    //
1988    // The same is not true, though, in "foreign content"; that is, within
1989    // <svg> or <math> tags. The following html:
1990    //
1991    //     <svg><iframe><a>test
1992    //
1993    // is parsed differently:
1994    //
1995    //    svg
1996    //    └─ iframe
1997    //       └─ a
1998    //          └─ #text: test
1999    //
2000    // So in SVG namespace iframe can have children.
2001    //
2002    // Ammonia disallows <svg> but it keeps its content after deleting it. And
2003    // the parser internally keeps track of the namespace of the element. So
2004    // assume we have the following snippet:
2005    //
2006    //     <svg><iframe><a title="</iframe><img src onerror=alert(1)>">test
2007    //
2008    // It is parsed into:
2009    //
2010    //     svg
2011    //     └─ iframe
2012    //        └─ a title="</iframe><img src onerror=alert(1)>"
2013    //           └─ #text: test
2014    //
// This DOM tree is harmless from ammonia's point of view because the piece
// of code that looks like XSS is in a title attribute. Hence, the
// resulting "safe" HTML from ammonia would be:
//
//     <iframe><a title="</iframe><img src onerror=alert(1)>" rel="noopener noreferrer">test</a></iframe>
2021    //
2022    // However, at this point, the information about namespace is lost, which
2023    // means that the browser will parse this snippet into:
2024    //
2025    //     ├─ iframe
2026    //     │  └─ #text: <a title="
2027    //     ├─ img src="" onerror="alert(1)"
2028    //     └─ #text: " rel="noopener noreferrer">test
2029    //
2030    // Leading to XSS.
2031    //
2032    // To solve this issue, check for unexpected namespace switches after cleanup.
2033    // Elements which change namespace at an unexpected point are removed.
2034    // This function returns `true` if `child` should be kept, and `false` if it
2035    // should be removed.
2036    //
2037    // [1]: https://github.com/Plume-org/Plume/blob/main/plume-models/src/safe_string.rs#L21
2038    fn check_expected_namespace(&self, parent: &Handle, child: &Handle) -> bool {
2039        let (parent, child) = match (&parent.data, &child.data) {
2040            (NodeData::Element { name: pn, .. }, NodeData::Element { name: cn, .. }) => (pn, cn),
2041            _ => return true,
2042        };
2043        // The only way to switch from html to svg is with the <svg> tag
2044        if parent.ns == ns!(html) && child.ns == ns!(svg) {
2045            child.local == local_name!("svg")
2046        // The only way to switch from html to mathml is with the <math> tag
2047        } else if parent.ns == ns!(html) && child.ns == ns!(mathml) {
2048            child.local == local_name!("math")
2049        // The only way to switch from mathml to svg/html is with a text integration point
2050        } else if parent.ns == ns!(mathml) && child.ns != ns!(mathml) {
2051            // https://html.spec.whatwg.org/#mathml
2052            matches!(
2053                &*parent.local,
2054                "mi" | "mo" | "mn" | "ms" | "mtext" | "annotation-xml"
2055            ) && if child.ns == ns!(html) { is_html_tag(&child.local) } else { true }
2056        // The only way to switch from svg to mathml/html is with an html integration point
2057        } else if parent.ns == ns!(svg) && child.ns != ns!(svg) {
2058            // https://html.spec.whatwg.org/#svg-0
2059            matches!(&*parent.local, "foreignObject")
2060                && if child.ns == ns!(html) { is_html_tag(&child.local) } else { true }
2061        } else if child.ns == ns!(svg) {
2062            is_svg_tag(&child.local)
2063        } else if child.ns == ns!(mathml) {
2064            is_mathml_tag(&child.local)
2065        } else if child.ns == ns!(html) {
2066            is_html_tag(&child.local)
2067        } else {
2068            // There are no other supported ways to switch namespace
2069            parent.ns == child.ns
2070        }
2071    }
2072
2073    /// Add and transform special-cased attributes and elements.
2074    ///
2075    /// This function handles:
2076    ///
2077    /// * relative URL rewriting
2078    /// * adding `<a rel>` attributes
2079    /// * filtering out banned style properties
2080    /// * filtering out banned classes
2081    fn adjust_node_attributes(
2082        &self,
2083        child: &mut Handle,
2084        link_rel: &Option<StrTendril>,
2085        id_prefix: Option<&'a str>,
2086    ) {
2087        if let NodeData::Element {
2088            ref name,
2089            ref attrs,
2090            ..
2091        } = child.data
2092        {
2093            if let Some(set_attrs) = self.set_tag_attribute_values.get(&*name.local) {
2094                let mut attrs = attrs.borrow_mut();
2095                for (&set_name, &set_value) in set_attrs {
2096                    // set the value of the attribute if the attribute is already present
2097                    if let Some(attr) = attrs.iter_mut().find(|attr| &*attr.name.local == set_name)
2098                    {
2099                        if &*attr.value != set_value {
2100                            attr.value = set_value.into();
2101                        }
2102                    } else {
2103                        // otherwise, add the attribute
2104                        let attr = Attribute {
2105                            name: QualName::new(None, ns!(), set_name.into()),
2106                            value: set_value.into(),
2107                        };
2108                        attrs.push(attr);
2109                    }
2110                }
2111            }
2112            if let Some(ref link_rel) = *link_rel {
2113                if &*name.local == "a" {
2114                    attrs.borrow_mut().push(Attribute {
2115                        name: QualName::new(None, ns!(), local_name!("rel")),
2116                        value: link_rel.clone(),
2117                    })
2118                }
2119            }
2120            if let Some(ref id_prefix) = id_prefix {
2121                for attr in &mut *attrs.borrow_mut() {
2122                    if &attr.name.local == "id" && !attr.value.starts_with(id_prefix) {
2123                        attr.value = format_tendril!("{}{}", id_prefix, attr.value);
2124                    }
2125                }
2126            }
2127            if let Some(ref attr_filter) = self.attribute_filter {
2128                let mut drop_attrs = Vec::new();
2129                let mut attrs = attrs.borrow_mut();
2130                for (i, attr) in &mut attrs.iter_mut().enumerate() {
2131                    let replace_with = if let Some(new) =
2132                        attr_filter.filter(&name.local, &attr.name.local, &attr.value)
2133                    {
2134                        if *new != *attr.value {
2135                            Some(format_tendril!("{}", new))
2136                        } else {
2137                            None // no need to replace the attr if filter returned the same value
2138                        }
2139                    } else {
2140                        drop_attrs.push(i);
2141                        None
2142                    };
2143                    if let Some(replace_with) = replace_with {
2144                        attr.value = replace_with;
2145                    }
2146                }
2147                for i in drop_attrs.into_iter().rev() {
2148                    attrs.swap_remove(i);
2149                }
2150            }
2151            {
2152                let mut drop_attrs = Vec::new();
2153                let mut attrs = attrs.borrow_mut();
2154                for (i, attr) in attrs.iter_mut().enumerate() {
2155                    if is_url_attr(&name.local, &attr.name.local) && is_url_relative(&attr.value) {
2156                        let new_value = self.url_relative.evaluate(&attr.value);
2157                        if let Some(new_value) = new_value {
2158                            attr.value = new_value;
2159                        } else {
2160                            drop_attrs.push(i);
2161                        }
2162                    }
2163                }
2164                // Swap remove scrambles the vector after the current point.
2165                // We will not do anything except with items before the current point.
2166                // The `rev()` is, as such, necessary for correctness.
2167                // We could use regular `remove(usize)` and a forward iterator,
2168                // but that's slower.
2169                for i in drop_attrs.into_iter().rev() {
2170                    attrs.swap_remove(i);
2171                }
2172            }
2173            if let Some(allowed_values) = &self.style_properties {
2174                for attr in &mut *attrs.borrow_mut() {
2175                    if &attr.name.local == "style" {
2176                        attr.value = style::filter_style_attribute(&attr.value, allowed_values).into();
2177                    }
2178                }
2179            }
2180            if let Some(allowed_values) = self.allowed_classes.get(&*name.local) {
2181                for attr in &mut *attrs.borrow_mut() {
2182                    if &attr.name.local == "class" {
2183                        let mut classes = vec![];
2184                        // https://html.spec.whatwg.org/#global-attributes:classes-2
2185                        for class in attr.value.split_ascii_whitespace() {
2186                            if allowed_values.contains(class) {
2187                                classes.push(class.to_owned());
2188                            }
2189                        }
2190                        attr.value = format_tendril!("{}", classes.join(" "));
2191                    }
2192                }
2193            }
2194        }
2195    }
2196
2197    /// Initializes an HTML fragment parser.
2198    ///
2199    /// Ammonia conforms to the HTML5 fragment parsing rules,
2200    /// by parsing the given fragment as if it were included in a <div> tag.
2201    fn make_parser() -> html::Parser<RcDom> {
2202        html::parse_fragment(
2203            RcDom::default(),
2204            html::ParseOpts::default(),
2205            QualName::new(None, ns!(html), local_name!("div")),
2206            vec![],
2207            false,
2208        )
2209    }
2210}
2211
/// Tells whether attribute `attr` on element `element` holds a URL.
///
/// `href` and `src` count on every element; the remaining attributes only
/// count on the specific elements listed below.
fn is_url_attr(element: &str, attr: &str) -> bool {
    match attr {
        "href" | "src" => true,
        "action" => element == "form",
        "data" => element == "object",
        "formaction" => matches!(element, "button" | "input"),
        "ping" => element == "a",
        "poster" => element == "video",
        _ => false,
    }
}
2222
2223fn is_html_tag(element: &str) -> bool {
2224    (!is_svg_tag(element) && !is_mathml_tag(element))
2225        || matches!(
2226            element,
2227            "title" | "style" | "font" | "a" | "script" | "span"
2228        )
2229}
2230
/// Given an element name, check whether it is a known SVG element name.
///
/// The comparison is exact and case-sensitive, so `foreignObject` matches
/// while `foreignobject` does not.
fn is_svg_tag(element: &str) -> bool {
    // https://svgwg.org/svg2-draft/eltindex.html
    const SVG_ELEMENTS: &[&str] = &[
        "a",
        "animate",
        "animateMotion",
        "animateTransform",
        "circle",
        "clipPath",
        "defs",
        "desc",
        "discard",
        "ellipse",
        "feBlend",
        "feColorMatrix",
        "feComponentTransfer",
        "feComposite",
        "feConvolveMatrix",
        "feDiffuseLighting",
        "feDisplacementMap",
        "feDistantLight",
        "feDropShadow",
        "feFlood",
        "feFuncA",
        "feFuncB",
        "feFuncG",
        "feFuncR",
        "feGaussianBlur",
        "feImage",
        "feMerge",
        "feMergeNode",
        "feMorphology",
        "feOffset",
        "fePointLight",
        "feSpecularLighting",
        "feSpotLight",
        "feTile",
        "feTurbulence",
        "filter",
        "foreignObject",
        "g",
        "image",
        "line",
        "linearGradient",
        "marker",
        "mask",
        "metadata",
        "mpath",
        "path",
        "pattern",
        "polygon",
        "polyline",
        "radialGradient",
        "rect",
        "script",
        "set",
        "stop",
        "style",
        "svg",
        "switch",
        "symbol",
        "text",
        "textPath",
        "title",
        "tspan",
        "use",
        "view",
    ];
    SVG_ELEMENTS.contains(&element)
}
2301
/// Given an element name, check whether it is a known MathML element name.
///
/// The comparison is exact and case-sensitive.
fn is_mathml_tag(element: &str) -> bool {
    // The names cover both presentation markup (`mrow`, `mfrac`, ...) and
    // content markup (`apply`, `ci`, ...); compare the element index of the
    // W3C MathML specification.
    const MATHML_ELEMENTS: &[&str] = &[
        // a
        "abs", "and", "annotation", "annotation-xml", "apply", "approx",
        "arccos", "arccosh", "arccot", "arccoth", "arccsc", "arccsch",
        "arcsec", "arcsech", "arcsin", "arcsinh", "arctan", "arctanh", "arg",
        // b
        "bind", "bvar",
        // c
        "card", "cartesianproduct", "cbytes", "ceiling", "cerror", "ci", "cn",
        "codomain", "complexes", "compose", "condition", "conjugate", "cos",
        "cosh", "cot", "coth", "cs", "csc", "csch", "csymbol", "curl",
        // d
        "declare", "degree", "determinant", "diff", "divergence", "divide",
        "domain", "domainofapplication",
        // e
        "emptyset", "eq", "equivalent", "eulergamma", "exists", "exp",
        "exponentiale",
        // f
        "factorial", "factorof", "false", "floor", "fn", "forall",
        // g
        "gcd", "geq", "grad", "gt",
        // i
        "ident", "image", "imaginary", "imaginaryi", "implies", "in",
        "infinity", "int", "integers", "intersect", "interval", "inverse",
        // l
        "lambda", "laplacian", "lcm", "leq", "limit", "list", "ln", "log",
        "logbase", "lowlimit", "lt",
        // m
        "maction", "maligngroup", "malignmark", "math", "matrix", "matrixrow",
        "max", "mean", "median", "menclose", "merror", "mfenced", "mfrac",
        "mglyph", "mi", "min", "minus", "mlabeledtr", "mlongdiv",
        "mmultiscripts", "mn", "mo", "mode", "moment", "momentabout", "mover",
        "mpadded", "mphantom", "mprescripts", "mroot", "mrow", "ms",
        "mscarries", "mscarry", "msgroup", "msline", "mspace", "msqrt",
        "msrow", "mstack", "mstyle", "msub", "msubsup", "msup", "mtable",
        "mtd", "mtext", "mtr", "munder", "munderover",
        // n
        "naturalnumbers", "neq", "none", "not", "notanumber", "notin",
        "notprsubset", "notsubset",
        // o
        "or", "otherwise", "outerproduct",
        // p
        "partialdiff", "pi", "piece", "piecewise", "plus", "power", "primes",
        "product", "prsubset",
        // q
        "quotient",
        // r
        "rationals", "real", "reals", "reln", "rem", "root",
        // s
        "scalarproduct", "sdev", "sec", "sech", "selector", "semantics",
        "sep", "set", "setdiff", "share", "sin", "sinh", "span", "subset",
        "sum",
        // t
        "tan", "tanh", "tendsto", "times", "transpose", "true",
        // u
        "union", "uplimit",
        // v
        "variance", "vector", "vectorproduct",
        // x
        "xor",
    ];
    MATHML_ELEMENTS.contains(&element)
}
2503
2504fn is_url_relative(url: &str) -> bool {
2505    matches!(
2506        Url::parse(url),
2507        Err(url::ParseError::RelativeUrlWithoutBase)
2508    )
2509}
2510
2511/// Policy for [relative URLs], that is, URLs that do not specify the scheme in full.
2512///
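/// This policy kicks in, if set, for any attribute named `src` or `href`,
/// as well as the element-specific URL attributes: `action` on `form`,
/// `data` on `object`, `formaction` on `button` and `input`, `ping` on `a`,
/// and `poster` on `video`.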
2515///
2516/// [relative URLs]: struct.Builder.html#method.url_relative
2517///
2518/// # Examples
2519///
2520/// ## `Deny`
2521///
2522/// * `<a href="test">` is a file-relative URL, and will be removed
2523/// * `<a href="/test">` is a domain-relative URL, and will be removed
2524/// * `<a href="//example.com/test">` is a scheme-relative URL, and will be removed
2525/// * `<a href="http://example.com/test">` is an absolute URL, and will be kept
2526///
2527/// ## `PassThrough`
2528///
2529/// No changes will be made to any URLs, except if a disallowed scheme is used.
2530///
2531/// ## `RewriteWithBase`
2532///
2533/// If the base is set to `http://notriddle.com/some-directory/some-file`
2534///
2535/// * `<a href="test">` will be rewritten to `<a href="http://notriddle.com/some-directory/test">`
2536/// * `<a href="/test">` will be rewritten to `<a href="http://notriddle.com/test">`
2537/// * `<a href="//example.com/test">` will be rewritten to `<a href="http://example.com/test">`
2538/// * `<a href="http://example.com/test">` is an absolute URL, so it will be kept as-is
2539///
2540/// ## `Custom`
2541///
2542/// Pass the relative URL to a function.
2543/// If it returns `Some(string)`, then that one gets used.
2544/// Otherwise, it will remove the attribute (like `Deny` does).
2545///
2546///     use std::borrow::Cow;
2547///     fn is_absolute_path(url: &str) -> bool {
2548///         let u = url.as_bytes();
2549///         // `//a/b/c` is "protocol-relative", meaning "a" is a hostname
2550///         // `/a/b/c` is an absolute path, and what we want to do stuff to.
2551///         u.get(0) == Some(&b'/') && u.get(1) != Some(&b'/')
2552///     }
2553///     fn evaluate(url: &str) -> Option<Cow<str>> {
2554///         if is_absolute_path(url) {
2555///             Some(Cow::Owned(String::from("/root") + url))
2556///         } else {
2557///             Some(Cow::Borrowed(url))
2558///         }
2559///     }
2560///     fn main() {
2561///         let a = ammonia::Builder::new()
2562///             .url_relative(ammonia::UrlRelative::Custom(Box::new(evaluate)))
2563///             .clean("<a href=/test/path>fixed</a><a href=path>passed</a><a href=http://google.com/>skipped</a>")
2564///             .to_string();
2565///         assert_eq!(a, "<a href=\"/root/test/path\" rel=\"noopener noreferrer\">fixed</a><a href=\"path\" rel=\"noopener noreferrer\">passed</a><a href=\"http://google.com/\" rel=\"noopener noreferrer\">skipped</a>");
2566///     }
2567///
2568/// This function is only applied to relative URLs.
2569/// To filter all of the URLs,
2570/// use the not-yet-implemented Content Security Policy.
2571#[non_exhaustive]
2572pub enum UrlRelative<'a> {
2573    /// Relative URLs will be completely stripped from the document.
2574    Deny,
2575    /// Relative URLs will be passed through unchanged.
2576    PassThrough,
2577    /// Relative URLs will be changed into absolute URLs, based on this base URL.
2578    RewriteWithBase(Url),
2579    /// Force absolute and relative paths into a particular directory.
2580    ///
2581    /// Since the resolver does not affect fully-qualified URLs, it doesn't
2582    /// prevent users from linking wherever they want. This feature only
2583    /// serves to make content more portable.
2584    ///
2585    /// # Examples
2586    ///
2587    /// <table>
2588    /// <thead>
2589    /// <tr>
2590    ///     <th>root</th>
2591    ///     <th>path</th>
2592    ///     <th>url</th>
2593    ///     <th>result</th>
2594    /// </tr>
2595    /// </thead>
2596    /// <tbody>
2597    /// <tr>
2598    ///     <td>https://github.com/rust-ammonia/ammonia/blob/master/</td>
2599    ///     <td>README.md</td>
2600    ///     <td></td>
2601    ///     <td>https://github.com/rust-ammonia/ammonia/blob/master/README.md</td>
2602    /// </tr><tr>
2603    ///     <td>https://github.com/rust-ammonia/ammonia/blob/master/</td>
2604    ///     <td>README.md</td>
2605    ///     <td>/</td>
2606    ///     <td>https://github.com/rust-ammonia/ammonia/blob/master/</td>
2607    /// </tr><tr>
2608    ///     <td>https://github.com/rust-ammonia/ammonia/blob/master/</td>
2609    ///     <td>README.md</td>
2610    ///     <td>/CONTRIBUTING.md</td>
2611    ///     <td>https://github.com/rust-ammonia/ammonia/blob/master/CONTRIBUTING.md</td>
2612    /// </tr><tr>
2613    ///     <td>https://github.com/rust-ammonia/ammonia/blob/master</td>
2614    ///     <td>README.md</td>
2615    ///     <td></td>
2616    ///     <td>https://github.com/rust-ammonia/ammonia/blob/README.md</td>
2617    /// </tr><tr>
2618    ///     <td>https://github.com/rust-ammonia/ammonia/blob/master</td>
2619    ///     <td>README.md</td>
2620    ///     <td>/</td>
2621    ///     <td>https://github.com/rust-ammonia/ammonia/blob/</td>
2622    /// </tr><tr>
2623    ///     <td>https://github.com/rust-ammonia/ammonia/blob/master</td>
2624    ///     <td>README.md</td>
2625    ///     <td>/CONTRIBUTING.md</td>
2626    ///     <td>https://github.com/rust-ammonia/ammonia/blob/CONTRIBUTING.md</td>
2627    /// </tr><tr>
2628    ///     <td>https://github.com/rust-ammonia/ammonia/blob/master/</td>
2629    ///     <td></td>
2630    ///     <td></td>
2631    ///     <td>https://github.com/rust-ammonia/ammonia/blob/master/</td>
2632    /// </tr><tr>
2633    ///     <td>https://github.com/rust-ammonia/ammonia/blob/master/</td>
2634    ///     <td></td>
2635    ///     <td>/</td>
2636    ///     <td>https://github.com/rust-ammonia/ammonia/blob/master/</td>
2637    /// </tr><tr>
2638    ///     <td>https://github.com/rust-ammonia/ammonia/blob/master/</td>
2639    ///     <td></td>
2640    ///     <td>/CONTRIBUTING.md</td>
2641    ///     <td>https://github.com/rust-ammonia/ammonia/blob/master/CONTRIBUTING.md</td>
2642    /// </tr><tr>
2643    ///     <td>https://github.com/</td>
2644    ///     <td>rust-ammonia/ammonia/blob/master/README.md</td>
2645    ///     <td></td>
2646    ///     <td>https://github.com/rust-ammonia/ammonia/blob/master/README.md</td>
2647    /// </tr><tr>
2648    ///     <td>https://github.com/</td>
2649    ///     <td>rust-ammonia/ammonia/blob/master/README.md</td>
2650    ///     <td>/</td>
2651    ///     <td>https://github.com/</td>
2652    /// </tr><tr>
2653    ///     <td>https://github.com/</td>
2654    ///     <td>rust-ammonia/ammonia/blob/master/README.md</td>
2655    ///     <td>CONTRIBUTING.md</td>
2656    ///     <td>https://github.com/rust-ammonia/ammonia/blob/master/CONTRIBUTING.md</td>
2657    /// </tr><tr>
2658    ///     <td>https://github.com/</td>
2659    ///     <td>rust-ammonia/ammonia/blob/master/README.md</td>
2660    ///     <td>/CONTRIBUTING.md</td>
2661    ///     <td>https://github.com/CONTRIBUTING.md</td>
2662    /// </tr>
2663    /// </tbody>
2664    /// </table>
2665    RewriteWithRoot {
2666        /// The URL that is treated as the root by the resolver.
2667        root: Url,
2668        /// The "current path" used to resolve relative paths.
2669        path: String,
2670    },
2671    /// Rewrite URLs with a custom function.
2672    Custom(Box<dyn UrlRelativeEvaluate<'a>>),
2673}
2674
2675impl<'a> UrlRelative<'a> {
2676    fn evaluate(&self, url: &str) -> Option<tendril::StrTendril> {
2677        match self {
2678            UrlRelative::RewriteWithBase(ref url_base) => url_base
2679                .join(url)
2680                .ok()
2681                .and_then(|x| StrTendril::from_str(x.as_str()).ok()),
2682            UrlRelative::RewriteWithRoot { ref root, ref path } => {
2683                (match url.as_bytes() {
2684                    // Scheme-relative URL
2685                    [b'/', b'/', ..] => root.join(url),
2686                    // Path-absolute URL
2687                    b"/" => root.join("."),
2688                    [b'/', ..] => root.join(&url[1..]),
2689                    // Path-relative URL
2690                    _ => root.join(path).and_then(|r| r.join(url)),
2691                })
2692                .ok()
2693                .and_then(|x| StrTendril::from_str(x.as_str()).ok())
2694            }
2695            UrlRelative::Custom(ref evaluate) => evaluate
2696                .evaluate(url)
2697                .as_ref()
2698                .map(Cow::as_ref)
2699                .map(StrTendril::from_str)
2700                .and_then(Result::ok),
2701            UrlRelative::PassThrough => StrTendril::from_str(url).ok(),
2702            UrlRelative::Deny => None,
2703        }
2704    }
2705}
2706
2707impl<'a> fmt::Debug for UrlRelative<'a> {
2708    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
2709        match *self {
2710            UrlRelative::Deny => write!(f, "UrlRelative::Deny"),
2711            UrlRelative::PassThrough => write!(f, "UrlRelative::PassThrough"),
2712            UrlRelative::RewriteWithBase(ref base) => {
2713                write!(f, "UrlRelative::RewriteWithBase({})", base)
2714            }
2715            UrlRelative::RewriteWithRoot { ref root, ref path } => {
2716                write!(
2717                    f,
2718                    "UrlRelative::RewriteWithRoot {{ root: {root}, path: {path} }}"
2719                )
2720            }
2721            UrlRelative::Custom(_) => write!(f, "UrlRelative::Custom"),
2722        }
2723    }
2724}
2725
/// Types that implement this trait can be used to convert a relative URL into an absolute URL.
///
/// This evaluator is only called when the URL is relative; absolute URLs are not evaluated.
///
/// Any `Fn(&str) -> Option<Cow<'_, str>>` that is also `Send + Sync` implements
/// this trait through a blanket impl, so a plain function can be boxed and
/// stored in `UrlRelative::Custom` directly.
///
/// See [`url_relative`][url_relative] for more details.
///
/// [url_relative]: struct.Builder.html#method.url_relative
pub trait UrlRelativeEvaluate<'a>: Send + Sync + 'a {
    /// Return `None` to remove the attribute. Return `Some(str)` to replace it with a new string.
    ///
    /// The argument is the relative URL being evaluated. The returned `Cow`
    /// may borrow from that argument (for example, to hand it back unchanged)
    /// or own a newly built string.
    fn evaluate<'url>(&self, _: &'url str) -> Option<Cow<'url, str>>;
}
2737impl<'a, T> UrlRelativeEvaluate<'a> for T
2738where
2739    T: Fn(&str) -> Option<Cow<'_, str>> + Send + Sync + 'a,
2740{
2741    fn evaluate<'url>(&self, url: &'url str) -> Option<Cow<'url, str>> {
2742        self(url)
2743    }
2744}
2745
2746impl fmt::Debug for dyn AttributeFilter {
2747    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
2748        f.write_str("AttributeFilter")
2749    }
2750}
2751
/// Types that implement this trait can be used to remove or rewrite arbitrary attributes.
///
/// Any `Fn(&str, &str, &str) -> Option<Cow<str>>` that is `Send + Sync + 'static`
/// implements this trait through a blanket impl, so a closure can be used directly.
///
/// See [`attribute_filter`][attribute_filter] for more details.
///
/// [attribute_filter]: struct.Builder.html#method.attribute_filter
pub trait AttributeFilter: Send + Sync {
    /// Return `None` to remove the attribute. Return `Some(str)` to replace it with a new string.
    ///
    /// The arguments are, in order, the element name, the attribute name and
    /// the attribute value. The returned `Cow` may borrow from the value.
    fn filter<'a>(&self, _: &str, _: &str, _: &'a str) -> Option<Cow<'a, str>>;
}
2761
2762impl<T> AttributeFilter for T
2763where
2764    T: for<'a> Fn(&str, &str, &'a str) -> Option<Cow<'a, str>> + Send + Sync + 'static,
2765{
2766    fn filter<'a>(&self, element: &str, attribute: &str, value: &'a str) -> Option<Cow<'a, str>> {
2767        self(element, attribute, value)
2768    }
2769}
2770
/// A sanitized HTML document.
///
/// The `Document` type is an opaque struct representing an HTML fragment that was sanitized by
/// `ammonia`. It can be converted to a [`String`] or written to a [`Write`] instance. This allows
/// users to avoid buffering the serialized representation to a [`String`] when desired.
///
/// Conversions available on this type: `to_string()` (through its `Display` impl),
/// `String::from(document)`, and `write_to` for streaming into a writer.
///
/// This type is opaque to insulate the caller from breaking changes in the `html5ever` interface.
///
/// Note that this type wraps an `html5ever` DOM tree. `ammonia` does not support streaming, so
/// the complete fragment needs to be stored in memory during processing.
///
/// [`String`]: https://doc.rust-lang.org/nightly/std/string/struct.String.html
/// [`Write`]: https://doc.rust-lang.org/nightly/std/io/trait.Write.html
///
/// # Examples
///
///     use ammonia::Builder;
///
///     let input = "<!-- comments will be stripped -->This is an Ammonia example.";
///     let output = "This is an Ammonia example.";
///
///     let document = Builder::new()
///         .clean(input);
///     assert_eq!(document.to_string(), output);
pub struct Document(RcDom);
2796
2797impl Document {
2798    /// Serializes a `Document` instance to a writer.
2799    ///
2800    /// This method writes the sanitized HTML to a [`Write`] instance, avoiding a buffering step.
2801    ///
2802    /// To avoid consuming the writer, a mutable reference can be passed, like in the example below.
2803    ///
2804    /// Note that the in-memory representation of `Document` is larger than the serialized
2805    /// `String`.
2806    ///
2807    /// [`Write`]: https://doc.rust-lang.org/nightly/std/io/trait.Write.html
2808    ///
2809    /// # Examples
2810    ///
2811    ///     use ammonia::Builder;
2812    ///
2813    ///     let input = "Some <style></style>HTML here";
2814    ///     let expected = b"Some HTML here";
2815    ///
2816    ///     let document = Builder::new()
2817    ///         .clean(input);
2818    ///
2819    ///     let mut sanitized = Vec::new();
2820    ///     document.write_to(&mut sanitized)
2821    ///         .expect("Writing to a string should not fail (except on OOM)");
2822    ///     assert_eq!(sanitized, expected);
2823    pub fn write_to<W>(&self, writer: W) -> io::Result<()>
2824    where
2825        W: io::Write,
2826    {
2827        let opts = Self::serialize_opts();
2828        let inner: SerializableHandle = self.0.document.children.borrow()[0].clone().into();
2829        serialize(writer, &inner, opts)
2830    }
2831
2832    /// Exposes the `Document` instance as an [`rcdom::Handle`].
2833    ///
2834    /// This method returns the inner object backing the `Document` instance. This allows
2835    /// making further changes to the DOM without introducing redundant serialization and
2836    /// parsing.
2837    ///
2838    /// Note that this method should be considered unstable and sits outside of the semver
2839    /// stability guarantees. It may change, break, or go away at any time, either because
2840    /// of `html5ever` changes or `ammonia` implementation changes.
2841    ///
2842    /// For this method to be accessible, a `cfg` flag is required. The easiest way is to
2843    /// use the `RUSTFLAGS` environment variable:
2844    ///
2845    /// ```text
2846    /// RUSTFLAGS='--cfg ammonia_unstable' cargo build
2847    /// ```
2848    ///
2849    /// on Unix-like platforms, or
2850    ///
2851    /// ```text
2852    /// set RUSTFLAGS=--cfg ammonia_unstable
2853    /// cargo build
2854    /// ```
2855    ///
2856    /// on Windows.
2857    ///
2858    /// This requirement also applies to crates that transitively depend on crates that use
2859    /// this flag.
2860    ///
2861    /// # Examples
2862    ///
2863    ///     use ammonia::Builder;
2864    ///     use maplit::hashset;
2865    ///     use html5ever::serialize::{serialize, SerializeOpts};
2866    ///
2867    ///     # use std::error::Error;
2868    ///     # fn do_main() -> Result<(), Box<dyn Error>> {
2869    ///     let input = "<a>one link</a> and <a>one more</a>";
2870    ///     let expected = "<a>one more</a> and <a>one link</a>";
2871    ///
2872    ///     let document = Builder::new()
2873    ///         .link_rel(None)
2874    ///         .clean(input);
2875    ///
2876    ///     let mut node = document.to_dom_node();
2877    ///     node.children.borrow_mut().reverse();
2878    ///
2879    ///     let mut buf = Vec::new();
2880    ///     serialize(&mut buf, &node, SerializeOpts::default())?;
2881    ///     let output = String::from_utf8(buf)?;
2882    ///
2883    ///     assert_eq!(output, expected);
2884    ///     # Ok(())
2885    ///     # }
2886    ///     # fn main() { do_main().unwrap() }
2887    #[cfg(ammonia_unstable)]
2888    pub fn to_dom_node(&self) -> Handle {
2889        self.0.document.children.borrow()[0].clone()
2890    }
2891
2892    fn serialize_opts() -> SerializeOpts {
2893        SerializeOpts::default()
2894    }
2895}
2896
2897impl Clone for Document {
2898    fn clone(&self) -> Self {
2899        let parser = Builder::make_parser();
2900        let dom = parser.one(&self.to_string()[..]);
2901        Document(dom)
2902    }
2903}
2904
2905/// Convert a `Document` to stringified HTML.
2906///
2907/// Since [`Document`] implements [`Display`], it can be converted to a [`String`] using the
2908/// standard [`ToString::to_string`] method. This is the simplest way to use `ammonia`.
2909///
2910/// [`Document`]: ammonia::Document
2911/// [`Display`]: std::fmt::Display
2912/// [`ToString::to_string`]: std::string::ToString
2913///
2914/// # Examples
2915///
2916///     use ammonia::Builder;
2917///
2918///     let input = "Some <style></style>HTML here";
2919///     let output = "Some HTML here";
2920///
2921///     let document = Builder::new()
2922///         .clean(input);
2923///     assert_eq!(document.to_string(), output);
2924impl Display for Document {
2925    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
2926        let opts = Self::serialize_opts();
2927        let mut ret_val = Vec::new();
2928        let inner: SerializableHandle = self.0.document.children.borrow()[0].clone().into();
2929        serialize(&mut ret_val, &inner, opts)
2930            .expect("Writing to a string shouldn't fail (expect on OOM)");
2931        String::from_utf8(ret_val)
2932            .expect("html5ever only supports UTF8")
2933            .fmt(f)
2934    }
2935}
2936
2937impl fmt::Debug for Document {
2938    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
2939        write!(f, "Document({})", self)
2940    }
2941}
2942
2943impl From<Document> for String {
2944    fn from(document: Document) -> Self {
2945        document.to_string()
2946    }
2947}
2948
2949#[cfg(test)]
2950mod test {
2951    use super::*;
2952    #[test]
2953    fn deeply_nested_whitelisted() {
2954        clean(&"<b>".repeat(60_000));
2955    }
2956    #[test]
2957    fn deeply_nested_blacklisted() {
2958        clean(&"<b-b>".repeat(60_000));
2959    }
2960    #[test]
2961    fn deeply_nested_alternating() {
2962        clean(&"<b-b>".repeat(35_000));
2963    }
2964    #[test]
2965    fn included_angles() {
2966        let fragment = "1 < 2";
2967        let result = clean(fragment);
2968        assert_eq!(result, "1 &lt; 2");
2969    }
2970    #[test]
2971    fn remove_script() {
2972        let fragment = "an <script>evil()</script> example";
2973        let result = clean(fragment);
2974        assert_eq!(result, "an  example");
2975    }
2976    #[test]
2977    fn ignore_link() {
2978        let fragment = "a <a href=\"http://www.google.com\">good</a> example";
2979        let expected = "a <a href=\"http://www.google.com\" rel=\"noopener noreferrer\">\
2980                        good</a> example";
2981        let result = clean(fragment);
2982        assert_eq!(result, expected);
2983    }
2984    #[test]
2985    fn remove_unsafe_link() {
2986        let fragment = "an <a onclick=\"evil()\" href=\"http://www.google.com\">evil</a> example";
2987        let result = clean(fragment);
2988        assert_eq!(
2989            result,
2990            "an <a href=\"http://www.google.com\" rel=\"noopener noreferrer\">evil</a> example"
2991        );
2992    }
2993    #[test]
2994    fn remove_js_link() {
2995        let fragment = "an <a href=\"javascript:evil()\">evil</a> example";
2996        let result = clean(fragment);
2997        assert_eq!(result, "an <a rel=\"noopener noreferrer\">evil</a> example");
2998    }
2999    #[test]
3000    fn tag_rebalance() {
3001        let fragment = "<b>AWESOME!";
3002        let result = clean(fragment);
3003        assert_eq!(result, "<b>AWESOME!</b>");
3004    }
3005    #[test]
3006    fn allow_url_relative() {
3007        let fragment = "<a href=test>Test</a>";
3008        let result = Builder::new()
3009            .url_relative(UrlRelative::PassThrough)
3010            .clean(fragment)
3011            .to_string();
3012        assert_eq!(
3013            result,
3014            "<a href=\"test\" rel=\"noopener noreferrer\">Test</a>"
3015        );
3016    }
3017    #[test]
3018    fn rewrite_url_relative() {
3019        let fragment = "<a href=test>Test</a>";
3020        let result = Builder::new()
3021            .url_relative(UrlRelative::RewriteWithBase(
3022                Url::parse("http://example.com/").unwrap(),
3023            ))
3024            .clean(fragment)
3025            .to_string();
3026        assert_eq!(
3027            result,
3028            "<a href=\"http://example.com/test\" rel=\"noopener noreferrer\">Test</a>"
3029        );
3030    }
3031    #[test]
3032    fn rewrite_url_relative_with_invalid_url() {
3033        // Reduced from https://github.com/Bauke/ammonia-crash-test
3034        let fragment = r##"<a href="\\"https://example.com\\"">test</a>"##;
3035        let result = Builder::new()
3036            .url_relative(UrlRelative::RewriteWithBase(
3037                Url::parse("http://example.com/").unwrap(),
3038            ))
3039            .clean(fragment)
3040            .to_string();
3041        assert_eq!(result, r##"<a rel="noopener noreferrer">test</a>"##);
3042    }
3043    #[test]
3044    fn attribute_filter_nop() {
3045        let fragment = "<a href=test>Test</a>";
3046        let result = Builder::new()
3047            .attribute_filter(|elem, attr, value| {
3048                assert_eq!("a", elem);
3049                assert!(
3050                    matches!(
3051                        (attr, value),
3052                        ("href", "test") | ("rel", "noopener noreferrer")
3053                    ),
3054                    "{}",
3055                    value.to_string()
3056                );
3057                Some(value.into())
3058            })
3059            .clean(fragment)
3060            .to_string();
3061        assert_eq!(
3062            result,
3063            "<a href=\"test\" rel=\"noopener noreferrer\">Test</a>"
3064        );
3065    }
3066
3067    #[test]
3068    fn attribute_filter_drop() {
3069        let fragment = "Test<img alt=test src=imgtest>";
3070        let result = Builder::new()
3071            .attribute_filter(|elem, attr, value| {
3072                assert_eq!("img", elem);
3073                match (attr, value) {
3074                    ("src", "imgtest") => None,
3075                    ("alt", "test") => Some(value.into()),
3076                    _ => panic!("unexpected"),
3077                }
3078            })
3079            .clean(fragment)
3080            .to_string();
3081        assert_eq!(result, r#"Test<img alt="test">"#);
3082    }
3083
3084    #[test]
3085    fn url_filter_absolute() {
3086        let fragment = "Test<img alt=test src=imgtest>";
3087        let result = Builder::new()
3088            .attribute_filter(|elem, attr, value| {
3089                assert_eq!("img", elem);
3090                match (attr, value) {
3091                    ("src", "imgtest") => {
3092                        Some(format!("https://example.com/images/{}", value).into())
3093                    }
3094                    ("alt", "test") => None,
3095                    _ => panic!("unexpected"),
3096                }
3097            })
3098            .url_relative(UrlRelative::RewriteWithBase(
3099                Url::parse("http://wrong.invalid/").unwrap(),
3100            ))
3101            .clean(fragment)
3102            .to_string();
3103        assert_eq!(
3104            result,
3105            r#"Test<img src="https://example.com/images/imgtest">"#
3106        );
3107    }
3108
3109    #[test]
3110    fn url_filter_relative() {
3111        let fragment = "Test<img alt=test src=imgtest>";
3112        let result = Builder::new()
3113            .attribute_filter(|elem, attr, value| {
3114                assert_eq!("img", elem);
3115                match (attr, value) {
3116                    ("src", "imgtest") => Some("rewrite".into()),
3117                    ("alt", "test") => Some("altalt".into()),
3118                    _ => panic!("unexpected"),
3119                }
3120            })
3121            .url_relative(UrlRelative::RewriteWithBase(
3122                Url::parse("https://example.com/base/#").unwrap(),
3123            ))
3124            .clean(fragment)
3125            .to_string();
3126        assert_eq!(
3127            result,
3128            r#"Test<img alt="altalt" src="https://example.com/base/rewrite">"#
3129        );
3130    }
3131
3132    #[test]
3133    fn rewrite_url_relative_no_rel() {
3134        let fragment = "<a href=test>Test</a>";
3135        let result = Builder::new()
3136            .url_relative(UrlRelative::RewriteWithBase(
3137                Url::parse("http://example.com/").unwrap(),
3138            ))
3139            .link_rel(None)
3140            .clean(fragment)
3141            .to_string();
3142        assert_eq!(result, "<a href=\"http://example.com/test\">Test</a>");
3143    }
3144    #[test]
3145    fn deny_url_relative() {
3146        let fragment = "<a href=test>Test</a>";
3147        let result = Builder::new()
3148            .url_relative(UrlRelative::Deny)
3149            .clean(fragment)
3150            .to_string();
3151        assert_eq!(result, "<a rel=\"noopener noreferrer\">Test</a>");
3152    }
3153    #[test]
3154    fn replace_rel() {
3155        let fragment = "<a href=test rel=\"garbage\">Test</a>";
3156        let result = Builder::new()
3157            .url_relative(UrlRelative::PassThrough)
3158            .clean(fragment)
3159            .to_string();
3160        assert_eq!(
3161            result,
3162            "<a href=\"test\" rel=\"noopener noreferrer\">Test</a>"
3163        );
3164    }
3165    #[test]
3166    fn consider_rel_still_banned() {
3167        let fragment = "<a href=test rel=\"garbage\">Test</a>";
3168        let result = Builder::new()
3169            .url_relative(UrlRelative::PassThrough)
3170            .link_rel(None)
3171            .clean(fragment)
3172            .to_string();
3173        assert_eq!(result, "<a href=\"test\">Test</a>");
3174    }
3175    #[test]
3176    fn object_data() {
3177        let fragment = "<span data=\"javascript:evil()\">Test</span>\
3178                        <object data=\"javascript:evil()\"></object>M";
3179        let expected = r#"<span data="javascript:evil()">Test</span><object></object>M"#;
3180        let result = Builder::new()
3181            .tags(hashset!["span", "object"])
3182            .generic_attributes(hashset!["data"])
3183            .clean(fragment)
3184            .to_string();
3185        assert_eq!(result, expected);
3186    }
3187    #[test]
3188    fn remove_attributes() {
3189        let fragment = "<table border=\"1\"><tr></tr></table>";
3190        let result = Builder::new().clean(fragment);
3191        assert_eq!(
3192            result.to_string(),
3193            "<table><tbody><tr></tr></tbody></table>"
3194        );
3195    }
3196    #[test]
3197    fn quotes_in_attrs() {
3198        let fragment = "<b title='\"'>contents</b>";
3199        let result = clean(fragment);
3200        assert_eq!(result, "<b title=\"&quot;\">contents</b>");
3201    }
3202    #[test]
3203    #[should_panic]
3204    fn panic_if_rel_is_allowed_and_replaced_generic() {
3205        Builder::new()
3206            .link_rel(Some("noopener noreferrer"))
3207            .generic_attributes(hashset!["rel"])
3208            .clean("something");
3209    }
3210    #[test]
3211    #[should_panic]
3212    fn panic_if_rel_is_allowed_and_replaced_a() {
3213        Builder::new()
3214            .link_rel(Some("noopener noreferrer"))
3215            .tag_attributes(hashmap![
3216                "a" => hashset!["rel"],
3217            ])
3218            .clean("something");
3219    }
3220    #[test]
3221    fn no_panic_if_rel_is_allowed_and_replaced_span() {
3222        Builder::new()
3223            .link_rel(Some("noopener noreferrer"))
3224            .tag_attributes(hashmap![
3225                "span" => hashset!["rel"],
3226            ])
3227            .clean("<span rel=\"what\">s</span>");
3228    }
3229    #[test]
3230    fn no_panic_if_rel_is_allowed_and_not_replaced_generic() {
3231        Builder::new()
3232            .link_rel(None)
3233            .generic_attributes(hashset!["rel"])
3234            .clean("<a rel=\"what\">s</a>");
3235    }
3236    #[test]
3237    fn no_panic_if_rel_is_allowed_and_not_replaced_a() {
3238        Builder::new()
3239            .link_rel(None)
3240            .tag_attributes(hashmap![
3241                "a" => hashset!["rel"],
3242            ])
3243            .clean("<a rel=\"what\">s</a>");
3244    }
3245    #[test]
3246    fn dont_close_void_elements() {
3247        let fragment = "<br>";
3248        let result = clean(fragment);
3249        assert_eq!(result.to_string(), "<br>");
3250    }
3251    #[should_panic]
3252    #[test]
3253    fn panic_on_allowed_classes_tag_attributes() {
3254        let fragment = "<p class=\"foo bar\"><a class=\"baz bleh\">Hey</a></p>";
3255        Builder::new()
3256            .link_rel(None)
3257            .tag_attributes(hashmap![
3258                "p" => hashset!["class"],
3259                "a" => hashset!["class"],
3260            ])
3261            .allowed_classes(hashmap![
3262                "p" => hashset!["foo", "bar"],
3263                "a" => hashset!["baz"],
3264            ])
3265            .clean(fragment);
3266    }
3267    #[should_panic]
3268    #[test]
3269    fn panic_on_allowed_classes_generic_attributes() {
3270        let fragment = "<p class=\"foo bar\"><a class=\"baz bleh\">Hey</a></p>";
3271        Builder::new()
3272            .link_rel(None)
3273            .generic_attributes(hashset!["class", "href", "some-foo"])
3274            .allowed_classes(hashmap![
3275                "p" => hashset!["foo", "bar"],
3276                "a" => hashset!["baz"],
3277            ])
3278            .clean(fragment);
3279    }
3280    #[test]
3281    fn remove_non_allowed_classes() {
3282        let fragment = "<p class=\"foo bar\"><a class=\"baz bleh\">Hey</a></p>";
3283        let result = Builder::new()
3284            .link_rel(None)
3285            .allowed_classes(hashmap![
3286                "p" => hashset!["foo", "bar"],
3287                "a" => hashset!["baz"],
3288            ])
3289            .clean(fragment);
3290        assert_eq!(
3291            result.to_string(),
3292            "<p class=\"foo bar\"><a class=\"baz\">Hey</a></p>"
3293        );
3294    }
3295    #[test]
3296    fn remove_non_allowed_classes_with_tag_class() {
3297        let fragment = "<p class=\"foo bar\"><a class=\"baz bleh\">Hey</a></p>";
3298        let result = Builder::new()
3299            .link_rel(None)
3300            .tag_attributes(hashmap![
3301                "div" => hashset!["class"],
3302            ])
3303            .allowed_classes(hashmap![
3304                "p" => hashset!["foo", "bar"],
3305                "a" => hashset!["baz"],
3306            ])
3307            .clean(fragment);
3308        assert_eq!(
3309            result.to_string(),
3310            "<p class=\"foo bar\"><a class=\"baz\">Hey</a></p>"
3311        );
3312    }
3313    #[test]
3314    fn allowed_classes_ascii_whitespace() {
3315        // According to https://infra.spec.whatwg.org/#ascii-whitespace,
3316        // TAB (\t), LF (\n), FF (\x0C), CR (\x0D) and SPACE (\x20) are
3317        // considered to be ASCII whitespace. Unicode whitespace characters
3318        // and VT (\x0B) aren't ASCII whitespace.
3319        let fragment = "<p class=\"a\tb\nc\x0Cd\re f\x0B g\u{2000}\">";
3320        let result = Builder::new()
3321            .allowed_classes(hashmap![
3322                "p" => hashset!["a", "b", "c", "d", "e", "f", "g"],
3323            ])
3324            .clean(fragment);
3325        assert_eq!(result.to_string(), r#"<p class="a b c d e"></p>"#);
3326    }
3327    #[test]
3328    fn remove_non_allowed_attributes_with_tag_attribute_values() {
3329        let fragment = "<p data-label=\"baz\" name=\"foo\"></p>";
3330        let result = Builder::new()
3331            .tag_attribute_values(hashmap![
3332                "p" => hashmap![
3333                    "data-label" => hashset!["bar"],
3334                ],
3335            ])
3336            .tag_attributes(hashmap![
3337                "p" => hashset!["name"],
3338            ])
3339            .clean(fragment);
3340        assert_eq!(result.to_string(), "<p name=\"foo\"></p>",);
3341    }
3342    #[test]
3343    fn keep_allowed_attributes_with_tag_attribute_values() {
3344        let fragment = "<p data-label=\"bar\" name=\"foo\"></p>";
3345        let result = Builder::new()
3346            .tag_attribute_values(hashmap![
3347                "p" => hashmap![
3348                    "data-label" => hashset!["bar"],
3349                ],
3350            ])
3351            .tag_attributes(hashmap![
3352                "p" => hashset!["name"],
3353            ])
3354            .clean(fragment);
3355        assert_eq!(
3356            result.to_string(),
3357            "<p data-label=\"bar\" name=\"foo\"></p>",
3358        );
3359    }
3360    #[test]
3361    fn tag_attribute_values_case_insensitive() {
3362        let fragment = "<input type=\"CHECKBOX\" name=\"foo\">";
3363        let result = Builder::new()
3364            .tags(hashset!["input"])
3365            .tag_attribute_values(hashmap![
3366                "input" => hashmap![
3367                    "type" => hashset!["checkbox"],
3368                ],
3369            ])
3370            .tag_attributes(hashmap![
3371                "input" => hashset!["name"],
3372            ])
3373            .clean(fragment);
3374        assert_eq!(result.to_string(), "<input type=\"CHECKBOX\" name=\"foo\">",);
3375    }
3376    #[test]
3377    fn set_tag_attribute_values() {
3378        let fragment = "<a href=\"https://example.com/\">Link</a>";
3379        let result = Builder::new()
3380            .link_rel(None)
3381            .add_tag_attributes("a", &["target"])
3382            .set_tag_attribute_value("a", "target", "_blank")
3383            .clean(fragment);
3384        assert_eq!(
3385            result.to_string(),
3386            "<a href=\"https://example.com/\" target=\"_blank\">Link</a>",
3387        );
3388    }
3389    #[test]
3390    fn update_existing_set_tag_attribute_values() {
3391        let fragment = "<a target=\"bad\" href=\"https://example.com/\">Link</a>";
3392        let result = Builder::new()
3393            .link_rel(None)
3394            .add_tag_attributes("a", &["target"])
3395            .set_tag_attribute_value("a", "target", "_blank")
3396            .clean(fragment);
3397        assert_eq!(
3398            result.to_string(),
3399            "<a target=\"_blank\" href=\"https://example.com/\">Link</a>",
3400        );
3401    }
3402    #[test]
3403    fn unwhitelisted_set_tag_attribute_values() {
3404        let fragment = "<span>hi</span><my-elem>";
3405        let result = Builder::new()
3406            .set_tag_attribute_value("my-elem", "my-attr", "val")
3407            .clean(fragment);
3408        assert_eq!(result.to_string(), "<span>hi</span>",);
3409    }
3410    #[test]
3411    fn remove_entity_link() {
3412        let fragment = "<a href=\"&#x6A&#x61&#x76&#x61&#x73&#x63&#x72&#x69&#x70&#x74&#x3A&#x61\
3413                        &#x6C&#x65&#x72&#x74&#x28&#x27&#x58&#x53&#x53&#x27&#x29\">Click me!</a>";
3414        let result = clean(fragment);
3415        assert_eq!(
3416            result.to_string(),
3417            "<a rel=\"noopener noreferrer\">Click me!</a>"
3418        );
3419    }
3420    #[test]
3421    fn remove_relative_url_evaluate() {
3422        fn is_absolute_path(url: &str) -> bool {
3423            let u = url.as_bytes();
3424            // `//a/b/c` is "protocol-relative", meaning "a" is a hostname
3425            // `/a/b/c` is an absolute path, and what we want to do stuff to.
3426            u.first() == Some(&b'/') && u.get(1) != Some(&b'/')
3427        }
3428        fn is_banned(url: &str) -> bool {
3429            let u = url.as_bytes();
3430            u.first() == Some(&b'b') && u.get(1) == Some(&b'a')
3431        }
3432        fn evaluate(url: &str) -> Option<Cow<'_, str>> {
3433            if is_absolute_path(url) {
3434                Some(Cow::Owned(String::from("/root") + url))
3435            } else if is_banned(url) {
3436                None
3437            } else {
3438                Some(Cow::Borrowed(url))
3439            }
3440        }
3441        let a = Builder::new()
3442            .url_relative(UrlRelative::Custom(Box::new(evaluate)))
3443            .clean("<a href=banned>banned</a><a href=/test/path>fixed</a><a href=path>passed</a><a href=http://google.com/>skipped</a>")
3444            .to_string();
3445        assert_eq!(a, "<a rel=\"noopener noreferrer\">banned</a><a href=\"/root/test/path\" rel=\"noopener noreferrer\">fixed</a><a href=\"path\" rel=\"noopener noreferrer\">passed</a><a href=\"http://google.com/\" rel=\"noopener noreferrer\">skipped</a>");
3446    }
3447    #[test]
3448    fn remove_relative_url_evaluate_b() {
3449        fn is_absolute_path(url: &str) -> bool {
3450            let u = url.as_bytes();
3451            // `//a/b/c` is "protocol-relative", meaning "a" is a hostname
3452            // `/a/b/c` is an absolute path, and what we want to do stuff to.
3453            u.first() == Some(&b'/') && u.get(1) != Some(&b'/')
3454        }
3455        fn is_banned(url: &str) -> bool {
3456            let u = url.as_bytes();
3457            u.first() == Some(&b'b') && u.get(1) == Some(&b'a')
3458        }
3459        fn evaluate(url: &str) -> Option<Cow<'_, str>> {
3460            if is_absolute_path(url) {
3461                Some(Cow::Owned(String::from("/root") + url))
3462            } else if is_banned(url) {
3463                None
3464            } else {
3465                Some(Cow::Borrowed(url))
3466            }
3467        }
3468        let a = Builder::new()
3469            .url_relative(UrlRelative::Custom(Box::new(evaluate)))
3470            .clean("<a href=banned>banned</a><a href=banned title=test>banned</a><a title=test href=banned>banned</a>")
3471            .to_string();
3472        assert_eq!(a, "<a rel=\"noopener noreferrer\">banned</a><a rel=\"noopener noreferrer\" title=\"test\">banned</a><a title=\"test\" rel=\"noopener noreferrer\">banned</a>");
3473    }
3474    #[test]
3475    fn remove_relative_url_evaluate_c() {
3476        // Don't run on absolute URLs.
3477        fn evaluate(_: &str) -> Option<Cow<'_, str>> {
3478            return Some(Cow::Owned(String::from("invalid")));
3479        }
3480        let a = Builder::new()
3481            .url_relative(UrlRelative::Custom(Box::new(evaluate)))
3482            .clean("<a href=\"https://www.google.com/\">google</a>")
3483            .to_string();
3484        assert_eq!(
3485            a,
3486            "<a href=\"https://www.google.com/\" rel=\"noopener noreferrer\">google</a>"
3487        );
3488    }
3489    #[test]
3490    fn clean_children_of_bad_element() {
3491        let fragment = "<bad><evil>a</evil>b</bad>";
3492        let result = Builder::new().clean(fragment);
3493        assert_eq!(result.to_string(), "ab");
3494    }
3495    #[test]
3496    fn reader_input() {
3497        let fragment = b"an <script>evil()</script> example";
3498        let result = Builder::new().clean_from_reader(&fragment[..]);
3499        assert!(result.is_ok());
3500        assert_eq!(result.unwrap().to_string(), "an  example");
3501    }
3502    #[test]
3503    fn reader_non_utf8() {
3504        let fragment = b"non-utf8 \xF0\x90\x80string";
3505        let result = Builder::new().clean_from_reader(&fragment[..]);
3506        assert!(result.is_ok());
3507        assert_eq!(result.unwrap().to_string(), "non-utf8 \u{fffd}string");
3508    }
3509    #[test]
3510    fn display_impl() {
3511        let fragment = r#"a <a>link</a>"#;
3512        let result = Builder::new().link_rel(None).clean(fragment);
3513        assert_eq!(format!("{}", result), "a <a>link</a>");
3514    }
3515    #[test]
3516    fn debug_impl() {
3517        let fragment = r#"a <a>link</a>"#;
3518        let result = Builder::new().link_rel(None).clean(fragment);
3519        assert_eq!(format!("{:?}", result), "Document(a <a>link</a>)");
3520    }
3521    #[cfg(ammonia_unstable)]
3522    #[test]
3523    fn to_dom_node() {
3524        let fragment = r#"a <a>link</a>"#;
3525        let result = Builder::new().link_rel(None).clean(fragment);
3526        let _node = result.to_dom_node();
3527    }
3528    #[test]
3529    fn string_from_document() {
3530        let fragment = r#"a <a>link"#;
3531        let result = String::from(Builder::new().link_rel(None).clean(fragment));
3532        assert_eq!(format!("{}", result), "a <a>link</a>");
3533    }
3534    fn require_sync<T: Sync>(_: T) {}
3535    fn require_send<T: Send>(_: T) {}
3536    #[test]
3537    fn require_sync_and_send() {
3538        require_sync(Builder::new());
3539        require_send(Builder::new());
3540    }
3541    #[test]
3542    fn id_prefixed() {
3543        let fragment = "<a id=\"hello\"></a><b id=\"hello\"></a>";
3544        let result = String::from(
3545            Builder::new()
3546                .tag_attributes(hashmap![
3547                    "a" => hashset!["id"],
3548                ])
3549                .id_prefix(Some("prefix-"))
3550                .clean(fragment),
3551        );
3552        assert_eq!(
3553            result.to_string(),
3554            "<a id=\"prefix-hello\" rel=\"noopener noreferrer\"></a><b></b>"
3555        );
3556    }
3557    #[test]
3558    fn id_already_prefixed() {
3559        let fragment = "<a id=\"prefix-hello\"></a>";
3560        let result = String::from(
3561            Builder::new()
3562                .tag_attributes(hashmap![
3563                    "a" => hashset!["id"],
3564                ])
3565                .id_prefix(Some("prefix-"))
3566                .clean(fragment),
3567        );
3568        assert_eq!(
3569            result.to_string(),
3570            "<a id=\"prefix-hello\" rel=\"noopener noreferrer\"></a>"
3571        );
3572    }
3573    #[test]
3574    fn clean_content_tags() {
3575        let fragment = "<script type=\"text/javascript\"><a>Hello!</a></script>";
3576        let result = String::from(
3577            Builder::new()
3578                .clean_content_tags(hashset!["script"])
3579                .clean(fragment),
3580        );
3581        assert_eq!(result.to_string(), "");
3582    }
3583    #[test]
3584    fn only_clean_content_tags() {
3585        let fragment = "<em>This is</em><script><a>Hello!</a></script><p>still here!</p>";
3586        let result = String::from(
3587            Builder::new()
3588                .clean_content_tags(hashset!["script"])
3589                .clean(fragment),
3590        );
3591        assert_eq!(result.to_string(), "<em>This is</em><p>still here!</p>");
3592    }
3593    #[test]
3594    fn clean_removed_default_tag() {
3595        let fragment = "<em>This is</em><script><a>Hello!</a></script><p>still here!</p>";
3596        let result = String::from(
3597            Builder::new()
3598                .rm_tags(hashset!["a"])
3599                .rm_tag_attributes("a", hashset!["href", "hreflang"])
3600                .clean_content_tags(hashset!["script"])
3601                .clean(fragment),
3602        );
3603        assert_eq!(result.to_string(), "<em>This is</em><p>still here!</p>");
3604    }
3605    #[test]
3606    #[should_panic]
3607    fn panic_on_clean_content_tag_attribute() {
3608        Builder::new()
3609            .rm_tags(std::iter::once("a"))
3610            .clean_content_tags(hashset!["a"])
3611            .clean("");
3612    }
3613    #[test]
3614    #[should_panic]
3615    fn panic_on_clean_content_tag() {
3616        Builder::new().clean_content_tags(hashset!["a"]).clean("");
3617    }
3618
3619    #[test]
3620    fn clean_text_test() {
3621        assert_eq!(
3622            clean_text("<this> is <a test function"),
3623            "&lt;this&gt;&#32;is&#32;&lt;a&#32;test&#32;function"
3624        );
3625    }
3626
3627    #[test]
3628    fn clean_text_spaces_test() {
3629        assert_eq!(clean_text("\x09\x0a\x0c\x20"), "&#9;&#10;&#12;&#32;");
3630    }
3631
3632    #[test]
3633    fn ns_svg() {
3634        // https://github.com/cure53/DOMPurify/pull/495
3635        let fragment = r##"<svg><iframe><a title="</iframe><img src onerror=alert(1)>">test"##;
3636        let result = String::from(Builder::new().add_tags(&["iframe"]).clean(fragment));
3637        assert_eq!(result.to_string(), "");
3638
3639        let fragment = "<svg><iframe>remove me</iframe></svg><iframe>keep me</iframe>";
3640        let result = String::from(Builder::new().add_tags(&["iframe"]).clean(fragment));
3641        assert_eq!(result.to_string(), "<iframe>keep me</iframe>");
3642
3643        let fragment = "<svg><a>remove me</a></svg><iframe>keep me</iframe>";
3644        let result = String::from(Builder::new().add_tags(&["iframe"]).clean(fragment));
3645        assert_eq!(result.to_string(), "<iframe>keep me</iframe>");
3646
3647        let fragment = "<svg><a>keep me</a></svg><iframe>keep me</iframe>";
3648        let result = String::from(Builder::new().add_tags(&["iframe", "svg"]).clean(fragment));
3649        assert_eq!(
3650            result.to_string(),
3651            "<svg><a rel=\"noopener noreferrer\">keep me</a></svg><iframe>keep me</iframe>"
3652        );
3653    }
3654
3655    #[test]
3656    fn ns_svg_2() {
3657        let fragment = "<svg><foreignObject><table><path><xmp><!--</xmp><img title'--&gt;&lt;img src=1 onerror=alert(1)&gt;'>";
3658        let result =  Builder::default()
3659            .strip_comments(false)
3660            .add_tags(&["svg","foreignObject","table","path","xmp"])
3661            .clean(fragment);
3662        assert_eq!(
3663            result.to_string(),
3664            "<svg><foreignObject><table></table></foreignObject></svg>"
3665        );
3666    }
3667
3668    #[test]
3669    fn ns_mathml() {
3670        // https://github.com/cure53/DOMPurify/pull/495
3671        let fragment = "<mglyph></mglyph>";
3672        let result = String::from(
3673            Builder::new()
3674                .add_tags(&["math", "mtext", "mglyph"])
3675                .clean(fragment),
3676        );
3677        assert_eq!(result.to_string(), "");
3678        let fragment = "<math><mtext><div><mglyph>";
3679        let result = String::from(
3680            Builder::new()
3681                .add_tags(&["math", "mtext", "mglyph"])
3682                .clean(fragment),
3683        );
3684        assert_eq!(
3685            result.to_string(),
3686            "<math><mtext><div></div></mtext></math>"
3687        );
3688        let fragment = "<math><mtext><mglyph>";
3689        let result = String::from(
3690            Builder::new()
3691                .add_tags(&["math", "mtext", "mglyph"])
3692                .clean(fragment),
3693        );
3694        assert_eq!(
3695            result.to_string(),
3696            "<math><mtext><mglyph></mglyph></mtext></math>"
3697        );
3698    }
3699
3700    #[test]
3701    fn ns_mathml_2() {
3702        let fragment = "<math><mtext><table><mglyph><xmp><!--</xmp><img title='--&gt;&lt;img src=1 onerror=alert(1)&gt;'>";
3703        let result =  Builder::default()
3704            .strip_comments(false)
3705            .add_tags(&["math","mtext","table","mglyph","xmp"])
3706            .clean(fragment);
3707        assert_eq!(
3708            result.to_string(),
3709            "<math><mtext><table></table></mtext></math>"
3710        );
3711    }
3712
3713
3714    #[test]
3715    fn xml_processing_instruction() {
3716        // https://blog.slonser.info/posts/dompurify-node-type-confusion/
3717        let fragment = r##"<svg><?xml-stylesheet src='slonser' ?></svg>"##;
3718        let result = String::from(Builder::new().clean(fragment));
3719        assert_eq!(result.to_string(), "");
3720
3721        let fragment = r##"<svg><?xml-stylesheet src='slonser' ?></svg>"##;
3722        let result = String::from(Builder::new().add_tags(&["svg"]).clean(fragment));
3723        assert_eq!(result.to_string(), "<svg></svg>");
3724
3725        let fragment = r##"<svg><?xml-stylesheet ><img src=x onerror="alert('Ammonia bypassed!!!')"> ?></svg>"##;
3726        let result = String::from(Builder::new().add_tags(&["svg"]).clean(fragment));
3727        assert_eq!(result.to_string(), "<svg></svg><img src=\"x\"> ?&gt;");
3728    }
3729
3730    #[test]
3731    fn generic_attribute_prefixes() {
3732        let prefix_data = ["data-"];
3733        let prefix_code = ["code-"];
3734        let mut b = Builder::new();
3735        let mut hs: HashSet<&'_ str> = HashSet::new();
3736        hs.insert("data-");
3737        assert!(b.generic_attribute_prefixes.is_none());
3738        b.generic_attribute_prefixes(hs);
3739        assert!(b.generic_attribute_prefixes.is_some());
3740        assert_eq!(b.generic_attribute_prefixes.as_ref().unwrap().len(), 1);
3741        b.add_generic_attribute_prefixes(&prefix_data);
3742        assert_eq!(b.generic_attribute_prefixes.as_ref().unwrap().len(), 1);
3743        b.add_generic_attribute_prefixes(&prefix_code);
3744        assert_eq!(b.generic_attribute_prefixes.as_ref().unwrap().len(), 2);
3745        b.rm_generic_attribute_prefixes(&prefix_code);
3746        assert_eq!(b.generic_attribute_prefixes.as_ref().unwrap().len(), 1);
3747        b.rm_generic_attribute_prefixes(&prefix_code);
3748        assert_eq!(b.generic_attribute_prefixes.as_ref().unwrap().len(), 1);
3749        b.rm_generic_attribute_prefixes(&prefix_data);
3750        assert!(b.generic_attribute_prefixes.is_none());
3751    }
3752
3753    #[test]
3754    fn generic_attribute_prefixes_clean() {
3755        let fragment = r#"<a data-1 data-2 code-1 code-2><a>Hello!</a></a>"#;
3756        let result_cleaned = String::from(
3757            Builder::new()
3758                .add_tag_attributes("a", &["data-1"])
3759                .clean(fragment),
3760        );
3761        assert_eq!(
3762            result_cleaned,
3763            r#"<a data-1="" rel="noopener noreferrer"></a><a rel="noopener noreferrer">Hello!</a>"#
3764        );
3765        let result_allowed = String::from(
3766            Builder::new()
3767                .add_tag_attributes("a", &["data-1"])
3768                .add_generic_attribute_prefixes(&["data-"])
3769                .clean(fragment),
3770        );
3771        assert_eq!(
3772            result_allowed,
3773            r#"<a data-1="" data-2="" rel="noopener noreferrer"></a><a rel="noopener noreferrer">Hello!</a>"#
3774        );
3775        let result_allowed = String::from(
3776            Builder::new()
3777                .add_tag_attributes("a", &["data-1", "code-1"])
3778                .add_generic_attribute_prefixes(&["data-", "code-"])
3779                .clean(fragment),
3780        );
3781        assert_eq!(
3782            result_allowed,
3783            r#"<a data-1="" data-2="" code-1="" code-2="" rel="noopener noreferrer"></a><a rel="noopener noreferrer">Hello!</a>"#
3784        );
3785    }
3786    #[test]
3787    fn lesser_than_isnt_html() {
3788        let fragment = "1 < 2";
3789        assert!(!is_html(fragment));
3790    }
3791    #[test]
3792    fn dense_lesser_than_isnt_html() {
3793        let fragment = "1<2";
3794        assert!(!is_html(fragment));
3795    }
3796    #[test]
3797    fn what_about_number_elements() {
3798        let fragment = "foo<2>bar";
3799        assert!(!is_html(fragment));
3800    }
3801    #[test]
3802    fn turbofish_is_html_sadly() {
3803        let fragment = "Vec::<u8>::new()";
3804        assert!(is_html(fragment));
3805    }
3806    #[test]
3807    fn stop_grinning() {
3808        let fragment = "did you really believe me? <g>";
3809        assert!(is_html(fragment));
3810    }
3811    #[test]
3812    fn dont_be_bold() {
3813        let fragment = "<b>";
3814        assert!(is_html(fragment));
3815    }
3816
3817    #[test]
3818    fn rewrite_with_root() {
3819        let tests = [
3820            (
3821                "https://github.com/rust-ammonia/ammonia/blob/master/",
3822                "README.md",
3823                "",
3824                "https://github.com/rust-ammonia/ammonia/blob/master/README.md",
3825            ),
3826            (
3827                "https://github.com/rust-ammonia/ammonia/blob/master/",
3828                "README.md",
3829                "/",
3830                "https://github.com/rust-ammonia/ammonia/blob/master/",
3831            ),
3832            (
3833                "https://github.com/rust-ammonia/ammonia/blob/master/",
3834                "README.md",
3835                "/CONTRIBUTING.md",
3836                "https://github.com/rust-ammonia/ammonia/blob/master/CONTRIBUTING.md",
3837            ),
3838            (
3839                "https://github.com/rust-ammonia/ammonia/blob/master",
3840                "README.md",
3841                "",
3842                "https://github.com/rust-ammonia/ammonia/blob/README.md",
3843            ),
3844            (
3845                "https://github.com/rust-ammonia/ammonia/blob/master",
3846                "README.md",
3847                "/",
3848                "https://github.com/rust-ammonia/ammonia/blob/",
3849            ),
3850            (
3851                "https://github.com/rust-ammonia/ammonia/blob/master",
3852                "README.md",
3853                "/CONTRIBUTING.md",
3854                "https://github.com/rust-ammonia/ammonia/blob/CONTRIBUTING.md",
3855            ),
3856            (
3857                "https://github.com/rust-ammonia/ammonia/blob/master/",
3858                "",
3859                "",
3860                "https://github.com/rust-ammonia/ammonia/blob/master/",
3861            ),
3862            (
3863                "https://github.com/rust-ammonia/ammonia/blob/master/",
3864                "",
3865                "/",
3866                "https://github.com/rust-ammonia/ammonia/blob/master/",
3867            ),
3868            (
3869                "https://github.com/rust-ammonia/ammonia/blob/master/",
3870                "",
3871                "/CONTRIBUTING.md",
3872                "https://github.com/rust-ammonia/ammonia/blob/master/CONTRIBUTING.md",
3873            ),
3874            (
3875                "https://github.com/",
3876                "rust-ammonia/ammonia/blob/master/README.md",
3877                "",
3878                "https://github.com/rust-ammonia/ammonia/blob/master/README.md",
3879            ),
3880            (
3881                "https://github.com/",
3882                "rust-ammonia/ammonia/blob/master/README.md",
3883                "/",
3884                "https://github.com/",
3885            ),
3886            (
3887                "https://github.com/",
3888                "rust-ammonia/ammonia/blob/master/README.md",
3889                "CONTRIBUTING.md",
3890                "https://github.com/rust-ammonia/ammonia/blob/master/CONTRIBUTING.md",
3891            ),
3892            (
3893                "https://github.com/",
3894                "rust-ammonia/ammonia/blob/master/README.md",
3895                "/CONTRIBUTING.md",
3896                "https://github.com/CONTRIBUTING.md",
3897            ),
3898        ];
3899        for (root, path, url, result) in tests {
3900            let h = format!(r#"<a href="{url}">test</a>"#);
3901            let r = format!(r#"<a href="{result}" rel="noopener noreferrer">test</a>"#);
3902            let a = Builder::new()
3903                .url_relative(UrlRelative::RewriteWithRoot {
3904                    root: Url::parse(root).unwrap(),
3905                    path: path.to_string(),
3906                })
3907                .clean(&h)
3908                .to_string();
3909            if r != a {
3910                println!(
3911                    "failed to check ({root}, {path}, {url}, {result})\n{r} != {a}",
3912                    r = r
3913                );
3914                assert_eq!(r, a);
3915            }
3916        }
3917    }
3918}