ammonia/lib.rs
1// Copyright (C) Michael Howell and others
2// this library is released under the same terms as Rust itself.
3
4#![deny(unsafe_code)]
5#![deny(missing_docs)]
6
7//! Ammonia is a whitelist-based HTML sanitization library. It is designed to
8//! prevent cross-site scripting, layout breaking, and clickjacking caused
9//! by untrusted user-provided HTML being mixed into a larger web page.
10//!
11//! Ammonia uses [html5ever] to parse and serialize document fragments the same way browsers do,
12//! so it is extremely resilient to syntactic obfuscation.
13//!
14//! Ammonia parses its input exactly according to the HTML5 specification;
15//! it will not linkify bare URLs, insert line or paragraph breaks, or convert `(C)` into ©.
16//! If you want that, use a markup processor before running the sanitizer, like [pulldown-cmark].
17//!
18//! # Examples
19//!
20//! ```
21//! let result = ammonia::clean(
22//! "<b><img src='' onerror=alert('hax')>I'm not trying to XSS you</b>"
23//! );
24//! assert_eq!(result, "<b><img src=\"\">I'm not trying to XSS you</b>");
25//! ```
26//!
27//! [html5ever]: https://github.com/servo/html5ever "The HTML parser in Servo"
28//! [pulldown-cmark]: https://github.com/google/pulldown-cmark "CommonMark parser"
29
30#[cfg(ammonia_unstable)]
31pub mod rcdom;
32
33#[cfg(not(ammonia_unstable))]
34mod rcdom;
35
36mod style;
37
38use html5ever::interface::Attribute;
39use html5ever::serialize::{serialize, SerializeOpts};
40use html5ever::tree_builder::{NodeOrText, TreeSink};
41use html5ever::{driver as html, local_name, ns, Namespace, QualName};
42use maplit::{hashmap, hashset};
43use std::sync::LazyLock;
44use rcdom::{Handle, NodeData, RcDom, SerializableHandle};
45use std::borrow::{Borrow, Cow};
46use std::cell::Cell;
47use std::cmp::max;
48use std::collections::{HashMap, HashSet};
49use std::fmt::{self, Display};
50use std::io;
51use std::iter::IntoIterator as IntoIter;
52use std::mem;
53use std::rc::Rc;
54use std::str::FromStr;
55use html5ever::tendril::stream::TendrilSink;
56use html5ever::tendril::StrTendril;
57use html5ever::tendril::{format_tendril, ByteTendril};
58pub use url::Url;
59
60use html5ever::buffer_queue::BufferQueue;
61use html5ever::tokenizer::{Token, TokenSink, TokenSinkResult, Tokenizer};
62pub use url;
63
// Process-wide sanitizer holding the default `Builder` configuration.
// It is constructed lazily through `LazyLock` and is what `clean` delegates to.
static AMMONIA: LazyLock<Builder<'static>> = LazyLock::new(Builder::default);
65
66/// Clean HTML with a conservative set of defaults.
67///
68/// * [tags](struct.Builder.html#defaults)
69/// * [`script` and `style` have their contents stripped](struct.Builder.html#defaults-1)
70/// * [attributes on specific tags](struct.Builder.html#defaults-2)
71/// * [attributes on all tags](struct.Builder.html#defaults-6)
72/// * [url schemes](struct.Builder.html#defaults-7)
73/// * [relative URLs are passed through, unchanged, by default](struct.Builder.html#defaults-8)
74/// * [links are marked `noopener noreferrer` by default](struct.Builder.html#defaults-9)
75/// * all `class=""` settings are blocked by default
76/// * comments are stripped by default
77/// * no generic attribute prefixes are turned on by default
78/// * no specific tag-attribute-value settings are configured by default
79///
80/// [opener]: https://mathiasbynens.github.io/rel-noopener/
81/// [referrer]: https://en.wikipedia.org/wiki/HTTP_referer
82///
83/// # Examples
84///
85/// assert_eq!(ammonia::clean("XSS<script>attack</script>"), "XSS")
86pub fn clean(src: &str) -> String {
87 AMMONIA.clean(src).to_string()
88}
89
/// Turn an arbitrary string into unformatted HTML.
///
/// This function is roughly equivalent to PHP's `htmlspecialchars` and `htmlentities`.
/// It is as strict as possible, encoding every character that has special meaning to the
/// HTML parser as a character reference. All other characters are copied through verbatim.
///
/// # Warnings
///
/// This function cannot be used to package strings into a `<script>` or `<style>` tag;
/// you need a JavaScript or CSS escaper to do that.
///
/// ```
/// // DO NOT DO THIS
/// # use ammonia::clean_text;
/// let untrusted = "Robert\"); abuse();//";
/// let html = format!("<script>invoke(\"{}\")</script>", clean_text(untrusted));
/// ```
///
/// `<textarea>` tags will strip the first newline, if present, even if that newline is encoded.
/// If you want to build an editor that works the way most folks expect them to, you should put a
/// newline at the beginning of the tag, like this:
///
/// ```
/// # use ammonia::{Builder, clean_text};
/// let untrusted = "\n\nhi!";
/// let mut b = Builder::new();
/// b.add_tags(&["textarea"]);
/// // This is the bad version
/// // The user put two newlines at the beginning, but the first one was removed
/// let sanitized = b.clean(&format!("<textarea>{}</textarea>", clean_text(untrusted))).to_string();
/// assert_eq!("<textarea>\nhi!</textarea>", sanitized);
/// // This is a good version
/// // The user put two newlines at the beginning, and we add a third one,
/// // so the result still has two
/// let sanitized = b.clean(&format!("<textarea>\n{}</textarea>", clean_text(untrusted))).to_string();
/// assert_eq!("<textarea>\n\nhi!</textarea>", sanitized);
/// // This version is also often considered good
/// // For many applications, leading and trailing whitespace is probably unwanted
/// let sanitized = b.clean(&format!("<textarea>{}</textarea>", clean_text(untrusted.trim()))).to_string();
/// assert_eq!("<textarea>hi!</textarea>", sanitized);
/// ```
///
/// It also does not make user text safe for HTML attribute microsyntaxes such as `class` or `id`.
/// Only use this function for places where HTML accepts unrestricted text such as `title` attributes
/// and paragraph contents.
pub fn clean_text(src: &str) -> String {
    // The output is at least as long as the input; reserve that much up front.
    let mut ret_val = String::with_capacity(max(4, src.len()));
    for c in src.chars() {
        let replacement = match c {
            // this character, when confronted, will start a tag
            '<' => "&lt;",
            // in an unquoted attribute, will end the attribute value
            '>' => "&gt;",
            // in an attribute surrounded by double quotes, this character will end the attribute value
            '\"' => "&quot;",
            // in an attribute surrounded by single quotes, this character will end the attribute value
            '\'' => "&apos;",
            // in HTML5, returns a bogus parse error in an unquoted attribute, while in SGML/HTML,
            // it will end an attribute value surrounded by backquotes
            '`' => "&grave;",
            // in an unquoted attribute, this character will end the attribute
            '/' => "&#47;",
            // starts an entity reference
            '&' => "&amp;",
            // if at the beginning of an unquoted attribute, will get ignored
            '=' => "&#61;",
            // will end an unquoted attribute
            ' ' => "&#32;",
            '\t' => "&#9;",
            '\n' => "&#10;",
            '\x0c' => "&#12;",
            '\r' => "&#13;",
            // a spec-compliant browser will perform this replacement anyway, but the middleware might not
            '\0' => "&#65533;",
            // ALL OTHER CHARACTERS ARE PASSED THROUGH VERBATIM
            _ => {
                ret_val.push(c);
                continue;
            }
        };
        ret_val.push_str(replacement);
    }
    ret_val
}
169
170/// Determine if a given string contains HTML
171///
172/// This function is parses the full string into HTML and checks if the input contained any
173/// HTML syntax.
174///
175/// # Note
176/// This function will return positively for strings that contain invalid HTML syntax like
177/// `<g>` and even `Vec::<u8>::new()`.
178pub fn is_html(input: &str) -> bool {
179 let santok = SanitizationTokenizer::new();
180 let mut chunk = ByteTendril::new();
181 chunk.push_slice(input.as_bytes());
182 let mut input = BufferQueue::default();
183 input.push_back(chunk.try_reinterpret().unwrap());
184
185 let tok = Tokenizer::new(santok, Default::default());
186 let _ = tok.feed(&mut input);
187 tok.end();
188 tok.sink.was_sanitized.get()
189}
190
/// Token sink used by `is_html` to record whether the tokenizer produced any markup.
#[derive(Clone)]
struct SanitizationTokenizer {
    /// Flipped to `true` by `process_token` for any token other than text, EOF, or a parse error.
    was_sanitized: Cell<bool>,
}

impl SanitizationTokenizer {
    /// Creates a sink whose flag starts out as `false`.
    pub fn new() -> Self {
        Self {
            was_sanitized: Cell::new(false),
        }
    }
}
203
204impl TokenSink for SanitizationTokenizer {
205 type Handle = ();
206 fn process_token(&self, token: Token, _line_number: u64) -> TokenSinkResult<()> {
207 match token {
208 Token::CharacterTokens(_) | Token::EOFToken | Token::ParseError(_) => {}
209 _ => {
210 self.was_sanitized.set(true);
211 }
212 }
213 TokenSinkResult::Continue
214 }
215 fn end(&self) {}
216}
217
218/// An HTML sanitizer.
219///
220/// Given a fragment of HTML, Ammonia will parse it according to the HTML5
221/// parsing algorithm and sanitize any disallowed tags or attributes. This
222/// algorithm also takes care of things like unclosed and (some) misnested
223/// tags.
224///
225/// # Examples
226///
/// ```
/// use ammonia::{Builder, UrlRelative};
///
/// let a = Builder::default()
///     .link_rel(None)
///     .url_relative(UrlRelative::PassThrough)
///     .clean("<a href=/>test")
///     .to_string();
/// assert_eq!(
///     a,
///     "<a href=\"/\">test</a>");
/// ```
237///
238/// # Panics
239///
240/// Running [`clean`] or [`clean_from_reader`] may cause a panic if the builder is
241/// configured with any of these (contradictory) settings:
242///
243/// * The `rel` attribute is added to [`generic_attributes`] or the
244/// [`tag_attributes`] for the `<a>` tag, and [`link_rel`] is not set to `None`.
245///
246/// For example, this is going to panic, since [`link_rel`] is set to
247/// `Some("noopener noreferrer")` by default,
248/// and it makes no sense to simultaneously say that the user is allowed to
249/// set their own `rel` attribute while saying that every link shall be set to
250/// a particular value:
251///
252/// ```should_panic
253/// use ammonia::Builder;
254/// use maplit::hashset;
255///
256/// # fn main() {
257/// Builder::default()
258/// .generic_attributes(hashset!["rel"])
259/// .clean("");
260/// # }
261/// ```
262///
263/// This, however, is perfectly valid:
264///
265/// ```
266/// use ammonia::Builder;
267/// use maplit::hashset;
268///
269/// # fn main() {
270/// Builder::default()
271/// .generic_attributes(hashset!["rel"])
272/// .link_rel(None)
273/// .clean("");
274/// # }
275/// ```
276///
277/// * The `class` attribute is in [`allowed_classes`] and is in the
278/// corresponding [`tag_attributes`] or in [`generic_attributes`].
279///
280/// This is done both to line up with the treatment of `rel`,
281/// and to prevent people from accidentally allowing arbitrary
282/// classes on a particular element.
283///
284/// This will panic:
285///
286/// ```should_panic
287/// use ammonia::Builder;
288/// use maplit::{hashmap, hashset};
289///
290/// # fn main() {
291/// Builder::default()
292/// .generic_attributes(hashset!["class"])
293/// .allowed_classes(hashmap!["span" => hashset!["hidden"]])
294/// .clean("");
295/// # }
296/// ```
297///
298/// This, however, is perfectly valid:
299///
300/// ```
301/// use ammonia::Builder;
302/// use maplit::{hashmap, hashset};
303///
304/// # fn main() {
305/// Builder::default()
306/// .allowed_classes(hashmap!["span" => hashset!["hidden"]])
307/// .clean("");
308/// # }
309/// ```
310///
311/// * A tag is in either [`tags`] or [`tag_attributes`] while also
312/// being in [`clean_content_tags`].
313///
314/// Both [`tags`] and [`tag_attributes`] are whitelists but
315/// [`clean_content_tags`] is a blacklist, so it doesn't make sense
316/// to have the same tag in both.
317///
318/// For example, this will panic, since the `aside` tag is in
319/// [`tags`] by default:
320///
321/// ```should_panic
322/// use ammonia::Builder;
323/// use maplit::hashset;
324///
325/// # fn main() {
326/// Builder::default()
327/// .clean_content_tags(hashset!["aside"])
328/// .clean("");
329/// # }
330/// ```
331///
332/// This, however, is valid:
333///
334/// ```
335/// use ammonia::Builder;
336/// use maplit::hashset;
337///
338/// # fn main() {
339/// Builder::default()
340/// .rm_tags(&["aside"])
341/// .clean_content_tags(hashset!["aside"])
342/// .clean("");
343/// # }
344/// ```
345///
346/// [`clean`]: #method.clean
347/// [`clean_from_reader`]: #method.clean_from_reader
348/// [`generic_attributes`]: #method.generic_attributes
/// [`tag_attributes`]: #method.tag_attributes
351/// [`link_rel`]: #method.link_rel
352/// [`allowed_classes`]: #method.allowed_classes
353/// [`id_prefix`]: #method.id_prefix
354/// [`tags`]: #method.tags
355/// [`clean_content_tags`]: #method.clean_content_tags
#[derive(Debug)]
pub struct Builder<'a> {
    // Tag names that are allowed in the output.
    tags: HashSet<&'a str>,
    // Tag names whose contents are removed entirely (default: `script`, `style`).
    clean_content_tags: HashSet<&'a str>,
    // Map from tag name to the attribute names allowed on that tag.
    tag_attributes: HashMap<&'a str, HashSet<&'a str>>,
    // Map from tag name to attribute name to the set of allowed attribute values.
    tag_attribute_values: HashMap<&'a str, HashMap<&'a str, HashSet<&'a str>>>,
    // Map from tag name to attribute name to the value that is set on that tag.
    set_tag_attribute_values: HashMap<&'a str, HashMap<&'a str, &'a str>>,
    // Attribute names allowed on all tags (default: `lang`, `title`).
    generic_attributes: HashSet<&'a str>,
    // URL schemes on the allow list (see the defaults in `Default::default`).
    url_schemes: HashSet<&'a str>,
    // Policy for relative URLs (default: `UrlRelative::PassThrough`).
    url_relative: UrlRelative<'a>,
    // Optional boxed `AttributeFilter`; `None` by default.
    // NOTE(review): how the filter is applied is not visible in this part of the file.
    attribute_filter: Option<Box<dyn AttributeFilter>>,
    // `rel` value applied to links; `None` turns this off (default: "noopener noreferrer").
    link_rel: Option<&'a str>,
    // Map from tag name to the class names allowed on that tag (empty by default).
    allowed_classes: HashMap<&'a str, HashSet<&'a str>>,
    // Whether comments are stripped (default: `true`).
    strip_comments: bool,
    // `None` by default. NOTE(review): presumably a prefix applied to `id` values — confirm
    // against the `id_prefix` setter, which is outside this view.
    id_prefix: Option<&'a str>,
    // Prefixes of attribute names allowed on any tag; `None` by default.
    generic_attribute_prefixes: Option<HashSet<&'a str>>,
    // `None` by default. NOTE(review): presumably the allowed CSS properties for `style`
    // attributes — confirm against the setter, which is outside this view.
    style_properties: Option<HashSet<&'a str>>,
}
374
375impl<'a> Default for Builder<'a> {
376 fn default() -> Self {
377 #[rustfmt::skip]
378 let tags = hashset![
379 "a", "abbr", "acronym", "area", "article", "aside", "b", "bdi",
380 "bdo", "blockquote", "br", "caption", "center", "cite", "code",
381 "col", "colgroup", "data", "dd", "del", "details", "dfn", "div",
382 "dl", "dt", "em", "figcaption", "figure", "footer", "h1", "h2",
383 "h3", "h4", "h5", "h6", "header", "hgroup", "hr", "i", "img",
384 "ins", "kbd", "li", "map", "mark", "nav", "ol", "p", "pre",
385 "q", "rp", "rt", "rtc", "ruby", "s", "samp", "small", "span",
386 "strike", "strong", "sub", "summary", "sup", "table", "tbody",
387 "td", "th", "thead", "time", "tr", "tt", "u", "ul", "var", "wbr"
388 ];
389 let clean_content_tags = hashset!["script", "style"];
390 let generic_attributes = hashset!["lang", "title"];
391 let tag_attributes = hashmap![
392 "a" => hashset![
393 "href", "hreflang"
394 ],
395 "bdo" => hashset![
396 "dir"
397 ],
398 "blockquote" => hashset![
399 "cite"
400 ],
401 "col" => hashset![
402 "align", "char", "charoff", "span"
403 ],
404 "colgroup" => hashset![
405 "align", "char", "charoff", "span"
406 ],
407 "del" => hashset![
408 "cite", "datetime"
409 ],
410 "hr" => hashset![
411 "align", "size", "width"
412 ],
413 "img" => hashset![
414 "align", "alt", "height", "src", "width"
415 ],
416 "ins" => hashset![
417 "cite", "datetime"
418 ],
419 "ol" => hashset![
420 "start"
421 ],
422 "q" => hashset![
423 "cite"
424 ],
425 "table" => hashset![
426 "align", "char", "charoff", "summary"
427 ],
428 "tbody" => hashset![
429 "align", "char", "charoff"
430 ],
431 "td" => hashset![
432 "align", "char", "charoff", "colspan", "headers", "rowspan"
433 ],
434 "tfoot" => hashset![
435 "align", "char", "charoff"
436 ],
437 "th" => hashset![
438 "align", "char", "charoff", "colspan", "headers", "rowspan", "scope"
439 ],
440 "thead" => hashset![
441 "align", "char", "charoff"
442 ],
443 "tr" => hashset![
444 "align", "char", "charoff"
445 ],
446 ];
447 let tag_attribute_values = hashmap![];
448 let set_tag_attribute_values = hashmap![];
449 let url_schemes = hashset![
450 "bitcoin",
451 "ftp",
452 "ftps",
453 "geo",
454 "http",
455 "https",
456 "im",
457 "irc",
458 "ircs",
459 "magnet",
460 "mailto",
461 "mms",
462 "mx",
463 "news",
464 "nntp",
465 "openpgp4fpr",
466 "sip",
467 "sms",
468 "smsto",
469 "ssh",
470 "tel",
471 "url",
472 "webcal",
473 "wtai",
474 "xmpp"
475 ];
476 let allowed_classes = hashmap![];
477
478 Builder {
479 tags,
480 clean_content_tags,
481 tag_attributes,
482 tag_attribute_values,
483 set_tag_attribute_values,
484 generic_attributes,
485 url_schemes,
486 url_relative: UrlRelative::PassThrough,
487 attribute_filter: None,
488 link_rel: Some("noopener noreferrer"),
489 allowed_classes,
490 strip_comments: true,
491 id_prefix: None,
492 generic_attribute_prefixes: None,
493 style_properties: None,
494 }
495 }
496}
497
498impl<'a> Builder<'a> {
499 /// Sets the tags that are allowed.
500 ///
501 /// Note that the document-level tags `<html>`, `<head>`, and `<body>` cannot
502 /// be allowed here. Ammonia parses its input as a fragment (as if it were
503 /// the contents of a `<div>`), so these tags are stripped by the parser
504 /// before they reach the sanitizer.
505 ///
506 /// # Examples
507 ///
508 /// use ammonia::Builder;
509 /// use maplit::hashset;
510 ///
511 /// # fn main() {
512 /// let tags = hashset!["my-tag"];
513 /// let a = Builder::new()
514 /// .tags(tags)
515 /// .clean("<my-tag>")
516 /// .to_string();
517 /// assert_eq!(a, "<my-tag></my-tag>");
518 /// # }
519 ///
520 /// # Defaults
521 ///
522 /// ```notest
523 /// a, abbr, acronym, area, article, aside, b, bdi,
524 /// bdo, blockquote, br, caption, center, cite, code,
525 /// col, colgroup, data, dd, del, details, dfn, div,
526 /// dl, dt, em, figcaption, figure, footer, h1, h2,
527 /// h3, h4, h5, h6, header, hgroup, hr, i, img,
528 /// ins, kbd, li, map, mark, nav, ol, p, pre,
529 /// q, rp, rt, rtc, ruby, s, samp, small, span,
530 /// strike, strong, sub, summary, sup, table, tbody,
531 /// td, th, thead, time, tr, tt, u, ul, var, wbr
532 /// ```
533 pub fn tags(&mut self, value: HashSet<&'a str>) -> &mut Self {
534 self.tags = value;
535 self
536 }
537
538 /// Add additonal whitelisted tags without overwriting old ones.
539 ///
540 /// Does nothing if the tag is already there.
541 ///
542 /// # Examples
543 ///
544 /// let a = ammonia::Builder::default()
545 /// .add_tags(&["my-tag"])
546 /// .clean("<my-tag>test</my-tag> <span>mess</span>").to_string();
547 /// assert_eq!("<my-tag>test</my-tag> <span>mess</span>", a);
548 pub fn add_tags<T: 'a + ?Sized + Borrow<str>, I: IntoIter<Item = &'a T>>(
549 &mut self,
550 it: I,
551 ) -> &mut Self {
552 self.tags.extend(it.into_iter().map(Borrow::borrow));
553 self
554 }
555
556 /// Remove already-whitelisted tags.
557 ///
558 /// Does nothing if the tags is already gone.
559 ///
560 /// # Examples
561 ///
562 /// let a = ammonia::Builder::default()
563 /// .rm_tags(&["span"])
564 /// .clean("<span></span>").to_string();
565 /// assert_eq!("", a);
566 pub fn rm_tags<'b, T: 'b + ?Sized + Borrow<str>, I: IntoIter<Item = &'b T>>(
567 &mut self,
568 it: I,
569 ) -> &mut Self {
570 for i in it {
571 self.tags.remove(i.borrow());
572 }
573 self
574 }
575
576 /// Returns a copy of the set of whitelisted tags.
577 ///
578 /// # Examples
579 ///
580 /// use maplit::hashset;
581 ///
582 /// let tags = hashset!["my-tag-1", "my-tag-2"];
583 ///
584 /// let mut b = ammonia::Builder::default();
585 /// b.tags(Clone::clone(&tags));
586 /// assert_eq!(tags, b.clone_tags());
587 pub fn clone_tags(&self) -> HashSet<&'a str> {
588 self.tags.clone()
589 }
590
591 /// Sets the tags whose contents will be completely removed from the output.
592 ///
593 /// Adding tags which are whitelisted in `tags` or `tag_attributes` will cause
594 /// a panic.
595 ///
596 /// # Examples
597 ///
598 /// use ammonia::Builder;
599 /// use maplit::hashset;
600 ///
601 /// # fn main() {
602 /// let tag_blacklist = hashset!["script", "style"];
603 /// let a = Builder::new()
604 /// .clean_content_tags(tag_blacklist)
605 /// .clean("<script>alert('hello')</script><style>a { background: #fff }</style>")
606 /// .to_string();
607 /// assert_eq!(a, "");
608 /// # }
609 ///
610 /// # Defaults
611 ///
612 /// ```notest
613 /// script, style
614 /// ```
615 pub fn clean_content_tags(&mut self, value: HashSet<&'a str>) -> &mut Self {
616 self.clean_content_tags = value;
617 self
618 }
619
620 /// Add additonal blacklisted clean-content tags without overwriting old ones.
621 ///
622 /// Does nothing if the tag is already there.
623 ///
624 /// Adding tags which are whitelisted in `tags` or `tag_attributes` will cause
625 /// a panic.
626 ///
627 /// # Examples
628 ///
629 /// let a = ammonia::Builder::default()
630 /// .add_clean_content_tags(&["my-tag"])
631 /// .clean("<my-tag>test</my-tag><span>mess</span>").to_string();
632 /// assert_eq!("<span>mess</span>", a);
633 pub fn add_clean_content_tags<T: 'a + ?Sized + Borrow<str>, I: IntoIter<Item = &'a T>>(
634 &mut self,
635 it: I,
636 ) -> &mut Self {
637 self.clean_content_tags
638 .extend(it.into_iter().map(Borrow::borrow));
639 self
640 }
641
642 /// Remove already-blacklisted clean-content tags.
643 ///
644 /// Does nothing if the tags aren't blacklisted.
645 ///
646 /// # Examples
647 /// use ammonia::Builder;
648 /// use maplit::hashset;
649 ///
650 /// # fn main() {
651 /// let tag_blacklist = hashset!["script"];
652 /// let a = ammonia::Builder::default()
653 /// .clean_content_tags(tag_blacklist)
654 /// .rm_clean_content_tags(&["script"])
655 /// .clean("<script>XSS</script>").to_string();
656 /// assert_eq!("XSS", a);
657 /// # }
658 pub fn rm_clean_content_tags<'b, T: 'b + ?Sized + Borrow<str>, I: IntoIter<Item = &'b T>>(
659 &mut self,
660 it: I,
661 ) -> &mut Self {
662 for i in it {
663 self.clean_content_tags.remove(i.borrow());
664 }
665 self
666 }
667
668 /// Returns a copy of the set of blacklisted clean-content tags.
669 ///
670 /// # Examples
671 /// # use maplit::hashset;
672 ///
673 /// let tags = hashset!["my-tag-1", "my-tag-2"];
674 ///
675 /// let mut b = ammonia::Builder::default();
676 /// b.clean_content_tags(Clone::clone(&tags));
677 /// assert_eq!(tags, b.clone_clean_content_tags());
678 pub fn clone_clean_content_tags(&self) -> HashSet<&'a str> {
679 self.clean_content_tags.clone()
680 }
681
682 /// Sets the HTML attributes that are allowed on specific tags.
683 ///
684 /// The value is structured as a map from tag names to a set of attribute names.
685 ///
686 /// If a tag is not itself whitelisted, adding entries to this map will do nothing.
687 ///
688 /// # Examples
689 ///
690 /// use ammonia::Builder;
691 /// use maplit::{hashmap, hashset};
692 ///
693 /// # fn main() {
694 /// let tags = hashset!["my-tag"];
695 /// let tag_attributes = hashmap![
696 /// "my-tag" => hashset!["val"]
697 /// ];
698 /// let a = Builder::new().tags(tags).tag_attributes(tag_attributes)
699 /// .clean("<my-tag val=1>")
700 /// .to_string();
701 /// assert_eq!(a, "<my-tag val=\"1\"></my-tag>");
702 /// # }
703 ///
704 /// # Defaults
705 ///
706 /// ```notest
707 /// a =>
708 /// href, hreflang
709 /// bdo =>
710 /// dir
711 /// blockquote =>
712 /// cite
713 /// col =>
714 /// align, char, charoff, span
715 /// colgroup =>
716 /// align, char, charoff, span
717 /// del =>
718 /// cite, datetime
719 /// hr =>
720 /// align, size, width
721 /// img =>
722 /// align, alt, height, src, width
723 /// ins =>
724 /// cite, datetime
725 /// ol =>
726 /// start
727 /// q =>
728 /// cite
729 /// table =>
730 /// align, char, charoff, summary
731 /// tbody =>
732 /// align, char, charoff
733 /// td =>
734 /// align, char, charoff, colspan, headers, rowspan
735 /// tfoot =>
736 /// align, char, charoff
737 /// th =>
738 /// align, char, charoff, colspan, headers, rowspan, scope
739 /// thead =>
740 /// align, char, charoff
741 /// tr =>
742 /// align, char, charoff
743 /// ```
744 pub fn tag_attributes(&mut self, value: HashMap<&'a str, HashSet<&'a str>>) -> &mut Self {
745 self.tag_attributes = value;
746 self
747 }
748
749 /// Add additonal whitelisted tag-specific attributes without overwriting old ones.
750 ///
751 /// # Examples
752 ///
753 /// let a = ammonia::Builder::default()
754 /// .add_tags(&["my-tag"])
755 /// .add_tag_attributes("my-tag", &["my-attr"])
756 /// .clean("<my-tag my-attr>test</my-tag> <span>mess</span>").to_string();
757 /// assert_eq!("<my-tag my-attr=\"\">test</my-tag> <span>mess</span>", a);
758 pub fn add_tag_attributes<
759 T: 'a + ?Sized + Borrow<str>,
760 U: 'a + ?Sized + Borrow<str>,
761 I: IntoIter<Item = &'a T>,
762 >(
763 &mut self,
764 tag: &'a U,
765 it: I,
766 ) -> &mut Self {
767 self.tag_attributes
768 .entry(tag.borrow())
769 .or_default()
770 .extend(it.into_iter().map(Borrow::borrow));
771 self
772 }
773
774 /// Remove already-whitelisted tag-specific attributes.
775 ///
776 /// Does nothing if the attribute is already gone.
777 ///
778 /// # Examples
779 ///
780 /// let a = ammonia::Builder::default()
781 /// .rm_tag_attributes("a", &["href"])
782 /// .clean("<a href=\"/\"></a>").to_string();
783 /// assert_eq!("<a rel=\"noopener noreferrer\"></a>", a);
784 pub fn rm_tag_attributes<
785 'b,
786 'c,
787 T: 'b + ?Sized + Borrow<str>,
788 U: 'c + ?Sized + Borrow<str>,
789 I: IntoIter<Item = &'b T>,
790 >(
791 &mut self,
792 tag: &'c U,
793 it: I,
794 ) -> &mut Self {
795 if let Some(tag) = self.tag_attributes.get_mut(tag.borrow()) {
796 for i in it {
797 tag.remove(i.borrow());
798 }
799 }
800 self
801 }
802
803 /// Returns a copy of the set of whitelisted tag-specific attributes.
804 ///
805 /// # Examples
806 /// use maplit::{hashmap, hashset};
807 ///
808 /// let tag_attributes = hashmap![
809 /// "my-tag" => hashset!["my-attr-1", "my-attr-2"]
810 /// ];
811 ///
812 /// let mut b = ammonia::Builder::default();
813 /// b.tag_attributes(Clone::clone(&tag_attributes));
814 /// assert_eq!(tag_attributes, b.clone_tag_attributes());
815 pub fn clone_tag_attributes(&self) -> HashMap<&'a str, HashSet<&'a str>> {
816 self.tag_attributes.clone()
817 }
818
819 /// Sets the values of HTML attributes that are allowed on specific tags.
820 ///
821 /// The value is structured as a map from tag names to a map from attribute names to a set of
822 /// attribute values.
823 ///
824 /// If a tag is not itself whitelisted, adding entries to this map will do nothing.
825 ///
826 /// # Examples
827 ///
828 /// use ammonia::Builder;
829 /// use maplit::{hashmap, hashset};
830 ///
831 /// # fn main() {
832 /// let tags = hashset!["my-tag"];
833 /// let tag_attribute_values = hashmap![
834 /// "my-tag" => hashmap![
835 /// "my-attr" => hashset!["val"],
836 /// ],
837 /// ];
838 /// let a = Builder::new().tags(tags).tag_attribute_values(tag_attribute_values)
839 /// .clean("<my-tag my-attr=val>")
840 /// .to_string();
841 /// assert_eq!(a, "<my-tag my-attr=\"val\"></my-tag>");
842 /// # }
843 ///
844 /// # Defaults
845 ///
846 /// None.
847 pub fn tag_attribute_values(
848 &mut self,
849 value: HashMap<&'a str, HashMap<&'a str, HashSet<&'a str>>>,
850 ) -> &mut Self {
851 self.tag_attribute_values = value;
852 self
853 }
854
855 /// Add additonal whitelisted tag-specific attribute values without overwriting old ones.
856 ///
857 /// # Examples
858 ///
859 /// let a = ammonia::Builder::default()
860 /// .add_tags(&["my-tag"])
861 /// .add_tag_attribute_values("my-tag", "my-attr", &[""])
862 /// .clean("<my-tag my-attr>test</my-tag> <span>mess</span>").to_string();
863 /// assert_eq!("<my-tag my-attr=\"\">test</my-tag> <span>mess</span>", a);
864 pub fn add_tag_attribute_values<
865 T: 'a + ?Sized + Borrow<str>,
866 U: 'a + ?Sized + Borrow<str>,
867 V: 'a + ?Sized + Borrow<str>,
868 I: IntoIter<Item = &'a T>,
869 >(
870 &mut self,
871 tag: &'a U,
872 attribute: &'a V,
873 it: I,
874 ) -> &mut Self {
875 self.tag_attribute_values
876 .entry(tag.borrow())
877 .or_default()
878 .entry(attribute.borrow())
879 .or_default()
880 .extend(it.into_iter().map(Borrow::borrow));
881
882 self
883 }
884
885 /// Remove already-whitelisted tag-specific attribute values.
886 ///
887 /// Does nothing if the attribute or the value is already gone.
888 ///
889 /// # Examples
890 ///
891 /// let a = ammonia::Builder::default()
892 /// .rm_tag_attributes("a", &["href"])
893 /// .add_tag_attribute_values("a", "href", &["/"])
894 /// .rm_tag_attribute_values("a", "href", &["/"])
895 /// .clean("<a href=\"/\"></a>").to_string();
896 /// assert_eq!("<a rel=\"noopener noreferrer\"></a>", a);
897 pub fn rm_tag_attribute_values<
898 'b,
899 'c,
900 T: 'b + ?Sized + Borrow<str>,
901 U: 'c + ?Sized + Borrow<str>,
902 V: 'c + ?Sized + Borrow<str>,
903 I: IntoIter<Item = &'b T>,
904 >(
905 &mut self,
906 tag: &'c U,
907 attribute: &'c V,
908 it: I,
909 ) -> &mut Self {
910 if let Some(attrs) = self
911 .tag_attribute_values
912 .get_mut(tag.borrow())
913 .and_then(|map| map.get_mut(attribute.borrow()))
914 {
915 for i in it {
916 attrs.remove(i.borrow());
917 }
918 }
919 self
920 }
921
922 /// Returns a copy of the set of whitelisted tag-specific attribute values.
923 ///
924 /// # Examples
925 ///
926 /// use maplit::{hashmap, hashset};
927 ///
928 /// let attribute_values = hashmap![
929 /// "my-attr-1" => hashset!["foo"],
930 /// "my-attr-2" => hashset!["baz", "bar"],
931 /// ];
932 /// let tag_attribute_values = hashmap![
933 /// "my-tag" => attribute_values
934 /// ];
935 ///
936 /// let mut b = ammonia::Builder::default();
937 /// b.tag_attribute_values(Clone::clone(&tag_attribute_values));
938 /// assert_eq!(tag_attribute_values, b.clone_tag_attribute_values());
939 pub fn clone_tag_attribute_values(
940 &self,
941 ) -> HashMap<&'a str, HashMap<&'a str, HashSet<&'a str>>> {
942 self.tag_attribute_values.clone()
943 }
944
945 /// Sets the values of HTML attributes that are to be set on specific tags.
946 ///
947 /// The value is structured as a map from tag names to a map from attribute names to an
948 /// attribute value.
949 ///
950 /// If a tag is not itself whitelisted, adding entries to this map will do nothing.
951 ///
952 /// # Examples
953 ///
954 /// use ammonia::Builder;
955 /// use maplit::{hashmap, hashset};
956 ///
957 /// # fn main() {
958 /// let tags = hashset!["my-tag"];
959 /// let set_tag_attribute_values = hashmap![
960 /// "my-tag" => hashmap![
961 /// "my-attr" => "val",
962 /// ],
963 /// ];
964 /// let a = Builder::new().tags(tags).set_tag_attribute_values(set_tag_attribute_values)
965 /// .clean("<my-tag>")
966 /// .to_string();
967 /// assert_eq!(a, "<my-tag my-attr=\"val\"></my-tag>");
968 /// # }
969 ///
970 /// # Defaults
971 ///
972 /// None.
973 pub fn set_tag_attribute_values(
974 &mut self,
975 value: HashMap<&'a str, HashMap<&'a str, &'a str>>,
976 ) -> &mut Self {
977 self.set_tag_attribute_values = value;
978 self
979 }
980
981 /// Add an attribute value to set on a specific element.
982 ///
983 /// # Examples
984 ///
985 /// let a = ammonia::Builder::default()
986 /// .add_tags(&["my-tag"])
987 /// .set_tag_attribute_value("my-tag", "my-attr", "val")
988 /// .clean("<my-tag>test</my-tag> <span>mess</span>").to_string();
989 /// assert_eq!("<my-tag my-attr=\"val\">test</my-tag> <span>mess</span>", a);
990 pub fn set_tag_attribute_value<
991 T: 'a + ?Sized + Borrow<str>,
992 A: 'a + ?Sized + Borrow<str>,
993 V: 'a + ?Sized + Borrow<str>,
994 >(
995 &mut self,
996 tag: &'a T,
997 attribute: &'a A,
998 value: &'a V,
999 ) -> &mut Self {
1000 self.set_tag_attribute_values
1001 .entry(tag.borrow())
1002 .or_default()
1003 .insert(attribute.borrow(), value.borrow());
1004 self
1005 }
1006
1007 /// Remove existing tag-specific attribute values to be set.
1008 ///
1009 /// Does nothing if the attribute is already gone.
1010 ///
1011 /// # Examples
1012 ///
1013 /// let a = ammonia::Builder::default()
1014 /// // this does nothing, since no value is set for this tag attribute yet
1015 /// .rm_set_tag_attribute_value("a", "target")
1016 /// .set_tag_attribute_value("a", "target", "_blank")
1017 /// .rm_set_tag_attribute_value("a", "target")
1018 /// .clean("<a href=\"/\"></a>").to_string();
1019 /// assert_eq!("<a href=\"/\" rel=\"noopener noreferrer\"></a>", a);
1020 pub fn rm_set_tag_attribute_value<
1021 T: 'a + ?Sized + Borrow<str>,
1022 A: 'a + ?Sized + Borrow<str>,
1023 >(
1024 &mut self,
1025 tag: &'a T,
1026 attribute: &'a A,
1027 ) -> &mut Self {
1028 if let Some(attributes) = self.set_tag_attribute_values.get_mut(tag.borrow()) {
1029 attributes.remove(attribute.borrow());
1030 }
1031 self
1032 }
1033
1034 /// Returns the value that will be set for the attribute on the element, if any.
1035 ///
1036 /// # Examples
1037 ///
1038 /// let mut b = ammonia::Builder::default();
1039 /// b.set_tag_attribute_value("a", "target", "_blank");
1040 /// let value = b.get_set_tag_attribute_value("a", "target");
1041 /// assert_eq!(value, Some("_blank"));
1042 pub fn get_set_tag_attribute_value<
1043 T: 'a + ?Sized + Borrow<str>,
1044 A: 'a + ?Sized + Borrow<str>,
1045 >(
1046 &self,
1047 tag: &'a T,
1048 attribute: &'a A,
1049 ) -> Option<&'a str> {
1050 self.set_tag_attribute_values
1051 .get(tag.borrow())
1052 .and_then(|map| map.get(attribute.borrow()))
1053 .copied()
1054 }
1055
/// Returns a copy of the set of tag-specific attribute values to be set.
///
/// The result maps each tag name to a map from attribute name to the value
/// registered for it. It is a clone of the builder's current configuration,
/// so modifying the returned map has no effect on the builder.
///
/// # Examples
///
///     use maplit::{hashmap, hashset};
///
///     let attribute_values = hashmap![
///         "my-attr-1" => "foo",
///         "my-attr-2" => "bar",
///     ];
///     let set_tag_attribute_values = hashmap![
///         "my-tag" => attribute_values,
///     ];
///
///     let mut b = ammonia::Builder::default();
///     b.set_tag_attribute_values(Clone::clone(&set_tag_attribute_values));
///     assert_eq!(set_tag_attribute_values, b.clone_set_tag_attribute_values());
pub fn clone_set_tag_attribute_values(&self) -> HashMap<&'a str, HashMap<&'a str, &'a str>> {
    self.set_tag_attribute_values.clone()
}
1076
/// Sets the prefixes of attributes that are allowed on any tag.
///
/// Any prefixes configured earlier are replaced by `value`.
///
/// # Examples
///
///     use ammonia::Builder;
///     use maplit::hashset;
///
///     # fn main() {
///     let prefixes = hashset!["data-"];
///     let a = Builder::new()
///         .generic_attribute_prefixes(prefixes)
///         .clean("<b data-val=1>")
///         .to_string();
///     assert_eq!(a, "<b data-val=\"1\"></b>");
///     # }
///
/// # Defaults
///
/// No attribute prefixes are allowed by default.
pub fn generic_attribute_prefixes(&mut self, value: HashSet<&'a str>) -> &mut Self {
    self.generic_attribute_prefixes = Some(value);
    self
}
1100
1101 /// Add additional whitelisted attribute prefix without overwriting old ones.
1102 ///
1103 /// # Examples
1104 ///
1105 /// let a = ammonia::Builder::default()
1106 /// .add_generic_attribute_prefixes(&["my-"])
1107 /// .clean("<span my-attr>mess</span>").to_string();
1108 /// assert_eq!("<span my-attr=\"\">mess</span>", a);
1109 pub fn add_generic_attribute_prefixes<
1110 T: 'a + ?Sized + Borrow<str>,
1111 I: IntoIter<Item = &'a T>,
1112 >(
1113 &mut self,
1114 it: I,
1115 ) -> &mut Self {
1116 self.generic_attribute_prefixes
1117 .get_or_insert_with(HashSet::new)
1118 .extend(it.into_iter().map(Borrow::borrow));
1119 self
1120 }
1121
1122 /// Remove already-whitelisted attribute prefixes.
1123 ///
1124 /// Does nothing if the attribute prefix is already gone.
1125 ///
1126 /// # Examples
1127 ///
1128 /// let a = ammonia::Builder::default()
1129 /// .add_generic_attribute_prefixes(&["data-", "code-"])
1130 /// .rm_generic_attribute_prefixes(&["data-"])
1131 /// .clean("<span code-test=\"foo\" data-test=\"cool\"></span>").to_string();
1132 /// assert_eq!("<span code-test=\"foo\"></span>", a);
1133 pub fn rm_generic_attribute_prefixes<
1134 'b,
1135 T: 'b + ?Sized + Borrow<str>,
1136 I: IntoIter<Item = &'b T>,
1137 >(
1138 &mut self,
1139 it: I,
1140 ) -> &mut Self {
1141 if let Some(true) = self.generic_attribute_prefixes.as_mut().map(|prefixes| {
1142 for i in it {
1143 let _ = prefixes.remove(i.borrow());
1144 }
1145 prefixes.is_empty()
1146 }) {
1147 self.generic_attribute_prefixes = None;
1148 }
1149 self
1150 }
1151
/// Returns a copy of the set of whitelisted attribute prefixes.
///
/// Returns `None` when the builder currently holds no prefix set. The result
/// is a clone, so modifying it has no effect on the builder.
///
/// # Examples
///
///     use maplit::hashset;
///
///     let generic_attribute_prefixes = hashset!["my-prfx-1-", "my-prfx-2-"];
///
///     let mut b = ammonia::Builder::default();
///     b.generic_attribute_prefixes(Clone::clone(&generic_attribute_prefixes));
///     assert_eq!(Some(generic_attribute_prefixes), b.clone_generic_attribute_prefixes());
pub fn clone_generic_attribute_prefixes(&self) -> Option<HashSet<&'a str>> {
    self.generic_attribute_prefixes.clone()
}
1166
/// Sets the attributes that are allowed on any tag.
///
/// The previously configured set of generic attributes is replaced by `value`.
///
/// # Examples
///
///     use ammonia::Builder;
///     use maplit::hashset;
///
///     # fn main() {
///     let attributes = hashset!["data-val"];
///     let a = Builder::new()
///         .generic_attributes(attributes)
///         .clean("<b data-val=1>")
///         .to_string();
///     assert_eq!(a, "<b data-val=\"1\"></b>");
///     # }
///
/// # Defaults
///
/// ```notest
/// lang, title
/// ```
pub fn generic_attributes(&mut self, value: HashSet<&'a str>) -> &mut Self {
    self.generic_attributes = value;
    self
}
1192
1193 /// Add additonal whitelisted attributes without overwriting old ones.
1194 ///
1195 /// # Examples
1196 ///
1197 /// let a = ammonia::Builder::default()
1198 /// .add_generic_attributes(&["my-attr"])
1199 /// .clean("<span my-attr>mess</span>").to_string();
1200 /// assert_eq!("<span my-attr=\"\">mess</span>", a);
1201 pub fn add_generic_attributes<T: 'a + ?Sized + Borrow<str>, I: IntoIter<Item = &'a T>>(
1202 &mut self,
1203 it: I,
1204 ) -> &mut Self {
1205 self.generic_attributes
1206 .extend(it.into_iter().map(Borrow::borrow));
1207 self
1208 }
1209
1210 /// Remove already-whitelisted attributes.
1211 ///
1212 /// Does nothing if the attribute is already gone.
1213 ///
1214 /// # Examples
1215 ///
1216 /// let a = ammonia::Builder::default()
1217 /// .rm_generic_attributes(&["title"])
1218 /// .clean("<span title=\"cool\"></span>").to_string();
1219 /// assert_eq!("<span></span>", a);
1220 pub fn rm_generic_attributes<'b, T: 'b + ?Sized + Borrow<str>, I: IntoIter<Item = &'b T>>(
1221 &mut self,
1222 it: I,
1223 ) -> &mut Self {
1224 for i in it {
1225 self.generic_attributes.remove(i.borrow());
1226 }
1227 self
1228 }
1229
/// Returns a copy of the set of whitelisted attributes.
///
/// These are the attributes allowed on any tag. The result is a clone, so
/// modifying it has no effect on the builder.
///
/// # Examples
///
///     use maplit::hashset;
///
///     let generic_attributes = hashset!["my-attr-1", "my-attr-2"];
///
///     let mut b = ammonia::Builder::default();
///     b.generic_attributes(Clone::clone(&generic_attributes));
///     assert_eq!(generic_attributes, b.clone_generic_attributes());
pub fn clone_generic_attributes(&self) -> HashSet<&'a str> {
    self.generic_attributes.clone()
}
1244
/// Sets the URL schemes permitted on `href` and `src` attributes.
///
/// The previously configured set of schemes is replaced by `value`.
///
/// # Examples
///
///     use ammonia::Builder;
///     use maplit::hashset;
///
///     # fn main() {
///     let url_schemes = hashset![
///         "http", "https", "mailto", "magnet"
///     ];
///     let a = Builder::new().url_schemes(url_schemes)
///         .clean("<a href=\"magnet:?xt=urn:ed2k:31D6CFE0D16AE931B73C59D7E0C089C0&xl=0&dn=zero_len.fil&xt=urn:bitprint:3I42H3S6NNFQ2MSVX7XZKYAYSCX5QBYJ.LWPNACQDBZRYXW3VHJVCJ64QBZNGHOHHHZWCLNQ&xt=urn:md5:D41D8CD98F00B204E9800998ECF8427E\">zero-length file</a>")
///         .to_string();
///
///     // See `link_rel` for information on the rel="noopener noreferrer" attribute
///     // in the cleaned HTML.
///     assert_eq!(a,
///       "<a href=\"magnet:?xt=urn:ed2k:31D6CFE0D16AE931B73C59D7E0C089C0&xl=0&dn=zero_len.fil&xt=urn:bitprint:3I42H3S6NNFQ2MSVX7XZKYAYSCX5QBYJ.LWPNACQDBZRYXW3VHJVCJ64QBZNGHOHHHZWCLNQ&xt=urn:md5:D41D8CD98F00B204E9800998ECF8427E\" rel=\"noopener noreferrer\">zero-length file</a>");
///     # }
///
/// # Defaults
///
/// ```notest
/// bitcoin, ftp, ftps, geo, http, https, im, irc,
/// ircs, magnet, mailto, mms, mx, news, nntp,
/// openpgp4fpr, sip, sms, smsto, ssh, tel, url,
/// webcal, wtai, xmpp
/// ```
pub fn url_schemes(&mut self, value: HashSet<&'a str>) -> &mut Self {
    self.url_schemes = value;
    self
}
1278
1279 /// Add additonal whitelisted URL schemes without overwriting old ones.
1280 ///
1281 /// # Examples
1282 ///
1283 /// let a = ammonia::Builder::default()
1284 /// .add_url_schemes(&["my-scheme"])
1285 /// .clean("<a href=my-scheme:home>mess</span>").to_string();
1286 /// assert_eq!("<a href=\"my-scheme:home\" rel=\"noopener noreferrer\">mess</a>", a);
1287 pub fn add_url_schemes<T: 'a + ?Sized + Borrow<str>, I: IntoIter<Item = &'a T>>(
1288 &mut self,
1289 it: I,
1290 ) -> &mut Self {
1291 self.url_schemes.extend(it.into_iter().map(Borrow::borrow));
1292 self
1293 }
1294
1295 /// Remove already-whitelisted attributes.
1296 ///
1297 /// Does nothing if the attribute is already gone.
1298 ///
1299 /// # Examples
1300 ///
1301 /// let a = ammonia::Builder::default()
1302 /// .rm_url_schemes(&["ftp"])
1303 /// .clean("<a href=\"ftp://ftp.mozilla.org/\"></a>").to_string();
1304 /// assert_eq!("<a rel=\"noopener noreferrer\"></a>", a);
1305 pub fn rm_url_schemes<'b, T: 'b + ?Sized + Borrow<str>, I: IntoIter<Item = &'b T>>(
1306 &mut self,
1307 it: I,
1308 ) -> &mut Self {
1309 for i in it {
1310 self.url_schemes.remove(i.borrow());
1311 }
1312 self
1313 }
1314
/// Returns a copy of the set of whitelisted URL schemes.
///
/// The result is a clone, so modifying it has no effect on the builder.
///
/// # Examples
///
///     use maplit::hashset;
///
///     let url_schemes = hashset!["my-scheme-1", "my-scheme-2"];
///
///     let mut b = ammonia::Builder::default();
///     b.url_schemes(Clone::clone(&url_schemes));
///     assert_eq!(url_schemes, b.clone_url_schemes());
pub fn clone_url_schemes(&self) -> HashSet<&'a str> {
    self.url_schemes.clone()
}
1328
/// Configures the behavior for relative URLs: pass-through, resolve-with-base,
/// deny, or a custom rewriting callback (`UrlRelative::Custom`).
///
/// The previously configured behavior is replaced by `value`.
///
/// # Examples
///
///     use ammonia::{Builder, UrlRelative};
///
///     let a = Builder::new().url_relative(UrlRelative::PassThrough)
///         .clean("<a href=/>Home</a>")
///         .to_string();
///
///     // See `link_rel` for information on the rel="noopener noreferrer" attribute
///     // in the cleaned HTML.
///     assert_eq!(
///       a,
///       "<a href=\"/\" rel=\"noopener noreferrer\">Home</a>");
///
/// # Defaults
///
/// ```notest
/// UrlRelative::PassThrough
/// ```
pub fn url_relative(&mut self, value: UrlRelative<'a>) -> &mut Self {
    self.url_relative = value;
    self
}
1354
1355 /// Allows rewriting of all attributes using a callback.
1356 ///
1357 /// The callback takes name of the element, attribute and its value.
1358 /// Returns `None` to remove the attribute, or a value to use.
1359 ///
1360 /// Rewriting of attributes with URLs is done before `url_relative()`.
1361 ///
1362 /// # Panics
1363 ///
1364 /// If more than one callback is set.
1365 ///
1366 /// # Examples
1367 ///
1368 /// ```rust
1369 /// use ammonia::Builder;
1370 /// let a = Builder::new()
1371 /// .attribute_filter(|element, attribute, value| {
1372 /// match (element, attribute) {
1373 /// ("img", "src") => None,
1374 /// _ => Some(value.into())
1375 /// }
1376 /// })
1377 /// .link_rel(None)
1378 /// .clean("<a href=/><img alt=Home src=foo></a>")
1379 /// .to_string();
1380 /// assert_eq!(a,
1381 /// r#"<a href="/"><img alt="Home"></a>"#);
1382 /// ```
1383 pub fn attribute_filter<'cb, CallbackFn>(&mut self, callback: CallbackFn) -> &mut Self
1384 where
1385 CallbackFn: for<'u> Fn(&str, &str, &'u str) -> Option<Cow<'u, str>> + Send + Sync + 'static,
1386 {
1387 assert!(
1388 self.attribute_filter.is_none(),
1389 "attribute_filter can be set only once"
1390 );
1391 self.attribute_filter = Some(Box::new(callback));
1392 self
1393 }
1394
/// Returns `true` if the relative URL resolver is set to `Deny`.
///
/// Returns `false` for every other `UrlRelative` setting.
///
/// # Examples
///
///     use ammonia::{Builder, UrlRelative};
///     let mut a = Builder::default();
///     a.url_relative(UrlRelative::Deny);
///     assert!(a.is_url_relative_deny());
///     a.url_relative(UrlRelative::PassThrough);
///     assert!(!a.is_url_relative_deny());
pub fn is_url_relative_deny(&self) -> bool {
    matches!(self.url_relative, UrlRelative::Deny)
}
1408
/// Returns `true` if the relative URL resolver is set to `PassThrough`.
///
/// Returns `false` for every other `UrlRelative` setting.
///
/// # Examples
///
///     use ammonia::{Builder, UrlRelative};
///     let mut a = Builder::default();
///     a.url_relative(UrlRelative::Deny);
///     assert!(!a.is_url_relative_pass_through());
///     a.url_relative(UrlRelative::PassThrough);
///     assert!(a.is_url_relative_pass_through());
pub fn is_url_relative_pass_through(&self) -> bool {
    matches!(self.url_relative, UrlRelative::PassThrough)
}
1422
/// Returns `true` if the relative URL resolver is set to `Custom`.
///
/// The callback stored in the `Custom` variant is not inspected; any
/// `Custom` value counts.
///
/// # Examples
///
///     use ammonia::{Builder, UrlRelative};
///     use std::borrow::Cow;
///     fn test(a: &str) -> Option<Cow<str>> { None }
///     # fn main() {
///     let mut a = Builder::default();
///     a.url_relative(UrlRelative::Custom(Box::new(test)));
///     assert!(a.is_url_relative_custom());
///     a.url_relative(UrlRelative::PassThrough);
///     assert!(!a.is_url_relative_custom());
///     a.url_relative(UrlRelative::Deny);
///     assert!(!a.is_url_relative_custom());
///     # }
pub fn is_url_relative_custom(&self) -> bool {
    matches!(self.url_relative, UrlRelative::Custom(_))
}
1442
/// Configures a `rel` attribute that will be added on links.
///
/// If `rel` is in the generic attributes or in the tag attributes of `a`,
/// this must be set to `None`; otherwise cleaning a document panics.
/// Common `rel` values to include:
///
/// * `noopener`: This prevents [a particular type of XSS attack],
///   and should usually be turned on for untrusted HTML.
/// * `noreferrer`: This prevents the browser from [sending the source URL]
///   to the website that is linked to.
/// * `nofollow`: This prevents search engines from [using this link for
///   ranking], which disincentivizes spammers.
///
/// To turn on rel-insertion, call this function with a space-separated list.
/// Ammonia does not parse rel-attributes;
/// it just puts the given string into the attribute directly.
///
/// [a particular type of XSS attack]: https://mathiasbynens.github.io/rel-noopener/
/// [sending the source URL]: https://en.wikipedia.org/wiki/HTTP_referer
/// [using this link for ranking]: https://en.wikipedia.org/wiki/Nofollow
///
/// # Examples
///
///     use ammonia::Builder;
///
///     let a = Builder::new().link_rel(None)
///         .clean("<a href=https://rust-lang.org/>Rust</a>")
///         .to_string();
///     assert_eq!(
///       a,
///       "<a href=\"https://rust-lang.org/\">Rust</a>");
///
/// # Defaults
///
/// ```notest
/// Some("noopener noreferrer")
/// ```
pub fn link_rel(&mut self, value: Option<&'a str>) -> &mut Self {
    self.link_rel = value;
    self
}
1483
/// Returns the settings for links' `rel` attribute, if one is set.
///
/// Returns `None` when rel-insertion is turned off.
///
/// # Examples
///
///     use ammonia::{Builder, UrlRelative};
///     let mut a = Builder::default();
///     a.link_rel(Some("a b"));
///     assert_eq!(a.get_link_rel(), Some("a b"));
pub fn get_link_rel(&self) -> Option<&str> {
    self.link_rel
}
1495
/// Sets the CSS classes that are allowed on specific tags.
///
/// The value is structured as a map from tag names to a set of class names.
/// The previously configured map is replaced by `value`.
///
/// If the `class` attribute is itself whitelisted, either generically or for
/// a tag that has an entry in this map, cleaning a document will panic.
///
/// # Examples
///
///     use ammonia::Builder;
///     use maplit::{hashmap, hashset};
///
///     # fn main() {
///     let allowed_classes = hashmap![
///         "code" => hashset!["rs", "ex", "c", "cxx", "js"]
///     ];
///     let a = Builder::new()
///         .allowed_classes(allowed_classes)
///         .clean("<code class=rs>fn main() {}</code>")
///         .to_string();
///     assert_eq!(
///       a,
///       "<code class=\"rs\">fn main() {}</code>");
///     # }
///
/// # Defaults
///
/// The set of allowed classes is empty by default.
pub fn allowed_classes(&mut self, value: HashMap<&'a str, HashSet<&'a str>>) -> &mut Self {
    self.allowed_classes = value;
    self
}
1528
1529 /// Add additonal whitelisted classes without overwriting old ones.
1530 ///
1531 /// # Examples
1532 ///
1533 /// let a = ammonia::Builder::default()
1534 /// .add_allowed_classes("a", &["onebox"])
1535 /// .clean("<a href=/ class=onebox>mess</span>").to_string();
1536 /// assert_eq!("<a href=\"/\" class=\"onebox\" rel=\"noopener noreferrer\">mess</a>", a);
1537 pub fn add_allowed_classes<
1538 T: 'a + ?Sized + Borrow<str>,
1539 U: 'a + ?Sized + Borrow<str>,
1540 I: IntoIter<Item = &'a T>,
1541 >(
1542 &mut self,
1543 tag: &'a U,
1544 it: I,
1545 ) -> &mut Self {
1546 self.allowed_classes
1547 .entry(tag.borrow())
1548 .or_default()
1549 .extend(it.into_iter().map(Borrow::borrow));
1550 self
1551 }
1552
1553 /// Remove already-whitelisted attributes.
1554 ///
1555 /// Does nothing if the attribute is already gone.
1556 ///
1557 /// # Examples
1558 ///
1559 /// let a = ammonia::Builder::default()
1560 /// .add_allowed_classes("span", &["active"])
1561 /// .rm_allowed_classes("span", &["active"])
1562 /// .clean("<span class=active>").to_string();
1563 /// assert_eq!("<span class=\"\"></span>", a);
1564 pub fn rm_allowed_classes<
1565 'b,
1566 'c,
1567 T: 'b + ?Sized + Borrow<str>,
1568 U: 'c + ?Sized + Borrow<str>,
1569 I: IntoIter<Item = &'b T>,
1570 >(
1571 &mut self,
1572 tag: &'c U,
1573 it: I,
1574 ) -> &mut Self {
1575 if let Some(tag) = self.allowed_classes.get_mut(tag.borrow()) {
1576 for i in it {
1577 tag.remove(i.borrow());
1578 }
1579 }
1580 self
1581 }
1582
/// Returns a copy of the set of whitelisted class attributes.
///
/// The result maps each tag name to the set of class names allowed on it.
/// It is a clone, so modifying it has no effect on the builder.
///
/// # Examples
///
///     use maplit::{hashmap, hashset};
///
///     let allowed_classes = hashmap![
///         "my-tag" => hashset!["my-class-1", "my-class-2"]
///     ];
///
///     let mut b = ammonia::Builder::default();
///     b.allowed_classes(Clone::clone(&allowed_classes));
///     assert_eq!(allowed_classes, b.clone_allowed_classes());
pub fn clone_allowed_classes(&self) -> HashMap<&'a str, HashSet<&'a str>> {
    self.allowed_classes.clone()
}
1599
/// Configures the handling of HTML comments.
///
/// If this option is `true`, comments are removed from the output.
/// If this option is `false`, comments will be preserved.
///
/// # Examples
///
///     use ammonia::Builder;
///
///     let a = Builder::new().strip_comments(false)
///         .clean("<!-- yes -->")
///         .to_string();
///     assert_eq!(
///       a,
///       "<!-- yes -->");
///
/// # Defaults
///
/// `true`
pub fn strip_comments(&mut self, value: bool) -> &mut Self {
    self.strip_comments = value;
    self
}
1622
/// Returns `true` if comment stripping is turned on.
///
/// This is the value most recently passed to `strip_comments`, or the
/// default if it was never called.
///
/// # Examples
///
///     let mut a = ammonia::Builder::new();
///     a.strip_comments(true);
///     assert!(a.will_strip_comments());
///     a.strip_comments(false);
///     assert!(!a.will_strip_comments());
pub fn will_strip_comments(&self) -> bool {
    self.strip_comments
}
1635
/// Prefixes all "id" attribute values with a given string. Note that the tag and
/// attribute themselves must still be whitelisted.
///
/// Passing `None` turns prefixing off.
///
/// # Examples
///
///     use ammonia::Builder;
///     use maplit::hashset;
///
///     # fn main() {
///     let attributes = hashset!["id"];
///     let a = Builder::new()
///         .generic_attributes(attributes)
///         .id_prefix(Some("safe-"))
///         .clean("<b id=42>")
///         .to_string();
///     assert_eq!(a, "<b id=\"safe-42\"></b>");
///     # }
///
/// # Defaults
///
/// `None`
pub fn id_prefix(&mut self, value: Option<&'a str>) -> &mut Self {
    self.id_prefix = value;
    self
}
1662
/// Only allows the specified properties in `style` attributes.
///
/// Irrelevant if `style` is not an allowed attribute.
///
/// Note that if style filtering is enabled, style properties will be normalised, e.g.
/// invalid declarations and @rules will be removed, with only syntactically valid
/// declarations kept.
///
/// Calling this again replaces the previously configured set of properties.
///
/// # Examples
///
///     use ammonia::Builder;
///     use maplit::hashset;
///
///     # fn main() {
///     let attributes = hashset!["style"];
///     let properties = hashset!["color"];
///     let a = Builder::new()
///         .generic_attributes(attributes)
///         .filter_style_properties(properties)
///         .clean("<p style=\"font-weight: heavy; color: red\">my html</p>")
///         .to_string();
///     assert_eq!(a, "<p style=\"color:red\">my html</p>");
///     # }
pub fn filter_style_properties(&mut self, value: HashSet<&'a str>) -> &mut Self {
    self.style_properties = Some(value);
    self
}
1690
/// Constructs a [`Builder`] instance configured with the [default options].
///
/// This is equivalent to `Builder::default()`.
///
/// # Examples
///
///     use ammonia::{Builder, Url, UrlRelative};
///     # use std::error::Error;
///
///     # fn do_main() -> Result<(), Box<dyn Error>> {
///     let input = "<!-- comments will be stripped -->This is an <a href=.>Ammonia</a> example using <a href=struct.Builder.html#method.new onclick=xss>the <code onmouseover=xss>new()</code> function</a>.";
///     let output = "This is an <a href=\"https://docs.rs/ammonia/1.0/ammonia/\" rel=\"noopener noreferrer\">Ammonia</a> example using <a href=\"https://docs.rs/ammonia/1.0/ammonia/struct.Builder.html#method.new\" rel=\"noopener noreferrer\">the <code>new()</code> function</a>.";
///
///     let result = Builder::new() // <--
///         .url_relative(UrlRelative::RewriteWithBase(Url::parse("https://docs.rs/ammonia/1.0/ammonia/")?))
///         .clean(input)
///         .to_string();
///     assert_eq!(result, output);
///     # Ok(())
///     # }
///     # fn main() { do_main().unwrap() }
///
/// [default options]: fn.clean.html
/// [`Builder`]: struct.Builder.html
pub fn new() -> Self {
    Self::default()
}
1716
/// Constructs a [`Builder`] instance configured with no allowed tags.
///
/// Every setting other than the tag whitelist is taken from the
/// [default options].
///
/// # Examples
///
///     use ammonia::{Builder, Url, UrlRelative};
///     # use std::error::Error;
///
///     # fn do_main() -> Result<(), Box<dyn Error>> {
///     let input = "<!-- comments will be stripped -->This is an <a href=.>Ammonia</a> example using <a href=struct.Builder.html#method.new onclick=xss>the <code onmouseover=xss>empty()</code> function</a>.";
///     let output = "This is an Ammonia example using the empty() function.";
///
///     let result = Builder::empty() // <--
///         .url_relative(UrlRelative::RewriteWithBase(Url::parse("https://docs.rs/ammonia/1.0/ammonia/")?))
///         .clean(input)
///         .to_string();
///     assert_eq!(result, output);
///     # Ok(())
///     # }
///     # fn main() { do_main().unwrap() }
///
/// [default options]: fn.clean.html
/// [`Builder`]: struct.Builder.html
pub fn empty() -> Self {
    Self {
        tags: hashset![],
        ..Self::default()
    }
}
1745
1746 /// Sanitizes an HTML fragment in a string according to the configured options.
1747 ///
1748 /// # Examples
1749 ///
1750 /// use ammonia::{Builder, Url, UrlRelative};
1751 /// # use std::error::Error;
1752 ///
1753 /// # fn do_main() -> Result<(), Box<dyn Error>> {
1754 /// let input = "<!-- comments will be stripped -->This is an <a href=.>Ammonia</a> example using <a href=struct.Builder.html#method.new onclick=xss>the <code onmouseover=xss>new()</code> function</a>.";
1755 /// let output = "This is an <a href=\"https://docs.rs/ammonia/1.0/ammonia/\" rel=\"noopener noreferrer\">Ammonia</a> example using <a href=\"https://docs.rs/ammonia/1.0/ammonia/struct.Builder.html#method.new\" rel=\"noopener noreferrer\">the <code>new()</code> function</a>.";
1756 ///
1757 /// let result = Builder::new()
1758 /// .url_relative(UrlRelative::RewriteWithBase(Url::parse("https://docs.rs/ammonia/1.0/ammonia/")?))
1759 /// .clean(input)
1760 /// .to_string(); // <--
1761 /// assert_eq!(result, output);
1762 /// # Ok(())
1763 /// # }
1764 /// # fn main() { do_main().unwrap() }
1765 pub fn clean(&self, src: &str) -> Document {
1766 let parser = Self::make_parser();
1767 let dom = parser.one(src);
1768 self.clean_dom(dom)
1769 }
1770
1771 /// Sanitizes an HTML fragment from a reader according to the configured options.
1772 ///
1773 /// The input should be in UTF-8 encoding, otherwise the decoding is lossy, just
1774 /// like when using [`String::from_utf8_lossy`].
1775 ///
1776 /// To avoid consuming the reader, a mutable reference can be passed to this method.
1777 ///
1778 /// # Examples
1779 ///
1780 /// use ammonia::Builder;
1781 /// # use std::error::Error;
1782 ///
1783 /// # fn do_main() -> Result<(), Box<dyn Error>> {
1784 /// let a = Builder::new()
1785 /// .clean_from_reader(&b"<!-- no -->"[..])? // notice the `b`
1786 /// .to_string();
1787 /// assert_eq!(a, "");
1788 /// # Ok(()) }
1789 /// # fn main() { do_main().unwrap() }
1790 ///
1791 /// [`String::from_utf8_lossy`]: https://doc.rust-lang.org/std/string/struct.String.html#method.from_utf8_lossy
1792 pub fn clean_from_reader<R>(&self, mut src: R) -> io::Result<Document>
1793 where
1794 R: io::Read,
1795 {
1796 let parser = Self::make_parser().from_utf8();
1797 let dom = parser.read_from(&mut src)?;
1798 Ok(self.clean_dom(dom))
1799 }
1800
/// Clean a post-parsing DOM.
///
/// This is not a public API because RcDom isn't really stable.
/// We want to be able to take breaking changes to html5ever itself
/// without having to break Ammonia's API.
///
/// The tree is walked iteratively with an explicit stack. Each node is
/// detached from its parent, cleaned, and then either re-attached (kept),
/// replaced by its children (tag not allowed), or dropped together with
/// its whole subtree.
///
/// # Panics
///
/// Panics if the builder configuration is inconsistent:
///
/// * `link_rel` is set while `rel` is a generic attribute or a tag attribute of `a`;
/// * `allowed_classes` is non-empty while `class` is a generic attribute, or a
///   tag with allowed classes also has `class` in its tag attributes;
/// * a tag listed in `clean_content_tags` also appears in `tags` or `tag_attributes`.
fn clean_dom(&self, dom: RcDom) -> Document {
    let mut stack = Vec::new();
    let mut removed = Vec::new();
    // Build the `rel` value once; it is handed to `adjust_node_attributes` for every node.
    let link_rel = self
        .link_rel
        .map(|link_rel| format_tendril!("{}", link_rel));
    // Configuration sanity checks; see the `# Panics` section above.
    if link_rel.is_some() {
        assert!(self.generic_attributes.get("rel").is_none());
        assert!(self
            .tag_attributes
            .get("a")
            .and_then(|a| a.get("rel"))
            .is_none());
    }
    assert!(self.allowed_classes.is_empty() || !self.generic_attributes.contains("class"));
    for tag_name in self.allowed_classes.keys() {
        assert!(self
            .tag_attributes
            .get(tag_name)
            .and_then(|a| a.get("class"))
            .is_none());
    }
    for tag_name in &self.clean_content_tags {
        assert!(!self.tags.contains(tag_name), "`{tag_name}` appears in `clean_content_tags` and in `tags` at the same time");
        assert!(!self.tag_attributes.contains_key(tag_name), "`{tag_name}` appears in `clean_content_tags` and in `tag_attributes` at the same time");
    }
    // The first child of the document node is the root whose children get cleaned.
    let body = {
        let children = dom.document.children.borrow();
        children[0].clone()
    };
    // Move the root's children onto the stack, reversed so that `pop` yields
    // them in document order. The root's child list is left empty and is
    // rebuilt below from the nodes that survive.
    stack.extend(
        mem::take(&mut *body.children.borrow_mut())
            .into_iter()
            .rev(),
    );
    // This design approach is used to prevent pathological content from producing
    // a stack overflow. The `stack` contains to-be-cleaned nodes, while `removed`,
    // of course, contains nodes that need to be dropped (we can't just drop them,
    // because they could have a very deep child tree).
    while let Some(mut node) = stack.pop() {
        // A `<selectedcontent>` element inside a `<select>` has all of its
        // children detached and discarded before any further processing.
        if matches!(node.data, NodeData::Element { ref name, .. } if &*name.local == "selectedcontent" && name.ns == ns!(html)) &&
            self.is_within(node.clone(), ns!(html), "select")
        {
            for sub in node.children.borrow_mut().iter_mut() {
                sub.parent.replace(None);
            }
            *node.children.borrow_mut() = Vec::new();
        }
        // Detach the node from its parent; it is only re-appended if it is kept.
        let parent = node.parent
            .replace(None).expect("a node in the DOM will have a parent, except the root, which is not processed")
            .upgrade().expect("a node's parent will be pointed to by its parent (or the root pointer), and will not be dropped");
        // `pass` is true when the node itself is kept.
        let pass = self.clean_child(&mut node);
        self.adjust_node_attributes(&mut node, &link_rel, self.id_prefix);
        // Drop the node together with its whole subtree.
        if self.clean_node_content(&node) || !self.check_expected_namespace(&parent, &node) {
            removed.push(node);
            continue;
        }
        if pass {
            dom.append(&parent.clone(), NodeOrText::AppendNode(node.clone()));
        } else {
            // The node is discarded but its children are kept: point them at the
            // node's parent so they are appended there when they are processed.
            for sub in node.children.borrow_mut().iter_mut() {
                sub.parent.replace(Some(Rc::downgrade(&parent)));
            }
        }
        // Queue the children, again reversed to preserve document order.
        stack.extend(
            mem::take(&mut *node.children.borrow_mut())
                .into_iter()
                .rev(),
        );
        if !pass {
            removed.push(node);
        }
    }
    // Now, imperatively clean up all of the child nodes.
    // Otherwise, we could wind up with a DoS, either caused by a memory leak,
    // or caused by a stack overflow.
    while let Some(node) = removed.pop() {
        removed.extend_from_slice(&mem::take(&mut *node.children.borrow_mut())[..]);
    }
    Document(dom)
}
1887
1888 fn is_within(&self, mut child: Handle, ns: Namespace, tag: &str) -> bool {
1889 while let Some(parent) = child.parent.take() {
1890 child.parent.set(Some(parent.clone()));
1891 match child.data {
1892 NodeData::Element { ref name, .. } if name.ns == ns && &*name.local == tag => return true,
1893 _ => {
1894 if let Some(parent) = parent.upgrade() {
1895 child = parent;
1896 } else {
1897 return false;
1898 }
1899 }
1900 }
1901 }
1902 false
1903 }
1904
1905 /// Returns `true` if a node and all its content should be removed.
1906 fn clean_node_content(&self, node: &Handle) -> bool {
1907 match node.data {
1908 NodeData::Text { .. }
1909 | NodeData::Comment { .. }
1910 | NodeData::Doctype { .. }
1911 | NodeData::Document
1912 | NodeData::ProcessingInstruction { .. } => false,
1913 NodeData::Element { ref name, .. } => self.clean_content_tags.contains(&*name.local),
1914 }
1915 }
1916
1917 /// Remove unwanted attributes, and check if the node should be kept or not.
1918 ///
1919 /// The root node doesn't need cleaning because we create the root node ourselves,
1920 /// and it doesn't get serialized, and ... it just exists to give the parser
1921 /// a context (in this case, a div-like block context).
1922 fn clean_child(&self, child: &mut Handle) -> bool {
1923 match child.data {
1924 NodeData::Text { .. } => true,
1925 NodeData::Comment { .. } => !self.strip_comments,
1926 NodeData::Doctype { .. }
1927 | NodeData::Document
1928 | NodeData::ProcessingInstruction { .. } => false,
1929 NodeData::Element {
1930 ref name,
1931 ref attrs,
1932 ..
1933 } => {
1934 if self.tags.contains(&*name.local) {
1935 let attr_filter = |attr: &html5ever::Attribute| {
1936 let whitelisted = self.generic_attributes.contains(&*attr.name.local)
1937 || self.generic_attribute_prefixes.as_ref().map(|prefixes| {
1938 prefixes.iter().any(|&p| attr.name.local.starts_with(p))
1939 }) == Some(true)
1940 || self
1941 .tag_attributes
1942 .get(&*name.local)
1943 .map(|ta| ta.contains(&*attr.name.local))
1944 == Some(true)
1945 || self
1946 .tag_attribute_values
1947 .get(&*name.local)
1948 .and_then(|tav| tav.get(&*attr.name.local))
1949 .map(|vs| {
1950 let attr_val = attr.value.to_lowercase();
1951 vs.iter().any(|v| v.to_lowercase() == attr_val)
1952 })
1953 == Some(true);
1954 if !whitelisted {
1955 // If the class attribute is not whitelisted,
1956 // but there is a whitelisted set of allowed_classes,
1957 // do not strip out the class attribute.
1958 // Banned classes will be filtered later.
1959 &*attr.name.local == "class"
1960 && self.allowed_classes.contains_key(&*name.local)
1961 } else if is_url_attr(&name.local, &attr.name.local) {
1962 let url = Url::parse(&attr.value);
1963 if let Ok(url) = url {
1964 self.url_schemes.contains(url.scheme())
1965 } else if url == Err(url::ParseError::RelativeUrlWithoutBase) {
1966 !matches!(self.url_relative, UrlRelative::Deny)
1967 } else {
1968 false
1969 }
1970 } else {
1971 true
1972 }
1973 };
1974 attrs.borrow_mut().retain(attr_filter);
1975 true
1976 } else {
1977 false
1978 }
1979 }
1980 }
1981 }
1982
    // Check for unexpected namespace changes.
    //
    // The issue happens if developers added to the list of allowed tags any
    // tag which is parsed in RCDATA state, PLAINTEXT state or RAWTEXT state,
    // that is:
    //
    // * title
    // * textarea
    // * xmp
    // * iframe
    // * noembed
    // * noframes
    // * plaintext
    // * noscript
    // * style
    // * script
    //
    // An example in the wild is Plume, which allows iframe [1]. So in the next
    // examples I'll assume the following policy:
    //
    //     Builder::new()
    //         .add_tags(&["iframe"])
    //
    // In HTML namespace `<iframe>` is parsed specially; that is, its content is
    // treated as text. For instance, the following html:
    //
    //     <iframe><a>test
    //
    // Is parsed into the following DOM tree:
    //
    //     iframe
    //     └─ #text: <a>test
    //
    // So iframe cannot have any children other than a text node.
    //
    // The same is not true, though, in "foreign content"; that is, within
    // <svg> or <math> tags. The following html:
    //
    //     <svg><iframe><a>test
    //
    // is parsed differently:
    //
    //     svg
    //     └─ iframe
    //        └─ a
    //           └─ #text: test
    //
    // So in SVG namespace iframe can have children.
    //
    // Ammonia disallows <svg> but it keeps its content after deleting it. And
    // the parser internally keeps track of the namespace of the element. So
    // assume we have the following snippet:
    //
    //     <svg><iframe><a title="</iframe><img src onerror=alert(1)>">test
    //
    // It is parsed into:
    //
    //     svg
    //     └─ iframe
    //        └─ a title="</iframe><img src onerror=alert(1)>"
    //           └─ #text: test
    //
    // This DOM tree is harmless from ammonia's point of view because the piece
    // of code that looks like XSS is in a title attribute. Hence, the
    // resulting "safe" HTML from ammonia would be:
    //
    //     <iframe><a title="</iframe><img src onerror=alert(1)>" rel="noopener
    //     noreferrer">test</a></iframe>
    //
    // However, at this point, the information about namespace is lost, which
    // means that the browser will parse this snippet into:
    //
    //     ├─ iframe
    //     │  └─ #text: <a title="
    //     ├─ img src="" onerror="alert(1)"
    //     └─ #text: " rel="noopener noreferrer">test
    //
    // Leading to XSS.
    //
    // To solve this issue, check for unexpected namespace switches after cleanup.
    // Elements which change namespace at an unexpected point are removed.
    // This function returns `true` if `child` should be kept, and `false` if it
    // should be removed.
    //
    // [1]: https://github.com/Plume-org/Plume/blob/main/plume-models/src/safe_string.rs#L21
    fn check_expected_namespace(&self, parent: &Handle, child: &Handle) -> bool {
        // Only element-inside-element pairs can switch namespace; anything else is kept.
        let (parent, parent_attr, child) = match (&parent.data, &child.data) {
            (NodeData::Element { name: pn, attrs, .. }, NodeData::Element { name: cn, .. }) => (pn, attrs, cn),
            _ => return true,
        };
        // The only way to switch from html to svg is with the <svg> tag
        if parent.ns == ns!(html) && child.ns == ns!(svg) {
            child.local == local_name!("svg")
        // The only way to switch from html to mathml is with the <math> tag
        } else if parent.ns == ns!(html) && child.ns == ns!(mathml) {
            child.local == local_name!("math")
        // The only way to switch from mathml to svg/html is with a text integration point
        } else if parent.ns == ns!(mathml) && child.ns != ns!(mathml) {
            // https://html.spec.whatwg.org/#mathml
            if &*parent.local == "annotation-xml" {
                let parent_attr = parent_attr.borrow();
                // https://html.spec.whatwg.org/#tree-construction
                //
                // An HTML child is accepted only when every `encoding` attribute on
                // the parent is exactly "text/html" or "application/xhtml+xml"
                // (compared case-sensitively here) ...
                if child.ns == ns!(html)
                    && parent_attr
                        .iter()
                        .filter(|attr| attr.name.local == local_name!("encoding"))
                        .all(|attr| {
                            &*attr.value == "text/html" || &*attr.value == "application/xhtml+xml"
                        })
                {
                    // ... and there is exactly one such attribute. `all` above is also
                    // true when there are none, so the count check rejects that case.
                    is_html_tag(&child.local)
                        && parent_attr
                            .iter()
                            .filter(|attr| attr.name.local == local_name!("encoding"))
                            .count()
                            == 1
                } else {
                    // Otherwise the only foreign child allowed is an <svg> root.
                    child.local == local_name!("svg") && child.ns == ns!(svg)
                }
            } else {
                // These parents are the MathML text integration points; an HTML child
                // must additionally carry a name accepted by `is_html_tag`.
                matches!(&*parent.local, "mi" | "mo" | "mn" | "ms" | "mtext")
                    && if child.ns == ns!(html) {
                        is_html_tag(&child.local)
                    } else {
                        true
                    }
            }

        // The only way to switch from svg to mathml/html is with an html integration point
        } else if parent.ns == ns!(svg) && child.ns != ns!(svg) {
            // https://html.spec.whatwg.org/#svg-0
            matches!(&*parent.local, "foreignObject")
                && if child.ns == ns!(html) { is_html_tag(&child.local) } else { true }
        // No namespace switch was matched above: the child's name must be one
        // that is known for the namespace it claims.
        } else if child.ns == ns!(svg) {
            is_svg_tag(&child.local)
        } else if child.ns == ns!(mathml) {
            is_mathml_tag(&child.local)
        } else if child.ns == ns!(html) {
            is_html_tag(&child.local)
        } else {
            // There are no other supported ways to switch namespace
            parent.ns == child.ns
        }
    }
2127
2128 /// Add and transform special-cased attributes and elements.
2129 ///
2130 /// This function handles:
2131 ///
2132 /// * relative URL rewriting
2133 /// * adding `<a rel>` attributes
2134 /// * filtering out banned style properties
2135 /// * filtering out banned classes
2136 fn adjust_node_attributes(
2137 &self,
2138 child: &mut Handle,
2139 link_rel: &Option<StrTendril>,
2140 id_prefix: Option<&'a str>,
2141 ) {
2142 if let NodeData::Element {
2143 ref name,
2144 ref attrs,
2145 ..
2146 } = child.data
2147 {
2148 if let Some(set_attrs) = self.set_tag_attribute_values.get(&*name.local) {
2149 let mut attrs = attrs.borrow_mut();
2150 for (&set_name, &set_value) in set_attrs {
2151 // set the value of the attribute if the attribute is already present
2152 if let Some(attr) = attrs.iter_mut().find(|attr| &*attr.name.local == set_name)
2153 {
2154 if &*attr.value != set_value {
2155 attr.value = set_value.into();
2156 }
2157 } else {
2158 // otherwise, add the attribute
2159 let attr = Attribute {
2160 name: QualName::new(None, ns!(), set_name.into()),
2161 value: set_value.into(),
2162 };
2163 attrs.push(attr);
2164 }
2165 }
2166 }
2167 if let Some(ref link_rel) = *link_rel {
2168 if &*name.local == "a" {
2169 attrs.borrow_mut().push(Attribute {
2170 name: QualName::new(None, ns!(), local_name!("rel")),
2171 value: link_rel.clone(),
2172 })
2173 }
2174 }
2175 if let Some(ref id_prefix) = id_prefix {
2176 for attr in &mut *attrs.borrow_mut() {
2177 if &attr.name.local == "id" && !attr.value.starts_with(id_prefix) {
2178 attr.value = format_tendril!("{}{}", id_prefix, attr.value);
2179 }
2180 }
2181 }
2182 if let Some(ref attr_filter) = self.attribute_filter {
2183 let mut drop_attrs = Vec::new();
2184 let mut attrs = attrs.borrow_mut();
2185 for (i, attr) in &mut attrs.iter_mut().enumerate() {
2186 let replace_with = if let Some(new) =
2187 attr_filter.filter(&name.local, &attr.name.local, &attr.value)
2188 {
2189 if *new != *attr.value {
2190 Some(format_tendril!("{}", new))
2191 } else {
2192 None // no need to replace the attr if filter returned the same value
2193 }
2194 } else {
2195 drop_attrs.push(i);
2196 None
2197 };
2198 if let Some(replace_with) = replace_with {
2199 attr.value = replace_with;
2200 }
2201 }
2202 for i in drop_attrs.into_iter().rev() {
2203 attrs.swap_remove(i);
2204 }
2205 }
2206 {
2207 let mut drop_attrs = Vec::new();
2208 let mut attrs = attrs.borrow_mut();
2209 for (i, attr) in attrs.iter_mut().enumerate() {
2210 if is_url_attr(&name.local, &attr.name.local) && is_url_relative(&attr.value) {
2211 let new_value = self.url_relative.evaluate(&attr.value);
2212 if let Some(new_value) = new_value {
2213 attr.value = new_value;
2214 } else {
2215 drop_attrs.push(i);
2216 }
2217 }
2218 }
2219 // Swap remove scrambles the vector after the current point.
2220 // We will not do anything except with items before the current point.
2221 // The `rev()` is, as such, necessary for correctness.
2222 // We could use regular `remove(usize)` and a forward iterator,
2223 // but that's slower.
2224 for i in drop_attrs.into_iter().rev() {
2225 attrs.swap_remove(i);
2226 }
2227 }
2228 if let Some(allowed_values) = &self.style_properties {
2229 for attr in &mut *attrs.borrow_mut() {
2230 if &attr.name.local == "style" {
2231 attr.value = style::filter_style_attribute(&attr.value, allowed_values).into();
2232 }
2233 }
2234 }
2235 if let Some(allowed_values) = self.allowed_classes.get(&*name.local) {
2236 for attr in &mut *attrs.borrow_mut() {
2237 if &attr.name.local == "class" {
2238 let mut classes = vec![];
2239 // https://html.spec.whatwg.org/#global-attributes:classes-2
2240 for class in attr.value.split_ascii_whitespace() {
2241 if allowed_values.contains(class) {
2242 classes.push(class.to_owned());
2243 }
2244 }
2245 attr.value = format_tendril!("{}", classes.join(" "));
2246 }
2247 }
2248 }
2249 }
2250 }
2251
2252 /// Initializes an HTML fragment parser.
2253 ///
2254 /// Ammonia conforms to the HTML5 fragment parsing rules,
2255 /// by parsing the given fragment as if it were included in a <div> tag.
2256 fn make_parser() -> html::Parser<RcDom> {
2257 html::parse_fragment(
2258 RcDom::default(),
2259 html::ParseOpts::default(),
2260 QualName::new(None, ns!(html), local_name!("div")),
2261 vec![],
2262 false,
2263 )
2264 }
2265}
2266
/// Given an element name and attribute name, determine if the given attribute contains a URL.
///
/// `href` and `src` count on every element; the remaining attributes only count
/// on the specific elements listed below.
fn is_url_attr(element: &str, attr: &str) -> bool {
    match attr {
        "href" | "src" => true,
        "action" => element == "form",
        "data" => element == "object",
        "formaction" => matches!(element, "button" | "input"),
        "ping" => element == "a",
        "poster" => element == "video",
        _ => false,
    }
}
2277
2278fn is_html_tag(element: &str) -> bool {
2279 (!is_svg_tag(element) && !is_mathml_tag(element))
2280 || matches!(
2281 element,
2282 "title" | "style" | "font" | "a" | "script" | "span"
2283 )
2284}
2285
/// Given an element name, check if it's SVG
///
/// The comparison is case-sensitive, so `foreignObject` matches but
/// `foreignobject` does not.
fn is_svg_tag(element: &str) -> bool {
    // https://svgwg.org/svg2-draft/eltindex.html
    const SVG_ELEMENT_NAMES: &[&str] = &[
        "a", "animate", "animateMotion", "animateTransform", "circle", "clipPath", "defs", "desc",
        "discard", "ellipse", "feBlend", "feColorMatrix", "feComponentTransfer", "feComposite",
        "feConvolveMatrix", "feDiffuseLighting", "feDisplacementMap", "feDistantLight",
        "feDropShadow", "feFlood", "feFuncA", "feFuncB", "feFuncG", "feFuncR", "feGaussianBlur",
        "feImage", "feMerge", "feMergeNode", "feMorphology", "feOffset", "fePointLight",
        "feSpecularLighting", "feSpotLight", "feTile", "feTurbulence", "filter", "foreignObject",
        "g", "image", "line", "linearGradient", "marker", "mask", "metadata", "mpath", "path",
        "pattern", "polygon", "polyline", "radialGradient", "rect", "script", "set", "stop",
        "style", "svg", "switch", "symbol", "text", "textPath", "title", "tspan", "use", "view",
    ];
    SVG_ELEMENT_NAMES.contains(&element)
}
2356
/// Given an element name, check if it's Math
///
/// The comparison is case-sensitive.
fn is_mathml_tag(element: &str) -> bool {
    // NOTE(review): this list used to cite the SVG element index
    // (https://svgwg.org/svg2-draft/eltindex.html), which looks like a copy-paste
    // from `is_svg_tag`. The names appear to be MathML presentation and content
    // elements (see https://www.w3.org/TR/MathML3/) — confirm the intended source.
    const MATHML_ELEMENT_NAMES: &[&str] = &[
        "abs", "and", "annotation", "annotation-xml", "apply", "approx", "arccos", "arccosh",
        "arccot", "arccoth", "arccsc", "arccsch", "arcsec", "arcsech", "arcsin", "arcsinh",
        "arctan", "arctanh", "arg", "bind", "bvar", "card", "cartesianproduct", "cbytes",
        "ceiling", "cerror", "ci", "cn", "codomain", "complexes", "compose", "condition",
        "conjugate", "cos", "cosh", "cot", "coth", "cs", "csc", "csch", "csymbol", "curl",
        "declare", "degree", "determinant", "diff", "divergence", "divide", "domain",
        "domainofapplication", "emptyset", "eq", "equivalent", "eulergamma", "exists", "exp",
        "exponentiale", "factorial", "factorof", "false", "floor", "fn", "forall", "gcd", "geq",
        "grad", "gt", "ident", "image", "imaginary", "imaginaryi", "implies", "in", "infinity",
        "int", "integers", "intersect", "interval", "inverse", "lambda", "laplacian", "lcm",
        "leq", "limit", "list", "ln", "log", "logbase", "lowlimit", "lt", "maction",
        "maligngroup", "malignmark", "math", "matrix", "matrixrow", "max", "mean", "median",
        "menclose", "merror", "mfenced", "mfrac", "mglyph", "mi", "min", "minus", "mlabeledtr",
        "mlongdiv", "mmultiscripts", "mn", "mo", "mode", "moment", "momentabout", "mover",
        "mpadded", "mphantom", "mprescripts", "mroot", "mrow", "ms", "mscarries", "mscarry",
        "msgroup", "msline", "mspace", "msqrt", "msrow", "mstack", "mstyle", "msub", "msubsup",
        "msup", "mtable", "mtd", "mtext", "mtr", "munder", "munderover", "naturalnumbers", "neq",
        "none", "not", "notanumber", "notin", "notprsubset", "notsubset", "or", "otherwise",
        "outerproduct", "partialdiff", "pi", "piece", "piecewise", "plus", "power", "primes",
        "product", "prsubset", "quotient", "rationals", "real", "reals", "reln", "rem", "root",
        "scalarproduct", "sdev", "sec", "sech", "selector", "semantics", "sep", "set", "setdiff",
        "share", "sin", "sinh", "span", "subset", "sum", "tan", "tanh", "tendsto", "times",
        "transpose", "true", "union", "uplimit", "variance", "vector", "vectorproduct", "xor",
    ];
    MATHML_ELEMENT_NAMES.contains(&element)
}
2558
2559fn is_url_relative(url: &str) -> bool {
2560 matches!(
2561 Url::parse(url),
2562 Err(url::ParseError::RelativeUrlWithoutBase)
2563 )
2564}
2565
/// Policy for [relative URLs], that is, URLs that do not specify the scheme in full.
///
/// This policy kicks in, if set, for any attribute named `src` or `href`,
/// as well as the `data` attribute of an `object` tag, the `action` attribute
/// of a `form` tag, the `formaction` attribute of a `button` or `input` tag,
/// the `ping` attribute of an `a` tag, and the `poster` attribute of a
/// `video` tag.
///
/// [relative URLs]: struct.Builder.html#method.url_relative
///
/// # Examples
///
/// ## `Deny`
///
/// * `<a href="test">` is a file-relative URL, and will be removed
/// * `<a href="/test">` is a domain-relative URL, and will be removed
/// * `<a href="//example.com/test">` is a scheme-relative URL, and will be removed
/// * `<a href="http://example.com/test">` is an absolute URL, and will be kept
///
/// ## `PassThrough`
///
/// No changes will be made to any URLs, except if a disallowed scheme is used.
///
/// ## `RewriteWithBase`
///
/// If the base is set to `http://notriddle.com/some-directory/some-file`
///
/// * `<a href="test">` will be rewritten to `<a href="http://notriddle.com/some-directory/test">`
/// * `<a href="/test">` will be rewritten to `<a href="http://notriddle.com/test">`
/// * `<a href="//example.com/test">` will be rewritten to `<a href="http://example.com/test">`
/// * `<a href="http://example.com/test">` is an absolute URL, so it will be kept as-is
///
/// ## `Custom`
///
/// Pass the relative URL to a function.
/// If it returns `Some(string)`, then that one gets used.
/// Otherwise, it will remove the attribute (like `Deny` does).
///
/// ```
/// use std::borrow::Cow;
/// fn is_absolute_path(url: &str) -> bool {
///     let u = url.as_bytes();
///     // `//a/b/c` is "protocol-relative", meaning "a" is a hostname
///     // `/a/b/c` is an absolute path, and what we want to do stuff to.
///     u.get(0) == Some(&b'/') && u.get(1) != Some(&b'/')
/// }
/// fn evaluate(url: &str) -> Option<Cow<str>> {
///     if is_absolute_path(url) {
///         Some(Cow::Owned(String::from("/root") + url))
///     } else {
///         Some(Cow::Borrowed(url))
///     }
/// }
/// fn main() {
///     let a = ammonia::Builder::new()
///         .url_relative(ammonia::UrlRelative::Custom(Box::new(evaluate)))
///         .clean("<a href=/test/path>fixed</a><a href=path>passed</a><a href=http://google.com/>skipped</a>")
///         .to_string();
///     assert_eq!(a, "<a href=\"/root/test/path\" rel=\"noopener noreferrer\">fixed</a><a href=\"path\" rel=\"noopener noreferrer\">passed</a><a href=\"http://google.com/\" rel=\"noopener noreferrer\">skipped</a>");
/// }
/// ```
///
/// This function is only applied to relative URLs.
/// To filter all of the URLs,
/// use the not-yet-implemented Content Security Policy.
#[non_exhaustive]
pub enum UrlRelative<'a> {
    /// Relative URLs will be completely stripped from the document.
    Deny,
    /// Relative URLs will be passed through unchanged.
    PassThrough,
    /// Relative URLs will be changed into absolute URLs, based on this base URL.
    RewriteWithBase(Url),
    /// Force absolute and relative paths into a particular directory.
    ///
    /// Since the resolver does not affect fully-qualified URLs, it doesn't
    /// prevent users from linking wherever they want. This feature only
    /// serves to make content more portable.
    ///
    /// # Examples
    ///
    /// <table>
    /// <thead>
    /// <tr>
    /// <th>root</th>
    /// <th>path</th>
    /// <th>url</th>
    /// <th>result</th>
    /// </tr>
    /// </thead>
    /// <tbody>
    /// <tr>
    /// <td>https://github.com/rust-ammonia/ammonia/blob/master/</td>
    /// <td>README.md</td>
    /// <td></td>
    /// <td>https://github.com/rust-ammonia/ammonia/blob/master/README.md</td>
    /// </tr><tr>
    /// <td>https://github.com/rust-ammonia/ammonia/blob/master/</td>
    /// <td>README.md</td>
    /// <td>/</td>
    /// <td>https://github.com/rust-ammonia/ammonia/blob/master/</td>
    /// </tr><tr>
    /// <td>https://github.com/rust-ammonia/ammonia/blob/master/</td>
    /// <td>README.md</td>
    /// <td>/CONTRIBUTING.md</td>
    /// <td>https://github.com/rust-ammonia/ammonia/blob/master/CONTRIBUTING.md</td>
    /// </tr><tr>
    /// <td>https://github.com/rust-ammonia/ammonia/blob/master</td>
    /// <td>README.md</td>
    /// <td></td>
    /// <td>https://github.com/rust-ammonia/ammonia/blob/README.md</td>
    /// </tr><tr>
    /// <td>https://github.com/rust-ammonia/ammonia/blob/master</td>
    /// <td>README.md</td>
    /// <td>/</td>
    /// <td>https://github.com/rust-ammonia/ammonia/blob/</td>
    /// </tr><tr>
    /// <td>https://github.com/rust-ammonia/ammonia/blob/master</td>
    /// <td>README.md</td>
    /// <td>/CONTRIBUTING.md</td>
    /// <td>https://github.com/rust-ammonia/ammonia/blob/CONTRIBUTING.md</td>
    /// </tr><tr>
    /// <td>https://github.com/rust-ammonia/ammonia/blob/master/</td>
    /// <td></td>
    /// <td></td>
    /// <td>https://github.com/rust-ammonia/ammonia/blob/master/</td>
    /// </tr><tr>
    /// <td>https://github.com/rust-ammonia/ammonia/blob/master/</td>
    /// <td></td>
    /// <td>/</td>
    /// <td>https://github.com/rust-ammonia/ammonia/blob/master/</td>
    /// </tr><tr>
    /// <td>https://github.com/rust-ammonia/ammonia/blob/master/</td>
    /// <td></td>
    /// <td>/CONTRIBUTING.md</td>
    /// <td>https://github.com/rust-ammonia/ammonia/blob/master/CONTRIBUTING.md</td>
    /// </tr><tr>
    /// <td>https://github.com/</td>
    /// <td>rust-ammonia/ammonia/blob/master/README.md</td>
    /// <td></td>
    /// <td>https://github.com/rust-ammonia/ammonia/blob/master/README.md</td>
    /// </tr><tr>
    /// <td>https://github.com/</td>
    /// <td>rust-ammonia/ammonia/blob/master/README.md</td>
    /// <td>/</td>
    /// <td>https://github.com/</td>
    /// </tr><tr>
    /// <td>https://github.com/</td>
    /// <td>rust-ammonia/ammonia/blob/master/README.md</td>
    /// <td>CONTRIBUTING.md</td>
    /// <td>https://github.com/rust-ammonia/ammonia/blob/master/CONTRIBUTING.md</td>
    /// </tr><tr>
    /// <td>https://github.com/</td>
    /// <td>rust-ammonia/ammonia/blob/master/README.md</td>
    /// <td>/CONTRIBUTING.md</td>
    /// <td>https://github.com/CONTRIBUTING.md</td>
    /// </tr>
    /// </tbody>
    /// </table>
    RewriteWithRoot {
        /// The URL that is treated as the root by the resolver.
        root: Url,
        /// The "current path" used to resolve relative paths.
        path: String,
    },
    /// Rewrite URLs with a custom function.
    Custom(Box<dyn UrlRelativeEvaluate<'a>>),
}
2729
2730impl<'a> UrlRelative<'a> {
2731 fn evaluate(&self, url: &str) -> Option<html5ever::tendril::StrTendril> {
2732 match self {
2733 UrlRelative::RewriteWithBase(ref url_base) => url_base
2734 .join(url)
2735 .ok()
2736 .and_then(|x| StrTendril::from_str(x.as_str()).ok()),
2737 UrlRelative::RewriteWithRoot { ref root, ref path } => {
2738 (match url.as_bytes() {
2739 // Scheme-relative URL
2740 [b'/', b'/', ..] => root.join(url),
2741 // Path-absolute URL
2742 b"/" => root.join("."),
2743 [b'/', ..] => root.join(&url[1..]),
2744 // Path-relative URL
2745 _ => root.join(path).and_then(|r| r.join(url)),
2746 })
2747 .ok()
2748 .and_then(|x| StrTendril::from_str(x.as_str()).ok())
2749 }
2750 UrlRelative::Custom(ref evaluate) => evaluate
2751 .evaluate(url)
2752 .as_ref()
2753 .map(Cow::as_ref)
2754 .map(StrTendril::from_str)
2755 .and_then(Result::ok),
2756 UrlRelative::PassThrough => StrTendril::from_str(url).ok(),
2757 UrlRelative::Deny => None,
2758 }
2759 }
2760}
2761
2762impl<'a> fmt::Debug for UrlRelative<'a> {
2763 fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
2764 match *self {
2765 UrlRelative::Deny => write!(f, "UrlRelative::Deny"),
2766 UrlRelative::PassThrough => write!(f, "UrlRelative::PassThrough"),
2767 UrlRelative::RewriteWithBase(ref base) => {
2768 write!(f, "UrlRelative::RewriteWithBase({})", base)
2769 }
2770 UrlRelative::RewriteWithRoot { ref root, ref path } => {
2771 write!(
2772 f,
2773 "UrlRelative::RewriteWithRoot {{ root: {root}, path: {path} }}"
2774 )
2775 }
2776 UrlRelative::Custom(_) => write!(f, "UrlRelative::Custom"),
2777 }
2778 }
2779}
2780
/// Types that implement this trait can be used to convert a relative URL into an absolute URL.
///
/// This evaluator is only called when the URL is relative; absolute URLs are not evaluated.
///
/// Any `Fn(&str) -> Option<Cow<'_, str>>` closure or function that is
/// `Send + Sync` implements this trait through a blanket implementation.
///
/// See [`url_relative`][url_relative] for more details.
///
/// [url_relative]: struct.Builder.html#method.url_relative
pub trait UrlRelativeEvaluate<'a>: Send + Sync + 'a {
    /// Return `None` to remove the attribute. Return `Some(str)` to replace it with a new string.
    fn evaluate<'url>(&self, _: &'url str) -> Option<Cow<'url, str>>;
}
/// Blanket implementation so plain functions and closures can be used as evaluators.
impl<'a, T> UrlRelativeEvaluate<'a> for T
where
    T: Fn(&str) -> Option<Cow<'_, str>> + Send + Sync + 'a,
{
    // Simply forwards the URL to the wrapped callable.
    fn evaluate<'url>(&self, url: &'url str) -> Option<Cow<'url, str>> {
        self(url)
    }
}
2800
/// Filters are opaque callables, so `Debug` prints a fixed placeholder name.
impl fmt::Debug for dyn AttributeFilter {
    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
        f.write_str("AttributeFilter")
    }
}
2806
/// Types that implement this trait can be used to remove or rewrite arbitrary attributes.
///
/// Any `Fn(&str, &str, &str) -> Option<Cow<str>>` closure or function that is
/// `Send + Sync + 'static` implements this trait through a blanket implementation.
///
/// See [`attribute_filter`][attribute_filter] for more details.
///
/// [attribute_filter]: struct.Builder.html#method.attribute_filter
pub trait AttributeFilter: Send + Sync {
    /// Return `None` to remove the attribute. Return `Some(str)` to replace it with a new string.
    ///
    /// The arguments are, in order, the element name, the attribute name and
    /// the current attribute value.
    fn filter<'a>(&self, _: &str, _: &str, _: &'a str) -> Option<Cow<'a, str>>;
}
2816
/// Blanket implementation so plain functions and closures can be used as attribute filters.
impl<T> AttributeFilter for T
where
    T: for<'a> Fn(&str, &str, &'a str) -> Option<Cow<'a, str>> + Send + Sync + 'static,
{
    // Simply forwards all three arguments to the wrapped callable.
    fn filter<'a>(&self, element: &str, attribute: &str, value: &'a str) -> Option<Cow<'a, str>> {
        self(element, attribute, value)
    }
}
2825
/// A sanitized HTML document.
///
/// The `Document` type is an opaque struct representing an HTML fragment that was sanitized by
/// `ammonia`. It can be converted to a [`String`] or written to a [`Write`] instance. This allows
/// users to avoid buffering the serialized representation to a [`String`] when desired.
///
/// This type is opaque to insulate the caller from breaking changes in the `html5ever` interface.
///
/// Note that this type wraps an `html5ever` DOM tree. `ammonia` does not support streaming, so
/// the complete fragment needs to be stored in memory during processing.
///
/// [`String`]: https://doc.rust-lang.org/nightly/std/string/struct.String.html
/// [`Write`]: https://doc.rust-lang.org/nightly/std/io/trait.Write.html
///
/// # Examples
///
/// ```
/// use ammonia::Builder;
///
/// let input = "<!-- comments will be stripped -->This is an Ammonia example.";
/// let output = "This is an Ammonia example.";
///
/// let document = Builder::new()
///     .clean(input);
/// assert_eq!(document.to_string(), output);
/// ```
pub struct Document(RcDom);
2851
2852impl Document {
2853 /// Serializes a `Document` instance to a writer.
2854 ///
2855 /// This method writes the sanitized HTML to a [`Write`] instance, avoiding a buffering step.
2856 ///
2857 /// To avoid consuming the writer, a mutable reference can be passed, like in the example below.
2858 ///
2859 /// Note that the in-memory representation of `Document` is larger than the serialized
2860 /// `String`.
2861 ///
2862 /// [`Write`]: https://doc.rust-lang.org/nightly/std/io/trait.Write.html
2863 ///
2864 /// # Examples
2865 ///
2866 /// use ammonia::Builder;
2867 ///
2868 /// let input = "Some <style></style>HTML here";
2869 /// let expected = b"Some HTML here";
2870 ///
2871 /// let document = Builder::new()
2872 /// .clean(input);
2873 ///
2874 /// let mut sanitized = Vec::new();
2875 /// document.write_to(&mut sanitized)
2876 /// .expect("Writing to a string should not fail (except on OOM)");
2877 /// assert_eq!(sanitized, expected);
2878 pub fn write_to<W>(&self, writer: W) -> io::Result<()>
2879 where
2880 W: io::Write,
2881 {
2882 let opts = Self::serialize_opts();
2883 let inner: SerializableHandle = self.0.document.children.borrow()[0].clone().into();
2884 serialize(writer, &inner, opts)
2885 }
2886
2887 /// Exposes the `Document` instance as an [`rcdom::Handle`].
2888 ///
2889 /// This method returns the inner object backing the `Document` instance. This allows
2890 /// making further changes to the DOM without introducing redundant serialization and
2891 /// parsing.
2892 ///
2893 /// Note that this method should be considered unstable and sits outside of the semver
2894 /// stability guarantees. It may change, break, or go away at any time, either because
2895 /// of `html5ever` changes or `ammonia` implementation changes.
2896 ///
2897 /// For this method to be accessible, a `cfg` flag is required. The easiest way is to
2898 /// use the `RUSTFLAGS` environment variable:
2899 ///
2900 /// ```text
2901 /// RUSTFLAGS='--cfg ammonia_unstable' cargo build
2902 /// ```
2903 ///
2904 /// on Unix-like platforms, or
2905 ///
2906 /// ```text
2907 /// set RUSTFLAGS=--cfg ammonia_unstable
2908 /// cargo build
2909 /// ```
2910 ///
2911 /// on Windows.
2912 ///
2913 /// This requirement also applies to crates that transitively depend on crates that use
2914 /// this flag.
2915 ///
2916 /// # Examples
2917 ///
2918 /// use ammonia::Builder;
2919 /// use ammonia::rcdom::SerializableHandle;
2920 /// use maplit::hashset;
2921 /// use html5ever::serialize::{serialize, SerializeOpts};
2922 ///
2923 /// # use std::error::Error;
2924 /// # fn do_main() -> Result<(), Box<dyn Error>> {
2925 /// let input = "<a>one link</a> and <a>one more</a>";
2926 /// let expected = "<a>one more</a> and <a>one link</a>";
2927 ///
2928 /// let document = Builder::new()
2929 /// .link_rel(None)
2930 /// .clean(input);
2931 ///
2932 /// let node = document.to_dom_node();
2933 /// node.children.borrow_mut().reverse();
2934 ///
2935 /// let mut buf = Vec::new();
2936 /// let handle: SerializableHandle = node.into();
2937 /// serialize(&mut buf, &handle, SerializeOpts::default())?;
2938 /// let output = String::from_utf8(buf)?;
2939 ///
2940 /// assert_eq!(output, expected);
2941 /// # Ok(())
2942 /// # }
2943 /// # fn main() { do_main().unwrap() }
2944 #[cfg(ammonia_unstable)]
2945 pub fn to_dom_node(&self) -> Handle {
2946 self.0.document.children.borrow()[0].clone()
2947 }
2948
2949 fn serialize_opts() -> SerializeOpts {
2950 SerializeOpts::default()
2951 }
2952}
2953
2954impl Clone for Document {
2955 fn clone(&self) -> Self {
2956 let parser = Builder::make_parser();
2957 let dom = parser.one(&self.to_string()[..]);
2958 Document(dom)
2959 }
2960}
2961
2962/// Convert a `Document` to stringified HTML.
2963///
2964/// Since [`Document`] implements [`Display`], it can be converted to a [`String`] using the
2965/// standard [`ToString::to_string`] method. This is the simplest way to use `ammonia`.
2966///
2967/// [`Document`]: ammonia::Document
2968/// [`Display`]: std::fmt::Display
2969/// [`ToString::to_string`]: std::string::ToString
2970///
2971/// # Examples
2972///
2973/// use ammonia::Builder;
2974///
2975/// let input = "Some <style></style>HTML here";
2976/// let output = "Some HTML here";
2977///
2978/// let document = Builder::new()
2979/// .clean(input);
2980/// assert_eq!(document.to_string(), output);
2981impl Display for Document {
2982 fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
2983 let opts = Self::serialize_opts();
2984 let mut ret_val = Vec::new();
2985 let inner: SerializableHandle = self.0.document.children.borrow()[0].clone().into();
2986 serialize(&mut ret_val, &inner, opts)
2987 .expect("Writing to a string shouldn't fail (expect on OOM)");
2988 String::from_utf8(ret_val)
2989 .expect("html5ever only supports UTF8")
2990 .fmt(f)
2991 }
2992}
2993
2994impl fmt::Debug for Document {
2995 fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
2996 write!(f, "Document({})", self)
2997 }
2998}
2999
/// Converts a sanitized `Document` into its serialized HTML.
impl From<Document> for String {
    // Goes through the `Display` implementation, so the result is the same as `to_string()`.
    fn from(document: Document) -> Self {
        document.to_string()
    }
}
3005
3006#[cfg(test)]
3007mod test {
3008 use super::*;
3009 #[test]
3010 fn deeply_nested_whitelisted_does_not_cause_stack_overflow() {
3011 clean(&"<b>".repeat(60_000));
3012 }
3013 #[test]
3014 fn deeply_nested_blacklisted_does_not_cause_stack_overflow() {
3015 clean(&"<b-b>".repeat(60_000));
3016 }
3017 #[test]
3018 fn deeply_nested_alternating_does_not_cause_stack_overflow() {
3019 clean(&"<b-b>".repeat(35_000));
3020 }
3021 #[test]
3022 fn document_level_tags_cannot_be_whitelisted() {
3023 // Adding `html`, `head`, or `body` to the allowed tags has no effect
3024 // because the parser runs in fragment mode and strips them before
3025 // the sanitizer sees the tree. This test pins that documented
3026 // behavior; if it ever changes, the docs on `Builder::tags` need to
3027 // change too.
3028 let fragment =
3029 "<html><head>head content</head><body><div>test</div></body></html>";
3030 let result = Builder::default()
3031 .add_tags(["html", "head", "body"])
3032 .clean(fragment)
3033 .to_string();
3034 assert_eq!(result, "head content<div>test</div>");
3035 }
3036 #[test]
3037 fn included_angles() {
3038 let fragment = "1 < 2";
3039 let result = clean(fragment);
3040 assert_eq!(result, "1 < 2");
3041 }
3042 #[test]
3043 fn remove_script() {
3044 let fragment = "an <script>evil()</script> example";
3045 let result = clean(fragment);
3046 assert_eq!(result, "an example");
3047 }
3048 #[test]
3049 fn ignore_link() {
3050 let fragment = "a <a href=\"http://www.google.com\">good</a> example";
3051 let expected = "a <a href=\"http://www.google.com\" rel=\"noopener noreferrer\">\
3052 good</a> example";
3053 let result = clean(fragment);
3054 assert_eq!(result, expected);
3055 }
3056 #[test]
3057 fn remove_unsafe_link() {
3058 let fragment = "an <a onclick=\"evil()\" href=\"http://www.google.com\">evil</a> example";
3059 let result = clean(fragment);
3060 assert_eq!(
3061 result,
3062 "an <a href=\"http://www.google.com\" rel=\"noopener noreferrer\">evil</a> example"
3063 );
3064 }
3065 #[test]
3066 fn remove_js_link() {
3067 let fragment = "an <a href=\"javascript:evil()\">evil</a> example";
3068 let result = clean(fragment);
3069 assert_eq!(result, "an <a rel=\"noopener noreferrer\">evil</a> example");
3070 }
3071 #[test]
3072 fn tag_rebalance() {
3073 let fragment = "<b>AWESOME!";
3074 let result = clean(fragment);
3075 assert_eq!(result, "<b>AWESOME!</b>");
3076 }
3077 #[test]
3078 fn allow_url_relative() {
3079 let fragment = "<a href=test>Test</a>";
3080 let result = Builder::new()
3081 .url_relative(UrlRelative::PassThrough)
3082 .clean(fragment)
3083 .to_string();
3084 assert_eq!(
3085 result,
3086 "<a href=\"test\" rel=\"noopener noreferrer\">Test</a>"
3087 );
3088 }
3089 #[test]
3090 fn rewrite_url_relative() {
3091 let fragment = "<a href=test>Test</a>";
3092 let result = Builder::new()
3093 .url_relative(UrlRelative::RewriteWithBase(
3094 Url::parse("http://example.com/").unwrap(),
3095 ))
3096 .clean(fragment)
3097 .to_string();
3098 assert_eq!(
3099 result,
3100 "<a href=\"http://example.com/test\" rel=\"noopener noreferrer\">Test</a>"
3101 );
3102 }
3103 #[test]
3104 fn rewrite_url_relative_with_invalid_url() {
3105 // Reduced from https://github.com/Bauke/ammonia-crash-test
3106 let fragment = r##"<a href="\\"https://example.com\\"">test</a>"##;
3107 let result = Builder::new()
3108 .url_relative(UrlRelative::RewriteWithBase(
3109 Url::parse("http://example.com/").unwrap(),
3110 ))
3111 .clean(fragment)
3112 .to_string();
3113 assert_eq!(result, r##"<a rel="noopener noreferrer">test</a>"##);
3114 }
3115 #[test]
3116 fn attribute_filter_nop() {
3117 let fragment = "<a href=test>Test</a>";
3118 let result = Builder::new()
3119 .attribute_filter(|elem, attr, value| {
3120 assert_eq!("a", elem);
3121 assert!(
3122 matches!(
3123 (attr, value),
3124 ("href", "test") | ("rel", "noopener noreferrer")
3125 ),
3126 "{}",
3127 value.to_string()
3128 );
3129 Some(value.into())
3130 })
3131 .clean(fragment)
3132 .to_string();
3133 assert_eq!(
3134 result,
3135 "<a href=\"test\" rel=\"noopener noreferrer\">Test</a>"
3136 );
3137 }
3138
3139 #[test]
3140 fn attribute_filter_drop() {
3141 let fragment = "Test<img alt=test src=imgtest>";
3142 let result = Builder::new()
3143 .attribute_filter(|elem, attr, value| {
3144 assert_eq!("img", elem);
3145 match (attr, value) {
3146 ("src", "imgtest") => None,
3147 ("alt", "test") => Some(value.into()),
3148 _ => panic!("unexpected"),
3149 }
3150 })
3151 .clean(fragment)
3152 .to_string();
3153 assert_eq!(result, r#"Test<img alt="test">"#);
3154 }
3155
3156 #[test]
3157 fn url_filter_absolute() {
3158 let fragment = "Test<img alt=test src=imgtest>";
3159 let result = Builder::new()
3160 .attribute_filter(|elem, attr, value| {
3161 assert_eq!("img", elem);
3162 match (attr, value) {
3163 ("src", "imgtest") => {
3164 Some(format!("https://example.com/images/{}", value).into())
3165 }
3166 ("alt", "test") => None,
3167 _ => panic!("unexpected"),
3168 }
3169 })
3170 .url_relative(UrlRelative::RewriteWithBase(
3171 Url::parse("http://wrong.invalid/").unwrap(),
3172 ))
3173 .clean(fragment)
3174 .to_string();
3175 assert_eq!(
3176 result,
3177 r#"Test<img src="https://example.com/images/imgtest">"#
3178 );
3179 }
3180
3181 #[test]
3182 fn url_filter_relative() {
3183 let fragment = "Test<img alt=test src=imgtest>";
3184 let result = Builder::new()
3185 .attribute_filter(|elem, attr, value| {
3186 assert_eq!("img", elem);
3187 match (attr, value) {
3188 ("src", "imgtest") => Some("rewrite".into()),
3189 ("alt", "test") => Some("altalt".into()),
3190 _ => panic!("unexpected"),
3191 }
3192 })
3193 .url_relative(UrlRelative::RewriteWithBase(
3194 Url::parse("https://example.com/base/#").unwrap(),
3195 ))
3196 .clean(fragment)
3197 .to_string();
3198 assert_eq!(
3199 result,
3200 r#"Test<img alt="altalt" src="https://example.com/base/rewrite">"#
3201 );
3202 }
3203
3204 #[test]
3205 fn rewrite_url_relative_no_rel() {
3206 let fragment = "<a href=test>Test</a>";
3207 let result = Builder::new()
3208 .url_relative(UrlRelative::RewriteWithBase(
3209 Url::parse("http://example.com/").unwrap(),
3210 ))
3211 .link_rel(None)
3212 .clean(fragment)
3213 .to_string();
3214 assert_eq!(result, "<a href=\"http://example.com/test\">Test</a>");
3215 }
3216 #[test]
3217 fn deny_url_relative() {
3218 let fragment = "<a href=test>Test</a>";
3219 let result = Builder::new()
3220 .url_relative(UrlRelative::Deny)
3221 .clean(fragment)
3222 .to_string();
3223 assert_eq!(result, "<a rel=\"noopener noreferrer\">Test</a>");
3224 }
3225 #[test]
3226 fn replace_rel() {
3227 let fragment = "<a href=test rel=\"garbage\">Test</a>";
3228 let result = Builder::new()
3229 .url_relative(UrlRelative::PassThrough)
3230 .clean(fragment)
3231 .to_string();
3232 assert_eq!(
3233 result,
3234 "<a href=\"test\" rel=\"noopener noreferrer\">Test</a>"
3235 );
3236 }
3237 #[test]
3238 fn consider_rel_still_banned() {
3239 let fragment = "<a href=test rel=\"garbage\">Test</a>";
3240 let result = Builder::new()
3241 .url_relative(UrlRelative::PassThrough)
3242 .link_rel(None)
3243 .clean(fragment)
3244 .to_string();
3245 assert_eq!(result, "<a href=\"test\">Test</a>");
3246 }
3247 #[test]
3248 fn object_data() {
3249 let fragment = "<span data=\"javascript:evil()\">Test</span>\
3250 <object data=\"javascript:evil()\"></object>M";
3251 let expected = r#"<span data="javascript:evil()">Test</span><object></object>M"#;
3252 let result = Builder::new()
3253 .tags(hashset!["span", "object"])
3254 .generic_attributes(hashset!["data"])
3255 .clean(fragment)
3256 .to_string();
3257 assert_eq!(result, expected);
3258 }
3259 #[test]
3260 fn remove_attributes() {
3261 let fragment = "<table border=\"1\"><tr></tr></table>";
3262 let result = Builder::new().clean(fragment);
3263 assert_eq!(
3264 result.to_string(),
3265 "<table><tbody><tr></tr></tbody></table>"
3266 );
3267 }
3268 #[test]
3269 fn quotes_in_attrs() {
3270 let fragment = "<b title='\"'>contents</b>";
3271 let result = clean(fragment);
3272 assert_eq!(result, "<b title=\""\">contents</b>");
3273 }
3274 #[test]
3275 #[should_panic]
3276 fn panic_if_rel_is_allowed_and_replaced_generic() {
3277 Builder::new()
3278 .link_rel(Some("noopener noreferrer"))
3279 .generic_attributes(hashset!["rel"])
3280 .clean("something");
3281 }
3282 #[test]
3283 #[should_panic]
3284 fn panic_if_rel_is_allowed_and_replaced_a() {
3285 Builder::new()
3286 .link_rel(Some("noopener noreferrer"))
3287 .tag_attributes(hashmap![
3288 "a" => hashset!["rel"],
3289 ])
3290 .clean("something");
3291 }
3292 #[test]
3293 fn no_panic_if_rel_is_allowed_and_replaced_span() {
3294 Builder::new()
3295 .link_rel(Some("noopener noreferrer"))
3296 .tag_attributes(hashmap![
3297 "span" => hashset!["rel"],
3298 ])
3299 .clean("<span rel=\"what\">s</span>");
3300 }
3301 #[test]
3302 fn no_panic_if_rel_is_allowed_and_not_replaced_generic() {
3303 Builder::new()
3304 .link_rel(None)
3305 .generic_attributes(hashset!["rel"])
3306 .clean("<a rel=\"what\">s</a>");
3307 }
3308 #[test]
3309 fn no_panic_if_rel_is_allowed_and_not_replaced_a() {
3310 Builder::new()
3311 .link_rel(None)
3312 .tag_attributes(hashmap![
3313 "a" => hashset!["rel"],
3314 ])
3315 .clean("<a rel=\"what\">s</a>");
3316 }
3317 #[test]
3318 fn dont_close_void_elements() {
3319 let fragment = "<br>";
3320 let result = clean(fragment);
3321 assert_eq!(result.to_string(), "<br>");
3322 }
3323 #[should_panic]
3324 #[test]
3325 fn panic_on_allowed_classes_tag_attributes() {
3326 let fragment = "<p class=\"foo bar\"><a class=\"baz bleh\">Hey</a></p>";
3327 Builder::new()
3328 .link_rel(None)
3329 .tag_attributes(hashmap![
3330 "p" => hashset!["class"],
3331 "a" => hashset!["class"],
3332 ])
3333 .allowed_classes(hashmap![
3334 "p" => hashset!["foo", "bar"],
3335 "a" => hashset!["baz"],
3336 ])
3337 .clean(fragment);
3338 }
3339 #[should_panic]
3340 #[test]
3341 fn panic_on_allowed_classes_generic_attributes() {
3342 let fragment = "<p class=\"foo bar\"><a class=\"baz bleh\">Hey</a></p>";
3343 Builder::new()
3344 .link_rel(None)
3345 .generic_attributes(hashset!["class", "href", "some-foo"])
3346 .allowed_classes(hashmap![
3347 "p" => hashset!["foo", "bar"],
3348 "a" => hashset!["baz"],
3349 ])
3350 .clean(fragment);
3351 }
3352 #[test]
3353 fn remove_non_allowed_classes() {
3354 let fragment = "<p class=\"foo bar\"><a class=\"baz bleh\">Hey</a></p>";
3355 let result = Builder::new()
3356 .link_rel(None)
3357 .allowed_classes(hashmap![
3358 "p" => hashset!["foo", "bar"],
3359 "a" => hashset!["baz"],
3360 ])
3361 .clean(fragment);
3362 assert_eq!(
3363 result.to_string(),
3364 "<p class=\"foo bar\"><a class=\"baz\">Hey</a></p>"
3365 );
3366 }
3367 #[test]
3368 fn remove_non_allowed_classes_with_tag_class() {
3369 let fragment = "<p class=\"foo bar\"><a class=\"baz bleh\">Hey</a></p>";
3370 let result = Builder::new()
3371 .link_rel(None)
3372 .tag_attributes(hashmap![
3373 "div" => hashset!["class"],
3374 ])
3375 .allowed_classes(hashmap![
3376 "p" => hashset!["foo", "bar"],
3377 "a" => hashset!["baz"],
3378 ])
3379 .clean(fragment);
3380 assert_eq!(
3381 result.to_string(),
3382 "<p class=\"foo bar\"><a class=\"baz\">Hey</a></p>"
3383 );
3384 }
3385 #[test]
3386 fn allowed_classes_ascii_whitespace() {
3387 // According to https://infra.spec.whatwg.org/#ascii-whitespace,
3388 // TAB (\t), LF (\n), FF (\x0C), CR (\x0D) and SPACE (\x20) are
3389 // considered to be ASCII whitespace. Unicode whitespace characters
3390 // and VT (\x0B) aren't ASCII whitespace.
3391 let fragment = "<p class=\"a\tb\nc\x0Cd\re f\x0B g\u{2000}\">";
3392 let result = Builder::new()
3393 .allowed_classes(hashmap![
3394 "p" => hashset!["a", "b", "c", "d", "e", "f", "g"],
3395 ])
3396 .clean(fragment);
3397 assert_eq!(result.to_string(), r#"<p class="a b c d e"></p>"#);
3398 }
3399 #[test]
3400 fn remove_non_allowed_attributes_with_tag_attribute_values() {
3401 let fragment = "<p data-label=\"baz\" name=\"foo\"></p>";
3402 let result = Builder::new()
3403 .tag_attribute_values(hashmap![
3404 "p" => hashmap![
3405 "data-label" => hashset!["bar"],
3406 ],
3407 ])
3408 .tag_attributes(hashmap![
3409 "p" => hashset!["name"],
3410 ])
3411 .clean(fragment);
3412 assert_eq!(result.to_string(), "<p name=\"foo\"></p>",);
3413 }
3414 #[test]
3415 fn keep_allowed_attributes_with_tag_attribute_values() {
3416 let fragment = "<p data-label=\"bar\" name=\"foo\"></p>";
3417 let result = Builder::new()
3418 .tag_attribute_values(hashmap![
3419 "p" => hashmap![
3420 "data-label" => hashset!["bar"],
3421 ],
3422 ])
3423 .tag_attributes(hashmap![
3424 "p" => hashset!["name"],
3425 ])
3426 .clean(fragment);
3427 assert_eq!(
3428 result.to_string(),
3429 "<p data-label=\"bar\" name=\"foo\"></p>",
3430 );
3431 }
3432 #[test]
3433 fn tag_attribute_values_case_insensitive() {
3434 let fragment = "<input type=\"CHECKBOX\" name=\"foo\">";
3435 let result = Builder::new()
3436 .tags(hashset!["input"])
3437 .tag_attribute_values(hashmap![
3438 "input" => hashmap![
3439 "type" => hashset!["checkbox"],
3440 ],
3441 ])
3442 .tag_attributes(hashmap![
3443 "input" => hashset!["name"],
3444 ])
3445 .clean(fragment);
3446 assert_eq!(result.to_string(), "<input type=\"CHECKBOX\" name=\"foo\">",);
3447 }
3448 #[test]
3449 fn set_tag_attribute_values() {
3450 let fragment = "<a href=\"https://example.com/\">Link</a>";
3451 let result = Builder::new()
3452 .link_rel(None)
3453 .add_tag_attributes("a", &["target"])
3454 .set_tag_attribute_value("a", "target", "_blank")
3455 .clean(fragment);
3456 assert_eq!(
3457 result.to_string(),
3458 "<a href=\"https://example.com/\" target=\"_blank\">Link</a>",
3459 );
3460 }
3461 #[test]
3462 fn update_existing_set_tag_attribute_values() {
3463 let fragment = "<a target=\"bad\" href=\"https://example.com/\">Link</a>";
3464 let result = Builder::new()
3465 .link_rel(None)
3466 .add_tag_attributes("a", &["target"])
3467 .set_tag_attribute_value("a", "target", "_blank")
3468 .clean(fragment);
3469 assert_eq!(
3470 result.to_string(),
3471 "<a target=\"_blank\" href=\"https://example.com/\">Link</a>",
3472 );
3473 }
3474 #[test]
3475 fn unwhitelisted_set_tag_attribute_values() {
3476 let fragment = "<span>hi</span><my-elem>";
3477 let result = Builder::new()
3478 .set_tag_attribute_value("my-elem", "my-attr", "val")
3479 .clean(fragment);
3480 assert_eq!(result.to_string(), "<span>hi</span>",);
3481 }
3482 #[test]
3483 fn remove_entity_link() {
3484 let fragment = "<a href=\"javascript:a\
3485 lert('XSS')\">Click me!</a>";
3486 let result = clean(fragment);
3487 assert_eq!(
3488 result.to_string(),
3489 "<a rel=\"noopener noreferrer\">Click me!</a>"
3490 );
3491 }
3492 #[test]
3493 fn remove_relative_url_evaluate() {
3494 fn is_absolute_path(url: &str) -> bool {
3495 let u = url.as_bytes();
3496 // `//a/b/c` is "protocol-relative", meaning "a" is a hostname
3497 // `/a/b/c` is an absolute path, and what we want to do stuff to.
3498 u.first() == Some(&b'/') && u.get(1) != Some(&b'/')
3499 }
3500 fn is_banned(url: &str) -> bool {
3501 let u = url.as_bytes();
3502 u.first() == Some(&b'b') && u.get(1) == Some(&b'a')
3503 }
3504 fn evaluate(url: &str) -> Option<Cow<'_, str>> {
3505 if is_absolute_path(url) {
3506 Some(Cow::Owned(String::from("/root") + url))
3507 } else if is_banned(url) {
3508 None
3509 } else {
3510 Some(Cow::Borrowed(url))
3511 }
3512 }
3513 let a = Builder::new()
3514 .url_relative(UrlRelative::Custom(Box::new(evaluate)))
3515 .clean("<a href=banned>banned</a><a href=/test/path>fixed</a><a href=path>passed</a><a href=http://google.com/>skipped</a>")
3516 .to_string();
3517 assert_eq!(a, "<a rel=\"noopener noreferrer\">banned</a><a href=\"/root/test/path\" rel=\"noopener noreferrer\">fixed</a><a href=\"path\" rel=\"noopener noreferrer\">passed</a><a href=\"http://google.com/\" rel=\"noopener noreferrer\">skipped</a>");
3518 }
3519 #[test]
3520 fn remove_relative_url_evaluate_b() {
3521 fn is_absolute_path(url: &str) -> bool {
3522 let u = url.as_bytes();
3523 // `//a/b/c` is "protocol-relative", meaning "a" is a hostname
3524 // `/a/b/c` is an absolute path, and what we want to do stuff to.
3525 u.first() == Some(&b'/') && u.get(1) != Some(&b'/')
3526 }
3527 fn is_banned(url: &str) -> bool {
3528 let u = url.as_bytes();
3529 u.first() == Some(&b'b') && u.get(1) == Some(&b'a')
3530 }
3531 fn evaluate(url: &str) -> Option<Cow<'_, str>> {
3532 if is_absolute_path(url) {
3533 Some(Cow::Owned(String::from("/root") + url))
3534 } else if is_banned(url) {
3535 None
3536 } else {
3537 Some(Cow::Borrowed(url))
3538 }
3539 }
3540 let a = Builder::new()
3541 .url_relative(UrlRelative::Custom(Box::new(evaluate)))
3542 .clean("<a href=banned>banned</a><a href=banned title=test>banned</a><a title=test href=banned>banned</a>")
3543 .to_string();
3544 assert_eq!(a, "<a rel=\"noopener noreferrer\">banned</a><a rel=\"noopener noreferrer\" title=\"test\">banned</a><a title=\"test\" rel=\"noopener noreferrer\">banned</a>");
3545 }
3546 #[test]
3547 fn remove_relative_url_evaluate_c() {
3548 // Don't run on absolute URLs.
3549 fn evaluate(_: &str) -> Option<Cow<'_, str>> {
3550 return Some(Cow::Owned(String::from("invalid")));
3551 }
3552 let a = Builder::new()
3553 .url_relative(UrlRelative::Custom(Box::new(evaluate)))
3554 .clean("<a href=\"https://www.google.com/\">google</a>")
3555 .to_string();
3556 assert_eq!(
3557 a,
3558 "<a href=\"https://www.google.com/\" rel=\"noopener noreferrer\">google</a>"
3559 );
3560 }
3561 #[test]
3562 fn clean_children_of_bad_element() {
3563 let fragment = "<bad><evil>a</evil>b</bad>";
3564 let result = Builder::new().clean(fragment);
3565 assert_eq!(result.to_string(), "ab");
3566 }
3567 #[test]
3568 fn reader_input() {
3569 let fragment = b"an <script>evil()</script> example";
3570 let result = Builder::new().clean_from_reader(&fragment[..]);
3571 assert!(result.is_ok());
3572 assert_eq!(result.unwrap().to_string(), "an example");
3573 }
3574 #[test]
3575 fn reader_non_utf8() {
3576 let fragment = b"non-utf8 \xF0\x90\x80string";
3577 let result = Builder::new().clean_from_reader(&fragment[..]);
3578 assert!(result.is_ok());
3579 assert_eq!(result.unwrap().to_string(), "non-utf8 \u{fffd}string");
3580 }
3581 #[test]
3582 fn display_impl() {
3583 let fragment = r#"a <a>link</a>"#;
3584 let result = Builder::new().link_rel(None).clean(fragment);
3585 assert_eq!(format!("{}", result), "a <a>link</a>");
3586 }
3587 #[test]
3588 fn debug_impl() {
3589 let fragment = r#"a <a>link</a>"#;
3590 let result = Builder::new().link_rel(None).clean(fragment);
3591 assert_eq!(format!("{:?}", result), "Document(a <a>link</a>)");
3592 }
3593 #[cfg(ammonia_unstable)]
3594 #[test]
3595 fn to_dom_node() {
3596 let fragment = r#"a <a>link</a>"#;
3597 let result = Builder::new().link_rel(None).clean(fragment);
3598 let _node = result.to_dom_node();
3599 }
3600 #[test]
3601 fn string_from_document() {
3602 let fragment = r#"a <a>link"#;
3603 let result = String::from(Builder::new().link_rel(None).clean(fragment));
3604 assert_eq!(format!("{}", result), "a <a>link</a>");
3605 }
3606 fn require_sync<T: Sync>(_: T) {}
3607 fn require_send<T: Send>(_: T) {}
3608 #[test]
3609 fn require_sync_and_send() {
3610 require_sync(Builder::new());
3611 require_send(Builder::new());
3612 }
3613 #[test]
3614 fn id_prefixed() {
3615 let fragment = "<a id=\"hello\"></a><b id=\"hello\"></a>";
3616 let result = String::from(
3617 Builder::new()
3618 .tag_attributes(hashmap![
3619 "a" => hashset!["id"],
3620 ])
3621 .id_prefix(Some("prefix-"))
3622 .clean(fragment),
3623 );
3624 assert_eq!(
3625 result.to_string(),
3626 "<a id=\"prefix-hello\" rel=\"noopener noreferrer\"></a><b></b>"
3627 );
3628 }
3629 #[test]
3630 fn id_already_prefixed() {
3631 let fragment = "<a id=\"prefix-hello\"></a>";
3632 let result = String::from(
3633 Builder::new()
3634 .tag_attributes(hashmap![
3635 "a" => hashset!["id"],
3636 ])
3637 .id_prefix(Some("prefix-"))
3638 .clean(fragment),
3639 );
3640 assert_eq!(
3641 result.to_string(),
3642 "<a id=\"prefix-hello\" rel=\"noopener noreferrer\"></a>"
3643 );
3644 }
3645 #[test]
3646 fn clean_content_tags() {
3647 let fragment = "<script type=\"text/javascript\"><a>Hello!</a></script>";
3648 let result = String::from(
3649 Builder::new()
3650 .clean_content_tags(hashset!["script"])
3651 .clean(fragment),
3652 );
3653 assert_eq!(result.to_string(), "");
3654 }
3655 #[test]
3656 fn only_clean_content_tags() {
3657 let fragment = "<em>This is</em><script><a>Hello!</a></script><p>still here!</p>";
3658 let result = String::from(
3659 Builder::new()
3660 .clean_content_tags(hashset!["script"])
3661 .clean(fragment),
3662 );
3663 assert_eq!(result.to_string(), "<em>This is</em><p>still here!</p>");
3664 }
3665 #[test]
3666 fn clean_removed_default_tag() {
3667 let fragment = "<em>This is</em><script><a>Hello!</a></script><p>still here!</p>";
3668 let result = String::from(
3669 Builder::new()
3670 .rm_tags(hashset!["a"])
3671 .rm_tag_attributes("a", hashset!["href", "hreflang"])
3672 .clean_content_tags(hashset!["script"])
3673 .clean(fragment),
3674 );
3675 assert_eq!(result.to_string(), "<em>This is</em><p>still here!</p>");
3676 }
3677 #[test]
3678 #[should_panic]
3679 fn panic_on_clean_content_tag_attribute() {
3680 Builder::new()
3681 .rm_tags(std::iter::once("a"))
3682 .clean_content_tags(hashset!["a"])
3683 .clean("");
3684 }
3685 #[test]
3686 #[should_panic]
3687 fn panic_on_clean_content_tag() {
3688 Builder::new().clean_content_tags(hashset!["a"]).clean("");
3689 }
3690
3691 #[test]
3692 fn clean_text_test() {
3693 assert_eq!(
3694 clean_text("<this> is <a test function"),
3695 "<this> is <a test function"
3696 );
3697 }
3698
3699 #[test]
3700 fn clean_text_spaces_test() {
3701 assert_eq!(clean_text("\x09\x0a\x0c\x20"), "	  ");
3702 }
3703
3704 #[test]
3705 fn ns_svg() {
3706 // https://github.com/cure53/DOMPurify/pull/495
3707 let fragment = r##"<svg><iframe><a title="</iframe><img src onerror=alert(1)>">test"##;
3708 let result = String::from(Builder::new().add_tags(&["iframe"]).clean(fragment));
3709 assert_eq!(result.to_string(), "");
3710
3711 let fragment = "<svg><iframe>remove me</iframe></svg><iframe>keep me</iframe>";
3712 let result = String::from(Builder::new().add_tags(&["iframe"]).clean(fragment));
3713 assert_eq!(result.to_string(), "<iframe>keep me</iframe>");
3714
3715 let fragment = "<svg><a>remove me</a></svg><iframe>keep me</iframe>";
3716 let result = String::from(Builder::new().add_tags(&["iframe"]).clean(fragment));
3717 assert_eq!(result.to_string(), "<iframe>keep me</iframe>");
3718
3719 let fragment = "<svg><a>keep me</a></svg><iframe>keep me</iframe>";
3720 let result = String::from(Builder::new().add_tags(&["iframe", "svg"]).clean(fragment));
3721 assert_eq!(
3722 result.to_string(),
3723 "<svg><a rel=\"noopener noreferrer\">keep me</a></svg><iframe>keep me</iframe>"
3724 );
3725 }
3726
3727 #[test]
3728 fn ns_svg_2() {
3729 let fragment = "<svg><foreignObject><table><path><xmp><!--</xmp><img title'--><img src=1 onerror=alert(1)>'>";
3730 let result = Builder::default()
3731 .strip_comments(false)
3732 .add_tags(&["svg","foreignObject","table","path","xmp"])
3733 .clean(fragment);
3734 assert_eq!(
3735 result.to_string(),
3736 "<svg><foreignObject><table></table></foreignObject></svg>"
3737 );
3738 }
3739
3740 #[test]
3741 fn ns_mathml() {
3742 // https://github.com/cure53/DOMPurify/pull/495
3743 let fragment = "<mglyph></mglyph>";
3744 let result = String::from(
3745 Builder::new()
3746 .add_tags(&["math", "mtext", "mglyph"])
3747 .clean(fragment),
3748 );
3749 assert_eq!(result.to_string(), "");
3750 let fragment = "<math><mtext><div><mglyph>";
3751 let result = String::from(
3752 Builder::new()
3753 .add_tags(&["math", "mtext", "mglyph"])
3754 .clean(fragment),
3755 );
3756 assert_eq!(
3757 result.to_string(),
3758 "<math><mtext><div></div></mtext></math>"
3759 );
3760 let fragment = "<math><mtext><mglyph>";
3761 let result = String::from(
3762 Builder::new()
3763 .add_tags(&["math", "mtext", "mglyph"])
3764 .clean(fragment),
3765 );
3766 assert_eq!(
3767 result.to_string(),
3768 "<math><mtext><mglyph></mglyph></mtext></math>"
3769 );
3770 }
3771
3772 #[test]
3773 fn ns_mathml_2() {
3774 let fragment = "<math><mtext><table><mglyph><xmp><!--</xmp><img title='--><img src=1 onerror=alert(1)>'>";
3775 let result = Builder::default()
3776 .strip_comments(false)
3777 .add_tags(&["math","mtext","table","mglyph","xmp"])
3778 .clean(fragment);
3779 assert_eq!(
3780 result.to_string(),
3781 "<math><mtext><table></table></mtext></math>"
3782 );
3783 }
3784
3785 #[test]
3786 fn ns_mathml_3() {
3787 // try without the attr
3788 let fragment = "<math><annotation-xml encoding='text/html'><xmp><!--</xmp><img title='--><img src=1 onerror=alert(1)>'>";
3789 let result = Builder::default()
3790 .strip_comments(false)
3791 .add_tags(&["math","annotation-xml","table","mglyph","xmp"])
3792 .clean(fragment);
3793 assert_eq!(
3794 result.to_string(),
3795 "<math><annotation-xml></annotation-xml></math>"
3796 );
3797 // now with the attr
3798 let fragment = "<math><annotation-xml encoding='text/html'><xmp><!--</xmp><img title='--><img src=1 onerror=alert(1)>'>";
3799 let result = Builder::default()
3800 .strip_comments(false)
3801 .add_tags(&["math","annotation-xml","table","mglyph","xmp"])
3802 .add_tag_attribute_values("annotation-xml", "encoding", ["text/html"])
3803 .clean(fragment);
3804 assert_eq!(
3805 result.to_string(),
3806 // yes, I tried it in Firefox, and the script didn't run
3807 r#"<math><annotation-xml encoding="text/html"><xmp><!--</xmp><img title="--><img src=1 onerror=alert(1)>"></annotation-xml></math>"#
3808 );
3809 // now with a tweaked attr
3810 let fragment = "<math><annotation-xml encoding='image/svg+xml'><xmp><!--</xmp><img title='--><img src=1 onerror=alert(1)>'>";
3811 let result = Builder::default()
3812 .strip_comments(false)
3813 .add_tags(&["math","annotation-xml","table","mglyph","xmp"])
3814 .add_tag_attribute_values("annotation-xml", "encoding", ["image/svg+xml"])
3815 .clean(fragment);
3816 assert_eq!(
3817 result.to_string(),
3818 // yes, I tried it in Firefox, and the script didn't run
3819 r#"<math><annotation-xml encoding="image/svg+xml"></annotation-xml></math>"#
3820 );
3821 // now with actual SVG
3822 let fragment = "<math><annotation-xml encoding='image/svg+xml'><svg>";
3823 let result = Builder::default()
3824 .strip_comments(false)
3825 .add_tags(&["math","annotation-xml","svg"])
3826 .add_tag_attribute_values("annotation-xml", "encoding", ["image/svg+xml"])
3827 .clean(fragment);
3828 assert_eq!(
3829 result.to_string(),
3830 // yes, I tried it in Firefox, and the script didn't run
3831 r#"<math><annotation-xml encoding="image/svg+xml"><svg></svg></annotation-xml></math>"#
3832 );
3833 }
3834
3835
3836 #[test]
3837 fn xml_processing_instruction() {
3838 // https://blog.slonser.info/posts/dompurify-node-type-confusion/
3839 let fragment = r##"<svg><?xml-stylesheet src='slonser' ?></svg>"##;
3840 let result = String::from(Builder::new().clean(fragment));
3841 assert_eq!(result.to_string(), "");
3842
3843 let fragment = r##"<svg><?xml-stylesheet src='slonser' ?></svg>"##;
3844 let result = String::from(Builder::new().add_tags(&["svg"]).clean(fragment));
3845 assert_eq!(result.to_string(), "<svg></svg>");
3846
3847 let fragment = r##"<svg><?xml-stylesheet ><img src=x onerror="alert('Ammonia bypassed!!!')"> ?></svg>"##;
3848 let result = String::from(Builder::new().add_tags(&["svg"]).clean(fragment));
3849 assert_eq!(result.to_string(), "<svg></svg><img src=\"x\"> ?>");
3850 }
3851
3852 #[test]
3853 fn generic_attribute_prefixes() {
3854 let prefix_data = ["data-"];
3855 let prefix_code = ["code-"];
3856 let mut b = Builder::new();
3857 let mut hs: HashSet<&'_ str> = HashSet::new();
3858 hs.insert("data-");
3859 assert!(b.generic_attribute_prefixes.is_none());
3860 b.generic_attribute_prefixes(hs);
3861 assert!(b.generic_attribute_prefixes.is_some());
3862 assert_eq!(b.generic_attribute_prefixes.as_ref().unwrap().len(), 1);
3863 b.add_generic_attribute_prefixes(&prefix_data);
3864 assert_eq!(b.generic_attribute_prefixes.as_ref().unwrap().len(), 1);
3865 b.add_generic_attribute_prefixes(&prefix_code);
3866 assert_eq!(b.generic_attribute_prefixes.as_ref().unwrap().len(), 2);
3867 b.rm_generic_attribute_prefixes(&prefix_code);
3868 assert_eq!(b.generic_attribute_prefixes.as_ref().unwrap().len(), 1);
3869 b.rm_generic_attribute_prefixes(&prefix_code);
3870 assert_eq!(b.generic_attribute_prefixes.as_ref().unwrap().len(), 1);
3871 b.rm_generic_attribute_prefixes(&prefix_data);
3872 assert!(b.generic_attribute_prefixes.is_none());
3873 }
3874
3875 #[test]
3876 fn selectedcontent() {
3877 // https://github.com/servo/html5ever/issues/712
3878 let fragment1 = r#"<select><selectedcontent></selectedcontent><option>X"#;
3879 let fragment2 = r#"<select><selectedcontent></selectedcontent><option>X</option></select>"#;
3880 let expected = r#"<select><selectedcontent></selectedcontent><option>X</option></select>"#;
3881 assert_eq!(String::from(Builder::new().add_tags(&["select", "selectedcontent", "option"]).clean(fragment1)), expected);
3882 assert_eq!(String::from(Builder::new().add_tags(&["select", "selectedcontent", "option"]).clean(fragment2)), expected);
3883 }
3884
3885 #[test]
3886 fn new_select_parse() {
3887 // https://github.com/whatwg/html/issues/10310#issuecomment-2304377029
3888 let fragment = r#"
3889<select><style></select><img src onerror=xss()></style></select>
3890 "#;
3891 let expected = r#"
3892<select></select>
3893 "#;
3894 assert_eq!(String::from(Builder::new().add_tags(&["select", "new-select"]).clean_content_tags(hashset!["style"]).clean(fragment)), expected);
3895 }
3896
3897 #[test]
3898 fn selectedcontent_not_in_select() {
3899 // https://github.com/whatwg/html/issues/10310#issuecomment-2304377029
3900 let fragment = r#"
3901<selectedcontent>first</selectedcontent>
3902<div><selectedcontent>second</selectedcontent></div>
3903<select><selectedcontent>third</selectedcontent></select>
3904 "#;
3905 let expected = r#"
3906<selectedcontent>first</selectedcontent>
3907<div><selectedcontent>second</selectedcontent></div>
3908<select><selectedcontent></selectedcontent></select>
3909 "#;
3910 assert_eq!(String::from(Builder::new().add_tags(&["select", "selectedcontent"]).clean(fragment)), expected);
3911 }
3912
3913 #[test]
3914 fn generic_attribute_prefixes_clean() {
3915 let fragment = r#"<a data-1 data-2 code-1 code-2><a>Hello!</a></a>"#;
3916 let result_cleaned = String::from(
3917 Builder::new()
3918 .add_tag_attributes("a", &["data-1"])
3919 .clean(fragment),
3920 );
3921 assert_eq!(
3922 result_cleaned,
3923 r#"<a data-1="" rel="noopener noreferrer"></a><a rel="noopener noreferrer">Hello!</a>"#
3924 );
3925 let result_allowed = String::from(
3926 Builder::new()
3927 .add_tag_attributes("a", &["data-1"])
3928 .add_generic_attribute_prefixes(&["data-"])
3929 .clean(fragment),
3930 );
3931 assert_eq!(
3932 result_allowed,
3933 r#"<a data-1="" data-2="" rel="noopener noreferrer"></a><a rel="noopener noreferrer">Hello!</a>"#
3934 );
3935 let result_allowed = String::from(
3936 Builder::new()
3937 .add_tag_attributes("a", &["data-1", "code-1"])
3938 .add_generic_attribute_prefixes(&["data-", "code-"])
3939 .clean(fragment),
3940 );
3941 assert_eq!(
3942 result_allowed,
3943 r#"<a data-1="" data-2="" code-1="" code-2="" rel="noopener noreferrer"></a><a rel="noopener noreferrer">Hello!</a>"#
3944 );
3945 }
3946 #[test]
3947 fn lesser_than_isnt_html() {
3948 let fragment = "1 < 2";
3949 assert!(!is_html(fragment));
3950 }
3951 #[test]
3952 fn dense_lesser_than_isnt_html() {
3953 let fragment = "1<2";
3954 assert!(!is_html(fragment));
3955 }
3956 #[test]
3957 fn what_about_number_elements() {
3958 let fragment = "foo<2>bar";
3959 assert!(!is_html(fragment));
3960 }
3961 #[test]
3962 fn turbofish_is_html_sadly() {
3963 let fragment = "Vec::<u8>::new()";
3964 assert!(is_html(fragment));
3965 }
3966 #[test]
3967 fn stop_grinning() {
3968 let fragment = "did you really believe me? <g>";
3969 assert!(is_html(fragment));
3970 }
3971 #[test]
3972 fn dont_be_bold() {
3973 let fragment = "<b>";
3974 assert!(is_html(fragment));
3975 }
3976
/// Table-driven check of `UrlRelative::RewriteWithRoot`.
///
/// Each case is `(root, path, url, result)`. The test cleans
/// `<a href="{url}">test</a>` with a builder configured with `root` and `path`,
/// and expects the output to be an anchor whose `href` is `result` (plus the
/// `rel="noopener noreferrer"` attribute that appears in the expected string).
#[test]
fn rewrite_with_root() {
    // (root, path, url written in the document, expected rewritten href)
    let tests = [
        (
            "https://github.com/rust-ammonia/ammonia/blob/master/",
            "README.md",
            "",
            "https://github.com/rust-ammonia/ammonia/blob/master/README.md",
        ),
        (
            "https://github.com/rust-ammonia/ammonia/blob/master/",
            "README.md",
            "/",
            "https://github.com/rust-ammonia/ammonia/blob/master/",
        ),
        (
            "https://github.com/rust-ammonia/ammonia/blob/master/",
            "README.md",
            "/CONTRIBUTING.md",
            "https://github.com/rust-ammonia/ammonia/blob/master/CONTRIBUTING.md",
        ),
        (
            "https://github.com/rust-ammonia/ammonia/blob/master",
            "README.md",
            "",
            "https://github.com/rust-ammonia/ammonia/blob/README.md",
        ),
        (
            "https://github.com/rust-ammonia/ammonia/blob/master",
            "README.md",
            "/",
            "https://github.com/rust-ammonia/ammonia/blob/",
        ),
        (
            "https://github.com/rust-ammonia/ammonia/blob/master",
            "README.md",
            "/CONTRIBUTING.md",
            "https://github.com/rust-ammonia/ammonia/blob/CONTRIBUTING.md",
        ),
        (
            "https://github.com/rust-ammonia/ammonia/blob/master/",
            "",
            "",
            "https://github.com/rust-ammonia/ammonia/blob/master/",
        ),
        (
            "https://github.com/rust-ammonia/ammonia/blob/master/",
            "",
            "/",
            "https://github.com/rust-ammonia/ammonia/blob/master/",
        ),
        (
            "https://github.com/rust-ammonia/ammonia/blob/master/",
            "",
            "/CONTRIBUTING.md",
            "https://github.com/rust-ammonia/ammonia/blob/master/CONTRIBUTING.md",
        ),
        (
            "https://github.com/",
            "rust-ammonia/ammonia/blob/master/README.md",
            "",
            "https://github.com/rust-ammonia/ammonia/blob/master/README.md",
        ),
        (
            "https://github.com/",
            "rust-ammonia/ammonia/blob/master/README.md",
            "/",
            "https://github.com/",
        ),
        (
            "https://github.com/",
            "rust-ammonia/ammonia/blob/master/README.md",
            "CONTRIBUTING.md",
            "https://github.com/rust-ammonia/ammonia/blob/master/CONTRIBUTING.md",
        ),
        (
            "https://github.com/",
            "rust-ammonia/ammonia/blob/master/README.md",
            "/CONTRIBUTING.md",
            "https://github.com/CONTRIBUTING.md",
        ),
    ];
    for (root, path, url, result) in tests {
        let input = format!(r#"<a href="{url}">test</a>"#);
        let expected = format!(r#"<a href="{result}" rel="noopener noreferrer">test</a>"#);
        let actual = Builder::new()
            .url_relative(UrlRelative::RewriteWithRoot {
                root: Url::parse(root).unwrap(),
                path: path.to_string(),
            })
            .clean(&input)
            .to_string();
        // Carry the failing case in the assertion message itself rather than
        // printing it separately before asserting.
        assert_eq!(
            expected, actual,
            "failed to check ({root}, {path}, {url}, {result})"
        );
    }
}
4078}