ammonia/lib.rs
1// Copyright (C) Michael Howell and others
2// this library is released under the same terms as Rust itself.
3
4#![deny(unsafe_code)]
5#![deny(missing_docs)]
6
7//! Ammonia is a whitelist-based HTML sanitization library. It is designed to
8//! prevent cross-site scripting, layout breaking, and clickjacking caused
9//! by untrusted user-provided HTML being mixed into a larger web page.
10//!
11//! Ammonia uses [html5ever] to parse and serialize document fragments the same way browsers do,
12//! so it is extremely resilient to syntactic obfuscation.
13//!
14//! Ammonia parses its input exactly according to the HTML5 specification;
15//! it will not linkify bare URLs, insert line or paragraph breaks, or convert `(C)` into ©.
16//! If you want that, use a markup processor before running the sanitizer, like [pulldown-cmark].
17//!
18//! # Examples
19//!
20//! ```
21//! let result = ammonia::clean(
22//! "<b><img src='' onerror=alert('hax')>I'm not trying to XSS you</b>"
23//! );
24//! assert_eq!(result, "<b><img src=\"\">I'm not trying to XSS you</b>");
25//! ```
26//!
27//! [html5ever]: https://github.com/servo/html5ever "The HTML parser in Servo"
28//! [pulldown-cmark]: https://github.com/google/pulldown-cmark "CommonMark parser"
29
30#[cfg(ammonia_unstable)]
31pub mod rcdom;
32
33#[cfg(not(ammonia_unstable))]
34mod rcdom;
35
36mod style;
37
38use html5ever::interface::Attribute;
39use html5ever::serialize::{serialize, SerializeOpts};
40use html5ever::tree_builder::{NodeOrText, TreeSink};
41use html5ever::{driver as html, local_name, ns, QualName};
42use maplit::{hashmap, hashset};
43use std::sync::LazyLock;
44use rcdom::{Handle, NodeData, RcDom, SerializableHandle};
45use std::borrow::{Borrow, Cow};
46use std::cell::Cell;
47use std::cmp::max;
48use std::collections::{HashMap, HashSet};
49use std::fmt::{self, Display};
50use std::io;
51use std::iter::IntoIterator as IntoIter;
52use std::mem;
53use std::rc::Rc;
54use std::str::FromStr;
55use tendril::stream::TendrilSink;
56use tendril::StrTendril;
57use tendril::{format_tendril, ByteTendril};
58pub use url::Url;
59
60use html5ever::buffer_queue::BufferQueue;
61use html5ever::tokenizer::{Token, TokenSink, TokenSinkResult, Tokenizer};
62pub use url;
63
// Process-wide sanitizer with the default configuration. It is built on first
// access via `Builder::default` and reused by every later call to `clean`.
static AMMONIA: LazyLock<Builder<'static>> = LazyLock::new(Builder::default);
65
66/// Clean HTML with a conservative set of defaults.
67///
68/// * [tags](struct.Builder.html#defaults)
69/// * [`script` and `style` have their contents stripped](struct.Builder.html#defaults-1)
70/// * [attributes on specific tags](struct.Builder.html#defaults-2)
71/// * [attributes on all tags](struct.Builder.html#defaults-6)
72/// * [url schemes](struct.Builder.html#defaults-7)
73/// * [relative URLs are passed through, unchanged, by default](struct.Builder.html#defaults-8)
74/// * [links are marked `noopener noreferrer` by default](struct.Builder.html#defaults-9)
75/// * all `class=""` settings are blocked by default
76/// * comments are stripped by default
77/// * no generic attribute prefixes are turned on by default
78/// * no specific tag-attribute-value settings are configured by default
79///
80/// [opener]: https://mathiasbynens.github.io/rel-noopener/
81/// [referrer]: https://en.wikipedia.org/wiki/HTTP_referer
82///
83/// # Examples
84///
85/// assert_eq!(ammonia::clean("XSS<script>attack</script>"), "XSS")
86pub fn clean(src: &str) -> String {
87 AMMONIA.clean(src).to_string()
88}
89
90/// Turn an arbitrary string into unformatted HTML.
91///
92/// This function is roughly equivalent to PHP's `htmlspecialchars` and `htmlentities`.
93/// It is as strict as possible, encoding every character that has special meaning to the
94/// HTML parser.
95///
96/// # Warnings
97///
98/// This function cannot be used to package strings into a `<script>` or `<style>` tag;
99/// you need a JavaScript or CSS escaper to do that.
100///
101/// // DO NOT DO THIS
102/// # use ammonia::clean_text;
103/// let untrusted = "Robert\"); abuse();//";
104/// let html = format!("<script>invoke(\"{}\")</script>", clean_text(untrusted));
105///
106/// `<textarea>` tags will strip the first newline, if present, even if that newline is encoded.
107/// If you want to build an editor that works the way most folks expect them to, you should put a
108/// newline at the beginning of the tag, like this:
109///
110/// # use ammonia::{Builder, clean_text};
111/// let untrusted = "\n\nhi!";
112/// let mut b = Builder::new();
113/// b.add_tags(&["textarea"]);
114/// // This is the bad version
115/// // The user put two newlines at the beginning, but the first one was removed
116/// let sanitized = b.clean(&format!("<textarea>{}</textarea>", clean_text(untrusted))).to_string();
117/// assert_eq!("<textarea>\nhi!</textarea>", sanitized);
118/// // This is a good version
119/// // The user put two newlines at the beginning, and we add a third one,
120/// // so the result still has two
121/// let sanitized = b.clean(&format!("<textarea>\n{}</textarea>", clean_text(untrusted))).to_string();
122/// assert_eq!("<textarea>\n\nhi!</textarea>", sanitized);
123/// // This version is also often considered good
124/// // For many applications, leading and trailing whitespace is probably unwanted
125/// let sanitized = b.clean(&format!("<textarea>{}</textarea>", clean_text(untrusted.trim()))).to_string();
126/// assert_eq!("<textarea>hi!</textarea>", sanitized);
127///
128/// It also does not make user text safe for HTML attribute microsyntaxes such as `class` or `id`.
129/// Only use this function for places where HTML accepts unrestricted text such as `title` attributes
130/// and paragraph contents.
pub fn clean_text(src: &str) -> String {
    // Every escaped character grows, so the input length is only a lower bound;
    // the floor of 4 keeps tiny inputs from reallocating immediately.
    let mut ret_val = String::with_capacity(max(4, src.len()));
    for c in src.chars() {
        let replacement = match c {
            // this character, when confronted, will start a tag
            '<' => "&lt;",
            // in an unquoted attribute, will end the attribute value
            '>' => "&gt;",
            // in an attribute surrounded by double quotes, this character will end the attribute value
            '\"' => "&quot;",
            // in an attribute surrounded by single quotes, this character will end the attribute value
            '\'' => "&apos;",
            // in HTML5, returns a bogus parse error in an unquoted attribute, while in SGML/HTML, it will end an attribute value surrounded by backquotes
            '`' => "&grave;",
            // in an unquoted attribute, this character will end the attribute
            '/' => "&#47;",
            // starts an entity reference
            '&' => "&amp;",
            // if at the beginning of an unquoted attribute, will get ignored
            '=' => "&#61;",
            // whitespace will end an unquoted attribute
            ' ' => "&#32;",
            '\t' => "&#9;",
            '\n' => "&#10;",
            '\x0c' => "&#12;",
            '\r' => "&#13;",
            // a spec-compliant browser will perform this replacement anyway, but the middleware might not
            '\0' => "&#65533;",
            // ALL OTHER CHARACTERS ARE PASSED THROUGH VERBATIM
            _ => {
                ret_val.push(c);
                continue;
            }
        };
        ret_val.push_str(replacement);
    }
    ret_val
}
169
170/// Determine if a given string contains HTML
171///
/// This function parses the full string as HTML and checks whether the input contained any
/// HTML syntax.
174///
175/// # Note
176/// This function will return positively for strings that contain invalid HTML syntax like
177/// `<g>` and even `Vec::<u8>::new()`.
178pub fn is_html(input: &str) -> bool {
179 let santok = SanitizationTokenizer::new();
180 let mut chunk = ByteTendril::new();
181 chunk.push_slice(input.as_bytes());
182 let mut input = BufferQueue::default();
183 input.push_back(chunk.try_reinterpret().unwrap());
184
185 let tok = Tokenizer::new(santok, Default::default());
186 let _ = tok.feed(&mut input);
187 tok.end();
188 tok.sink.was_sanitized.get()
189}
190
// Token sink used by `is_html`: it discards the tokens themselves and only
// remembers whether any token other than character data, end-of-file, or a
// parse error was seen.
#[derive(Clone)]
struct SanitizationTokenizer {
    // Set to `true` by `process_token`; a `Cell` because the sink is driven
    // through `&self`.
    was_sanitized: Cell<bool>,
}
195
196impl SanitizationTokenizer {
197 pub fn new() -> SanitizationTokenizer {
198 SanitizationTokenizer {
199 was_sanitized: false.into(),
200 }
201 }
202}
203
204impl TokenSink for SanitizationTokenizer {
205 type Handle = ();
206 fn process_token(&self, token: Token, _line_number: u64) -> TokenSinkResult<()> {
207 match token {
208 Token::CharacterTokens(_) | Token::EOFToken | Token::ParseError(_) => {}
209 _ => {
210 self.was_sanitized.set(true);
211 }
212 }
213 TokenSinkResult::Continue
214 }
215 fn end(&self) {}
216}
217
218/// An HTML sanitizer.
219///
220/// Given a fragment of HTML, Ammonia will parse it according to the HTML5
221/// parsing algorithm and sanitize any disallowed tags or attributes. This
222/// algorithm also takes care of things like unclosed and (some) misnested
223/// tags.
224///
225/// # Examples
226///
227/// use ammonia::{Builder, UrlRelative};
228///
229/// let a = Builder::default()
230/// .link_rel(None)
231/// .url_relative(UrlRelative::PassThrough)
232/// .clean("<a href=/>test")
233/// .to_string();
234/// assert_eq!(
235/// a,
236/// "<a href=\"/\">test</a>");
237///
238/// # Panics
239///
240/// Running [`clean`] or [`clean_from_reader`] may cause a panic if the builder is
241/// configured with any of these (contradictory) settings:
242///
243/// * The `rel` attribute is added to [`generic_attributes`] or the
244/// [`tag_attributes`] for the `<a>` tag, and [`link_rel`] is not set to `None`.
245///
246/// For example, this is going to panic, since [`link_rel`] is set to
247/// `Some("noopener noreferrer")` by default,
248/// and it makes no sense to simultaneously say that the user is allowed to
249/// set their own `rel` attribute while saying that every link shall be set to
250/// a particular value:
251///
252/// ```should_panic
253/// use ammonia::Builder;
254/// use maplit::hashset;
255///
256/// # fn main() {
257/// Builder::default()
258/// .generic_attributes(hashset!["rel"])
259/// .clean("");
260/// # }
261/// ```
262///
263/// This, however, is perfectly valid:
264///
265/// ```
266/// use ammonia::Builder;
267/// use maplit::hashset;
268///
269/// # fn main() {
270/// Builder::default()
271/// .generic_attributes(hashset!["rel"])
272/// .link_rel(None)
273/// .clean("");
274/// # }
275/// ```
276///
277/// * The `class` attribute is in [`allowed_classes`] and is in the
278/// corresponding [`tag_attributes`] or in [`generic_attributes`].
279///
280/// This is done both to line up with the treatment of `rel`,
281/// and to prevent people from accidentally allowing arbitrary
282/// classes on a particular element.
283///
284/// This will panic:
285///
286/// ```should_panic
287/// use ammonia::Builder;
288/// use maplit::{hashmap, hashset};
289///
290/// # fn main() {
291/// Builder::default()
292/// .generic_attributes(hashset!["class"])
293/// .allowed_classes(hashmap!["span" => hashset!["hidden"]])
294/// .clean("");
295/// # }
296/// ```
297///
298/// This, however, is perfectly valid:
299///
300/// ```
301/// use ammonia::Builder;
302/// use maplit::{hashmap, hashset};
303///
304/// # fn main() {
305/// Builder::default()
306/// .allowed_classes(hashmap!["span" => hashset!["hidden"]])
307/// .clean("");
308/// # }
309/// ```
310///
311/// * A tag is in either [`tags`] or [`tag_attributes`] while also
312/// being in [`clean_content_tags`].
313///
314/// Both [`tags`] and [`tag_attributes`] are whitelists but
315/// [`clean_content_tags`] is a blacklist, so it doesn't make sense
316/// to have the same tag in both.
317///
318/// For example, this will panic, since the `aside` tag is in
319/// [`tags`] by default:
320///
321/// ```should_panic
322/// use ammonia::Builder;
323/// use maplit::hashset;
324///
325/// # fn main() {
326/// Builder::default()
327/// .clean_content_tags(hashset!["aside"])
328/// .clean("");
329/// # }
330/// ```
331///
332/// This, however, is valid:
333///
334/// ```
335/// use ammonia::Builder;
336/// use maplit::hashset;
337///
338/// # fn main() {
339/// Builder::default()
340/// .rm_tags(&["aside"])
341/// .clean_content_tags(hashset!["aside"])
342/// .clean("");
343/// # }
344/// ```
345///
346/// [`clean`]: #method.clean
347/// [`clean_from_reader`]: #method.clean_from_reader
348/// [`generic_attributes`]: #method.generic_attributes
349/// [`tag_attributes`]: #method.tag_attributes
350/// [`generic_attributes`]: #method.generic_attributes
351/// [`link_rel`]: #method.link_rel
352/// [`allowed_classes`]: #method.allowed_classes
353/// [`id_prefix`]: #method.id_prefix
354/// [`tags`]: #method.tags
355/// [`clean_content_tags`]: #method.clean_content_tags
#[derive(Debug)]
pub struct Builder<'a> {
    // Whitelisted element names.
    tags: HashSet<&'a str>,
    // Elements that are removed together with their contents.
    clean_content_tags: HashSet<&'a str>,
    // Tag name -> attribute names allowed on that tag.
    tag_attributes: HashMap<&'a str, HashSet<&'a str>>,
    // Tag name -> attribute name -> values allowed for that attribute.
    tag_attribute_values: HashMap<&'a str, HashMap<&'a str, HashSet<&'a str>>>,
    // Tag name -> attribute name -> value that is set on that tag.
    set_tag_attribute_values: HashMap<&'a str, HashMap<&'a str, &'a str>>,
    // Attribute names allowed on every tag.
    generic_attributes: HashSet<&'a str>,
    // URL schemes; presumably the allowed ones -- the code consuming this set
    // is not visible here (TODO confirm).
    url_schemes: HashSet<&'a str>,
    // Policy for relative URLs; `UrlRelative::PassThrough` by default.
    url_relative: UrlRelative<'a>,
    // Optional boxed `AttributeFilter`; `None` by default.
    // NOTE(review): how the filter is applied is not visible here.
    attribute_filter: Option<Box<dyn AttributeFilter>>,
    // `rel` value for links; `Some("noopener noreferrer")` by default.
    link_rel: Option<&'a str>,
    // Tag name -> class names allowed on that tag; empty by default.
    allowed_classes: HashMap<&'a str, HashSet<&'a str>>,
    // Whether comments are stripped; `true` by default.
    strip_comments: bool,
    // Optional prefix, presumably applied to `id` attributes -- semantics are
    // not visible here (TODO confirm).
    id_prefix: Option<&'a str>,
    // Attribute-name prefixes allowed on every tag; `None` until configured.
    generic_attribute_prefixes: Option<HashSet<&'a str>>,
    // NOTE(review): looks like a whitelist of CSS properties for `style`
    // attributes; `None` by default -- confirm against the consuming code.
    style_properties: Option<HashSet<&'a str>>,
}
374
375impl<'a> Default for Builder<'a> {
376 fn default() -> Self {
377 #[rustfmt::skip]
378 let tags = hashset![
379 "a", "abbr", "acronym", "area", "article", "aside", "b", "bdi",
380 "bdo", "blockquote", "br", "caption", "center", "cite", "code",
381 "col", "colgroup", "data", "dd", "del", "details", "dfn", "div",
382 "dl", "dt", "em", "figcaption", "figure", "footer", "h1", "h2",
383 "h3", "h4", "h5", "h6", "header", "hgroup", "hr", "i", "img",
384 "ins", "kbd", "li", "map", "mark", "nav", "ol", "p", "pre",
385 "q", "rp", "rt", "rtc", "ruby", "s", "samp", "small", "span",
386 "strike", "strong", "sub", "summary", "sup", "table", "tbody",
387 "td", "th", "thead", "time", "tr", "tt", "u", "ul", "var", "wbr"
388 ];
389 let clean_content_tags = hashset!["script", "style"];
390 let generic_attributes = hashset!["lang", "title"];
391 let tag_attributes = hashmap![
392 "a" => hashset![
393 "href", "hreflang"
394 ],
395 "bdo" => hashset![
396 "dir"
397 ],
398 "blockquote" => hashset![
399 "cite"
400 ],
401 "col" => hashset![
402 "align", "char", "charoff", "span"
403 ],
404 "colgroup" => hashset![
405 "align", "char", "charoff", "span"
406 ],
407 "del" => hashset![
408 "cite", "datetime"
409 ],
410 "hr" => hashset![
411 "align", "size", "width"
412 ],
413 "img" => hashset![
414 "align", "alt", "height", "src", "width"
415 ],
416 "ins" => hashset![
417 "cite", "datetime"
418 ],
419 "ol" => hashset![
420 "start"
421 ],
422 "q" => hashset![
423 "cite"
424 ],
425 "table" => hashset![
426 "align", "char", "charoff", "summary"
427 ],
428 "tbody" => hashset![
429 "align", "char", "charoff"
430 ],
431 "td" => hashset![
432 "align", "char", "charoff", "colspan", "headers", "rowspan"
433 ],
434 "tfoot" => hashset![
435 "align", "char", "charoff"
436 ],
437 "th" => hashset![
438 "align", "char", "charoff", "colspan", "headers", "rowspan", "scope"
439 ],
440 "thead" => hashset![
441 "align", "char", "charoff"
442 ],
443 "tr" => hashset![
444 "align", "char", "charoff"
445 ],
446 ];
447 let tag_attribute_values = hashmap![];
448 let set_tag_attribute_values = hashmap![];
449 let url_schemes = hashset![
450 "bitcoin",
451 "ftp",
452 "ftps",
453 "geo",
454 "http",
455 "https",
456 "im",
457 "irc",
458 "ircs",
459 "magnet",
460 "mailto",
461 "mms",
462 "mx",
463 "news",
464 "nntp",
465 "openpgp4fpr",
466 "sip",
467 "sms",
468 "smsto",
469 "ssh",
470 "tel",
471 "url",
472 "webcal",
473 "wtai",
474 "xmpp"
475 ];
476 let allowed_classes = hashmap![];
477
478 Builder {
479 tags,
480 clean_content_tags,
481 tag_attributes,
482 tag_attribute_values,
483 set_tag_attribute_values,
484 generic_attributes,
485 url_schemes,
486 url_relative: UrlRelative::PassThrough,
487 attribute_filter: None,
488 link_rel: Some("noopener noreferrer"),
489 allowed_classes,
490 strip_comments: true,
491 id_prefix: None,
492 generic_attribute_prefixes: None,
493 style_properties: None,
494 }
495 }
496}
497
498impl<'a> Builder<'a> {
499 /// Sets the tags that are allowed.
500 ///
501 /// # Examples
502 ///
503 /// use ammonia::Builder;
504 /// use maplit::hashset;
505 ///
506 /// # fn main() {
507 /// let tags = hashset!["my-tag"];
508 /// let a = Builder::new()
509 /// .tags(tags)
510 /// .clean("<my-tag>")
511 /// .to_string();
512 /// assert_eq!(a, "<my-tag></my-tag>");
513 /// # }
514 ///
515 /// # Defaults
516 ///
517 /// ```notest
518 /// a, abbr, acronym, area, article, aside, b, bdi,
519 /// bdo, blockquote, br, caption, center, cite, code,
520 /// col, colgroup, data, dd, del, details, dfn, div,
521 /// dl, dt, em, figcaption, figure, footer, h1, h2,
522 /// h3, h4, h5, h6, header, hgroup, hr, i, img,
523 /// ins, kbd, li, map, mark, nav, ol, p, pre,
524 /// q, rp, rt, rtc, ruby, s, samp, small, span,
525 /// strike, strong, sub, summary, sup, table, tbody,
526 /// td, th, thead, time, tr, tt, u, ul, var, wbr
527 /// ```
pub fn tags(&mut self, value: HashSet<&'a str>) -> &mut Self {
    // Replaces the whole whitelist; the previous set is dropped, not merged.
    self.tags = value;
    // Return the builder so calls can be chained.
    self
}
532
/// Add additional whitelisted tags without overwriting old ones.
534 ///
535 /// Does nothing if the tag is already there.
536 ///
537 /// # Examples
538 ///
539 /// let a = ammonia::Builder::default()
540 /// .add_tags(&["my-tag"])
541 /// .clean("<my-tag>test</my-tag> <span>mess</span>").to_string();
542 /// assert_eq!("<my-tag>test</my-tag> <span>mess</span>", a);
543 pub fn add_tags<T: 'a + ?Sized + Borrow<str>, I: IntoIter<Item = &'a T>>(
544 &mut self,
545 it: I,
546 ) -> &mut Self {
547 self.tags.extend(it.into_iter().map(Borrow::borrow));
548 self
549 }
550
551 /// Remove already-whitelisted tags.
552 ///
/// Does nothing if the tags are already gone.
554 ///
555 /// # Examples
556 ///
557 /// let a = ammonia::Builder::default()
558 /// .rm_tags(&["span"])
559 /// .clean("<span></span>").to_string();
560 /// assert_eq!("", a);
561 pub fn rm_tags<'b, T: 'b + ?Sized + Borrow<str>, I: IntoIter<Item = &'b T>>(
562 &mut self,
563 it: I,
564 ) -> &mut Self {
565 for i in it {
566 self.tags.remove(i.borrow());
567 }
568 self
569 }
570
571 /// Returns a copy of the set of whitelisted tags.
572 ///
573 /// # Examples
574 ///
575 /// use maplit::hashset;
576 ///
577 /// let tags = hashset!["my-tag-1", "my-tag-2"];
578 ///
579 /// let mut b = ammonia::Builder::default();
580 /// b.tags(Clone::clone(&tags));
581 /// assert_eq!(tags, b.clone_tags());
pub fn clone_tags(&self) -> HashSet<&'a str> {
    // Copies the set itself; the `&'a str` entries are borrowed, not duplicated.
    self.tags.clone()
}
585
586 /// Sets the tags whose contents will be completely removed from the output.
587 ///
588 /// Adding tags which are whitelisted in `tags` or `tag_attributes` will cause
589 /// a panic.
590 ///
591 /// # Examples
592 ///
593 /// use ammonia::Builder;
594 /// use maplit::hashset;
595 ///
596 /// # fn main() {
597 /// let tag_blacklist = hashset!["script", "style"];
598 /// let a = Builder::new()
599 /// .clean_content_tags(tag_blacklist)
600 /// .clean("<script>alert('hello')</script><style>a { background: #fff }</style>")
601 /// .to_string();
602 /// assert_eq!(a, "");
603 /// # }
604 ///
605 /// # Defaults
606 ///
607 /// ```notest
608 /// script, style
609 /// ```
pub fn clean_content_tags(&mut self, value: HashSet<&'a str>) -> &mut Self {
    // Replaces the whole blacklist; the previous set is dropped, not merged.
    self.clean_content_tags = value;
    // Return the builder so calls can be chained.
    self
}
614
/// Add additional blacklisted clean-content tags without overwriting old ones.
616 ///
617 /// Does nothing if the tag is already there.
618 ///
619 /// Adding tags which are whitelisted in `tags` or `tag_attributes` will cause
620 /// a panic.
621 ///
622 /// # Examples
623 ///
624 /// let a = ammonia::Builder::default()
625 /// .add_clean_content_tags(&["my-tag"])
626 /// .clean("<my-tag>test</my-tag><span>mess</span>").to_string();
627 /// assert_eq!("<span>mess</span>", a);
628 pub fn add_clean_content_tags<T: 'a + ?Sized + Borrow<str>, I: IntoIter<Item = &'a T>>(
629 &mut self,
630 it: I,
631 ) -> &mut Self {
632 self.clean_content_tags
633 .extend(it.into_iter().map(Borrow::borrow));
634 self
635 }
636
637 /// Remove already-blacklisted clean-content tags.
638 ///
639 /// Does nothing if the tags aren't blacklisted.
640 ///
641 /// # Examples
642 /// use ammonia::Builder;
643 /// use maplit::hashset;
644 ///
645 /// # fn main() {
646 /// let tag_blacklist = hashset!["script"];
647 /// let a = ammonia::Builder::default()
648 /// .clean_content_tags(tag_blacklist)
649 /// .rm_clean_content_tags(&["script"])
650 /// .clean("<script>XSS</script>").to_string();
651 /// assert_eq!("XSS", a);
652 /// # }
653 pub fn rm_clean_content_tags<'b, T: 'b + ?Sized + Borrow<str>, I: IntoIter<Item = &'b T>>(
654 &mut self,
655 it: I,
656 ) -> &mut Self {
657 for i in it {
658 self.clean_content_tags.remove(i.borrow());
659 }
660 self
661 }
662
663 /// Returns a copy of the set of blacklisted clean-content tags.
664 ///
665 /// # Examples
666 /// # use maplit::hashset;
667 ///
668 /// let tags = hashset!["my-tag-1", "my-tag-2"];
669 ///
670 /// let mut b = ammonia::Builder::default();
671 /// b.clean_content_tags(Clone::clone(&tags));
672 /// assert_eq!(tags, b.clone_clean_content_tags());
pub fn clone_clean_content_tags(&self) -> HashSet<&'a str> {
    // Copies the set itself; the `&'a str` entries are borrowed, not duplicated.
    self.clean_content_tags.clone()
}
676
677 /// Sets the HTML attributes that are allowed on specific tags.
678 ///
679 /// The value is structured as a map from tag names to a set of attribute names.
680 ///
681 /// If a tag is not itself whitelisted, adding entries to this map will do nothing.
682 ///
683 /// # Examples
684 ///
685 /// use ammonia::Builder;
686 /// use maplit::{hashmap, hashset};
687 ///
688 /// # fn main() {
689 /// let tags = hashset!["my-tag"];
690 /// let tag_attributes = hashmap![
691 /// "my-tag" => hashset!["val"]
692 /// ];
693 /// let a = Builder::new().tags(tags).tag_attributes(tag_attributes)
694 /// .clean("<my-tag val=1>")
695 /// .to_string();
696 /// assert_eq!(a, "<my-tag val=\"1\"></my-tag>");
697 /// # }
698 ///
699 /// # Defaults
700 ///
701 /// ```notest
702 /// a =>
703 /// href, hreflang
704 /// bdo =>
705 /// dir
706 /// blockquote =>
707 /// cite
708 /// col =>
709 /// align, char, charoff, span
710 /// colgroup =>
711 /// align, char, charoff, span
712 /// del =>
713 /// cite, datetime
714 /// hr =>
715 /// align, size, width
716 /// img =>
717 /// align, alt, height, src, width
718 /// ins =>
719 /// cite, datetime
720 /// ol =>
721 /// start
722 /// q =>
723 /// cite
724 /// table =>
725 /// align, char, charoff, summary
726 /// tbody =>
727 /// align, char, charoff
728 /// td =>
729 /// align, char, charoff, colspan, headers, rowspan
730 /// tfoot =>
731 /// align, char, charoff
732 /// th =>
733 /// align, char, charoff, colspan, headers, rowspan, scope
734 /// thead =>
735 /// align, char, charoff
736 /// tr =>
737 /// align, char, charoff
738 /// ```
pub fn tag_attributes(&mut self, value: HashMap<&'a str, HashSet<&'a str>>) -> &mut Self {
    // Replaces the whole tag -> attributes map, including the default entries.
    self.tag_attributes = value;
    // Return the builder so calls can be chained.
    self
}
743
/// Add additional whitelisted tag-specific attributes without overwriting old ones.
745 ///
746 /// # Examples
747 ///
748 /// let a = ammonia::Builder::default()
749 /// .add_tags(&["my-tag"])
750 /// .add_tag_attributes("my-tag", &["my-attr"])
751 /// .clean("<my-tag my-attr>test</my-tag> <span>mess</span>").to_string();
752 /// assert_eq!("<my-tag my-attr=\"\">test</my-tag> <span>mess</span>", a);
753 pub fn add_tag_attributes<
754 T: 'a + ?Sized + Borrow<str>,
755 U: 'a + ?Sized + Borrow<str>,
756 I: IntoIter<Item = &'a T>,
757 >(
758 &mut self,
759 tag: &'a U,
760 it: I,
761 ) -> &mut Self {
762 self.tag_attributes
763 .entry(tag.borrow())
764 .or_default()
765 .extend(it.into_iter().map(Borrow::borrow));
766 self
767 }
768
769 /// Remove already-whitelisted tag-specific attributes.
770 ///
771 /// Does nothing if the attribute is already gone.
772 ///
773 /// # Examples
774 ///
775 /// let a = ammonia::Builder::default()
776 /// .rm_tag_attributes("a", &["href"])
777 /// .clean("<a href=\"/\"></a>").to_string();
778 /// assert_eq!("<a rel=\"noopener noreferrer\"></a>", a);
779 pub fn rm_tag_attributes<
780 'b,
781 'c,
782 T: 'b + ?Sized + Borrow<str>,
783 U: 'c + ?Sized + Borrow<str>,
784 I: IntoIter<Item = &'b T>,
785 >(
786 &mut self,
787 tag: &'c U,
788 it: I,
789 ) -> &mut Self {
790 if let Some(tag) = self.tag_attributes.get_mut(tag.borrow()) {
791 for i in it {
792 tag.remove(i.borrow());
793 }
794 }
795 self
796 }
797
798 /// Returns a copy of the set of whitelisted tag-specific attributes.
799 ///
800 /// # Examples
801 /// use maplit::{hashmap, hashset};
802 ///
803 /// let tag_attributes = hashmap![
804 /// "my-tag" => hashset!["my-attr-1", "my-attr-2"]
805 /// ];
806 ///
807 /// let mut b = ammonia::Builder::default();
808 /// b.tag_attributes(Clone::clone(&tag_attributes));
809 /// assert_eq!(tag_attributes, b.clone_tag_attributes());
pub fn clone_tag_attributes(&self) -> HashMap<&'a str, HashSet<&'a str>> {
    // Copies the map and its inner sets; the `&'a str` entries stay borrowed.
    self.tag_attributes.clone()
}
813
814 /// Sets the values of HTML attributes that are allowed on specific tags.
815 ///
816 /// The value is structured as a map from tag names to a map from attribute names to a set of
817 /// attribute values.
818 ///
819 /// If a tag is not itself whitelisted, adding entries to this map will do nothing.
820 ///
821 /// # Examples
822 ///
823 /// use ammonia::Builder;
824 /// use maplit::{hashmap, hashset};
825 ///
826 /// # fn main() {
827 /// let tags = hashset!["my-tag"];
828 /// let tag_attribute_values = hashmap![
829 /// "my-tag" => hashmap![
830 /// "my-attr" => hashset!["val"],
831 /// ],
832 /// ];
833 /// let a = Builder::new().tags(tags).tag_attribute_values(tag_attribute_values)
834 /// .clean("<my-tag my-attr=val>")
835 /// .to_string();
836 /// assert_eq!(a, "<my-tag my-attr=\"val\"></my-tag>");
837 /// # }
838 ///
839 /// # Defaults
840 ///
841 /// None.
pub fn tag_attribute_values(
    &mut self,
    value: HashMap<&'a str, HashMap<&'a str, HashSet<&'a str>>>,
) -> &mut Self {
    // Replaces the whole tag -> attribute -> values map.
    self.tag_attribute_values = value;
    // Return the builder so calls can be chained.
    self
}
849
/// Add additional whitelisted tag-specific attribute values without overwriting old ones.
851 ///
852 /// # Examples
853 ///
854 /// let a = ammonia::Builder::default()
855 /// .add_tags(&["my-tag"])
856 /// .add_tag_attribute_values("my-tag", "my-attr", &[""])
857 /// .clean("<my-tag my-attr>test</my-tag> <span>mess</span>").to_string();
858 /// assert_eq!("<my-tag my-attr=\"\">test</my-tag> <span>mess</span>", a);
859 pub fn add_tag_attribute_values<
860 T: 'a + ?Sized + Borrow<str>,
861 U: 'a + ?Sized + Borrow<str>,
862 V: 'a + ?Sized + Borrow<str>,
863 I: IntoIter<Item = &'a T>,
864 >(
865 &mut self,
866 tag: &'a U,
867 attribute: &'a V,
868 it: I,
869 ) -> &mut Self {
870 self.tag_attribute_values
871 .entry(tag.borrow())
872 .or_default()
873 .entry(attribute.borrow())
874 .or_default()
875 .extend(it.into_iter().map(Borrow::borrow));
876
877 self
878 }
879
880 /// Remove already-whitelisted tag-specific attribute values.
881 ///
882 /// Does nothing if the attribute or the value is already gone.
883 ///
884 /// # Examples
885 ///
886 /// let a = ammonia::Builder::default()
887 /// .rm_tag_attributes("a", &["href"])
888 /// .add_tag_attribute_values("a", "href", &["/"])
889 /// .rm_tag_attribute_values("a", "href", &["/"])
890 /// .clean("<a href=\"/\"></a>").to_string();
891 /// assert_eq!("<a rel=\"noopener noreferrer\"></a>", a);
892 pub fn rm_tag_attribute_values<
893 'b,
894 'c,
895 T: 'b + ?Sized + Borrow<str>,
896 U: 'c + ?Sized + Borrow<str>,
897 V: 'c + ?Sized + Borrow<str>,
898 I: IntoIter<Item = &'b T>,
899 >(
900 &mut self,
901 tag: &'c U,
902 attribute: &'c V,
903 it: I,
904 ) -> &mut Self {
905 if let Some(attrs) = self
906 .tag_attribute_values
907 .get_mut(tag.borrow())
908 .and_then(|map| map.get_mut(attribute.borrow()))
909 {
910 for i in it {
911 attrs.remove(i.borrow());
912 }
913 }
914 self
915 }
916
917 /// Returns a copy of the set of whitelisted tag-specific attribute values.
918 ///
919 /// # Examples
920 ///
921 /// use maplit::{hashmap, hashset};
922 ///
923 /// let attribute_values = hashmap![
924 /// "my-attr-1" => hashset!["foo"],
925 /// "my-attr-2" => hashset!["baz", "bar"],
926 /// ];
927 /// let tag_attribute_values = hashmap![
928 /// "my-tag" => attribute_values
929 /// ];
930 ///
931 /// let mut b = ammonia::Builder::default();
932 /// b.tag_attribute_values(Clone::clone(&tag_attribute_values));
933 /// assert_eq!(tag_attribute_values, b.clone_tag_attribute_values());
pub fn clone_tag_attribute_values(
    &self,
) -> HashMap<&'a str, HashMap<&'a str, HashSet<&'a str>>> {
    // Copies every map level; the `&'a str` entries stay borrowed.
    self.tag_attribute_values.clone()
}
939
940 /// Sets the values of HTML attributes that are to be set on specific tags.
941 ///
942 /// The value is structured as a map from tag names to a map from attribute names to an
943 /// attribute value.
944 ///
945 /// If a tag is not itself whitelisted, adding entries to this map will do nothing.
946 ///
947 /// # Examples
948 ///
949 /// use ammonia::Builder;
950 /// use maplit::{hashmap, hashset};
951 ///
952 /// # fn main() {
953 /// let tags = hashset!["my-tag"];
954 /// let set_tag_attribute_values = hashmap![
955 /// "my-tag" => hashmap![
956 /// "my-attr" => "val",
957 /// ],
958 /// ];
959 /// let a = Builder::new().tags(tags).set_tag_attribute_values(set_tag_attribute_values)
960 /// .clean("<my-tag>")
961 /// .to_string();
962 /// assert_eq!(a, "<my-tag my-attr=\"val\"></my-tag>");
963 /// # }
964 ///
965 /// # Defaults
966 ///
967 /// None.
pub fn set_tag_attribute_values(
    &mut self,
    value: HashMap<&'a str, HashMap<&'a str, &'a str>>,
) -> &mut Self {
    // Replaces the whole tag -> attribute -> value map.
    self.set_tag_attribute_values = value;
    // Return the builder so calls can be chained.
    self
}
975
976 /// Add an attribute value to set on a specific element.
977 ///
978 /// # Examples
979 ///
980 /// let a = ammonia::Builder::default()
981 /// .add_tags(&["my-tag"])
982 /// .set_tag_attribute_value("my-tag", "my-attr", "val")
983 /// .clean("<my-tag>test</my-tag> <span>mess</span>").to_string();
984 /// assert_eq!("<my-tag my-attr=\"val\">test</my-tag> <span>mess</span>", a);
985 pub fn set_tag_attribute_value<
986 T: 'a + ?Sized + Borrow<str>,
987 A: 'a + ?Sized + Borrow<str>,
988 V: 'a + ?Sized + Borrow<str>,
989 >(
990 &mut self,
991 tag: &'a T,
992 attribute: &'a A,
993 value: &'a V,
994 ) -> &mut Self {
995 self.set_tag_attribute_values
996 .entry(tag.borrow())
997 .or_default()
998 .insert(attribute.borrow(), value.borrow());
999 self
1000 }
1001
1002 /// Remove existing tag-specific attribute values to be set.
1003 ///
1004 /// Does nothing if the attribute is already gone.
1005 ///
1006 /// # Examples
1007 ///
1008 /// let a = ammonia::Builder::default()
1009 /// // this does nothing, since no value is set for this tag attribute yet
1010 /// .rm_set_tag_attribute_value("a", "target")
1011 /// .set_tag_attribute_value("a", "target", "_blank")
1012 /// .rm_set_tag_attribute_value("a", "target")
1013 /// .clean("<a href=\"/\"></a>").to_string();
1014 /// assert_eq!("<a href=\"/\" rel=\"noopener noreferrer\"></a>", a);
1015 pub fn rm_set_tag_attribute_value<
1016 T: 'a + ?Sized + Borrow<str>,
1017 A: 'a + ?Sized + Borrow<str>,
1018 >(
1019 &mut self,
1020 tag: &'a T,
1021 attribute: &'a A,
1022 ) -> &mut Self {
1023 if let Some(attributes) = self.set_tag_attribute_values.get_mut(tag.borrow()) {
1024 attributes.remove(attribute.borrow());
1025 }
1026 self
1027 }
1028
1029 /// Returns the value that will be set for the attribute on the element, if any.
1030 ///
1031 /// # Examples
1032 ///
1033 /// let mut b = ammonia::Builder::default();
1034 /// b.set_tag_attribute_value("a", "target", "_blank");
1035 /// let value = b.get_set_tag_attribute_value("a", "target");
1036 /// assert_eq!(value, Some("_blank"));
1037 pub fn get_set_tag_attribute_value<
1038 T: 'a + ?Sized + Borrow<str>,
1039 A: 'a + ?Sized + Borrow<str>,
1040 >(
1041 &self,
1042 tag: &'a T,
1043 attribute: &'a A,
1044 ) -> Option<&'a str> {
1045 self.set_tag_attribute_values
1046 .get(tag.borrow())
1047 .and_then(|map| map.get(attribute.borrow()))
1048 .copied()
1049 }
1050
1051 /// Returns a copy of the set of tag-specific attribute values to be set.
1052 ///
1053 /// # Examples
1054 ///
1055 /// use maplit::{hashmap, hashset};
1056 ///
1057 /// let attribute_values = hashmap![
1058 /// "my-attr-1" => "foo",
1059 /// "my-attr-2" => "bar",
1060 /// ];
1061 /// let set_tag_attribute_values = hashmap![
1062 /// "my-tag" => attribute_values,
1063 /// ];
1064 ///
1065 /// let mut b = ammonia::Builder::default();
1066 /// b.set_tag_attribute_values(Clone::clone(&set_tag_attribute_values));
1067 /// assert_eq!(set_tag_attribute_values, b.clone_set_tag_attribute_values());
pub fn clone_set_tag_attribute_values(&self) -> HashMap<&'a str, HashMap<&'a str, &'a str>> {
    // Clones both map levels; the `&'a str` keys and values are copied as
    // references, so no string data is duplicated.
    self.set_tag_attribute_values.clone()
}
1071
1072 /// Sets the prefix of attributes that are allowed on any tag.
1073 ///
1074 /// # Examples
1075 ///
1076 /// use ammonia::Builder;
1077 /// use maplit::hashset;
1078 ///
1079 /// # fn main() {
1080 /// let prefixes = hashset!["data-"];
1081 /// let a = Builder::new()
1082 /// .generic_attribute_prefixes(prefixes)
1083 /// .clean("<b data-val=1>")
1084 /// .to_string();
1085 /// assert_eq!(a, "<b data-val=\"1\"></b>");
1086 /// # }
1087 ///
1088 /// # Defaults
1089 ///
1090 /// No attribute prefixes are allowed by default.
pub fn generic_attribute_prefixes(&mut self, value: HashSet<&'a str>) -> &mut Self {
    // Always stored as `Some`, even when `value` is empty; only
    // `rm_generic_attribute_prefixes` resets the field back to `None`.
    self.generic_attribute_prefixes = Some(value);
    self
}
1095
1096 /// Add additional whitelisted attribute prefix without overwriting old ones.
1097 ///
1098 /// # Examples
1099 ///
1100 /// let a = ammonia::Builder::default()
1101 /// .add_generic_attribute_prefixes(&["my-"])
1102 /// .clean("<span my-attr>mess</span>").to_string();
1103 /// assert_eq!("<span my-attr=\"\">mess</span>", a);
1104 pub fn add_generic_attribute_prefixes<
1105 T: 'a + ?Sized + Borrow<str>,
1106 I: IntoIter<Item = &'a T>,
1107 >(
1108 &mut self,
1109 it: I,
1110 ) -> &mut Self {
1111 self.generic_attribute_prefixes
1112 .get_or_insert_with(HashSet::new)
1113 .extend(it.into_iter().map(Borrow::borrow));
1114 self
1115 }
1116
1117 /// Remove already-whitelisted attribute prefixes.
1118 ///
1119 /// Does nothing if the attribute prefix is already gone.
1120 ///
1121 /// # Examples
1122 ///
1123 /// let a = ammonia::Builder::default()
1124 /// .add_generic_attribute_prefixes(&["data-", "code-"])
1125 /// .rm_generic_attribute_prefixes(&["data-"])
1126 /// .clean("<span code-test=\"foo\" data-test=\"cool\"></span>").to_string();
1127 /// assert_eq!("<span code-test=\"foo\"></span>", a);
1128 pub fn rm_generic_attribute_prefixes<
1129 'b,
1130 T: 'b + ?Sized + Borrow<str>,
1131 I: IntoIter<Item = &'b T>,
1132 >(
1133 &mut self,
1134 it: I,
1135 ) -> &mut Self {
1136 if let Some(true) = self.generic_attribute_prefixes.as_mut().map(|prefixes| {
1137 for i in it {
1138 let _ = prefixes.remove(i.borrow());
1139 }
1140 prefixes.is_empty()
1141 }) {
1142 self.generic_attribute_prefixes = None;
1143 }
1144 self
1145 }
1146
1147 /// Returns a copy of the set of whitelisted attribute prefixes.
1148 ///
1149 /// # Examples
1150 ///
1151 /// use maplit::hashset;
1152 ///
1153 /// let generic_attribute_prefixes = hashset!["my-prfx-1-", "my-prfx-2-"];
1154 ///
1155 /// let mut b = ammonia::Builder::default();
1156 /// b.generic_attribute_prefixes(Clone::clone(&generic_attribute_prefixes));
1157 /// assert_eq!(Some(generic_attribute_prefixes), b.clone_generic_attribute_prefixes());
pub fn clone_generic_attribute_prefixes(&self) -> Option<HashSet<&'a str>> {
    // Clones the `Option` as-is: `None` stays `None`, `Some(set)` yields a copy of the set.
    self.generic_attribute_prefixes.clone()
}
1161
1162 /// Sets the attributes that are allowed on any tag.
1163 ///
1164 /// # Examples
1165 ///
1166 /// use ammonia::Builder;
1167 /// use maplit::hashset;
1168 ///
1169 /// # fn main() {
1170 /// let attributes = hashset!["data-val"];
1171 /// let a = Builder::new()
1172 /// .generic_attributes(attributes)
1173 /// .clean("<b data-val=1>")
1174 /// .to_string();
1175 /// assert_eq!(a, "<b data-val=\"1\"></b>");
1176 /// # }
1177 ///
1178 /// # Defaults
1179 ///
1180 /// ```notest
1181 /// lang, title
1182 /// ```
pub fn generic_attributes(&mut self, value: HashSet<&'a str>) -> &mut Self {
    // Replaces the whole set, discarding whatever was configured before;
    // use `add_generic_attributes` to extend the existing set instead.
    self.generic_attributes = value;
    self
}
1187
/// Add additional whitelisted attributes without overwriting old ones.
1189 ///
1190 /// # Examples
1191 ///
1192 /// let a = ammonia::Builder::default()
1193 /// .add_generic_attributes(&["my-attr"])
1194 /// .clean("<span my-attr>mess</span>").to_string();
1195 /// assert_eq!("<span my-attr=\"\">mess</span>", a);
1196 pub fn add_generic_attributes<T: 'a + ?Sized + Borrow<str>, I: IntoIter<Item = &'a T>>(
1197 &mut self,
1198 it: I,
1199 ) -> &mut Self {
1200 self.generic_attributes
1201 .extend(it.into_iter().map(Borrow::borrow));
1202 self
1203 }
1204
1205 /// Remove already-whitelisted attributes.
1206 ///
1207 /// Does nothing if the attribute is already gone.
1208 ///
1209 /// # Examples
1210 ///
1211 /// let a = ammonia::Builder::default()
1212 /// .rm_generic_attributes(&["title"])
1213 /// .clean("<span title=\"cool\"></span>").to_string();
1214 /// assert_eq!("<span></span>", a);
1215 pub fn rm_generic_attributes<'b, T: 'b + ?Sized + Borrow<str>, I: IntoIter<Item = &'b T>>(
1216 &mut self,
1217 it: I,
1218 ) -> &mut Self {
1219 for i in it {
1220 self.generic_attributes.remove(i.borrow());
1221 }
1222 self
1223 }
1224
1225 /// Returns a copy of the set of whitelisted attributes.
1226 ///
1227 /// # Examples
1228 ///
1229 /// use maplit::hashset;
1230 ///
1231 /// let generic_attributes = hashset!["my-attr-1", "my-attr-2"];
1232 ///
1233 /// let mut b = ammonia::Builder::default();
1234 /// b.generic_attributes(Clone::clone(&generic_attributes));
1235 /// assert_eq!(generic_attributes, b.clone_generic_attributes());
pub fn clone_generic_attributes(&self) -> HashSet<&'a str> {
    // Copies the set of `&'a str` references; the strings themselves are not duplicated.
    self.generic_attributes.clone()
}
1239
1240 /// Sets the URL schemes permitted on `href` and `src` attributes.
1241 ///
1242 /// # Examples
1243 ///
1244 /// use ammonia::Builder;
1245 /// use maplit::hashset;
1246 ///
1247 /// # fn main() {
1248 /// let url_schemes = hashset![
1249 /// "http", "https", "mailto", "magnet"
1250 /// ];
1251 /// let a = Builder::new().url_schemes(url_schemes)
1252 /// .clean("<a href=\"magnet:?xt=urn:ed2k:31D6CFE0D16AE931B73C59D7E0C089C0&xl=0&dn=zero_len.fil&xt=urn:bitprint:3I42H3S6NNFQ2MSVX7XZKYAYSCX5QBYJ.LWPNACQDBZRYXW3VHJVCJ64QBZNGHOHHHZWCLNQ&xt=urn:md5:D41D8CD98F00B204E9800998ECF8427E\">zero-length file</a>")
1253 /// .to_string();
1254 ///
1255 /// // See `link_rel` for information on the rel="noopener noreferrer" attribute
1256 /// // in the cleaned HTML.
1257 /// assert_eq!(a,
1258 /// "<a href=\"magnet:?xt=urn:ed2k:31D6CFE0D16AE931B73C59D7E0C089C0&xl=0&dn=zero_len.fil&xt=urn:bitprint:3I42H3S6NNFQ2MSVX7XZKYAYSCX5QBYJ.LWPNACQDBZRYXW3VHJVCJ64QBZNGHOHHHZWCLNQ&xt=urn:md5:D41D8CD98F00B204E9800998ECF8427E\" rel=\"noopener noreferrer\">zero-length file</a>");
1259 /// # }
1260 ///
1261 /// # Defaults
1262 ///
1263 /// ```notest
1264 /// bitcoin, ftp, ftps, geo, http, https, im, irc,
1265 /// ircs, magnet, mailto, mms, mx, news, nntp,
1266 /// openpgp4fpr, sip, sms, smsto, ssh, tel, url,
1267 /// webcal, wtai, xmpp
1268 /// ```
pub fn url_schemes(&mut self, value: HashSet<&'a str>) -> &mut Self {
    // Replaces the whole set. `clean_child` keeps an absolute URL attribute
    // only when its parsed scheme is contained in this set.
    self.url_schemes = value;
    self
}
1273
/// Add additional whitelisted URL schemes without overwriting old ones.
1275 ///
1276 /// # Examples
1277 ///
1278 /// let a = ammonia::Builder::default()
1279 /// .add_url_schemes(&["my-scheme"])
1280 /// .clean("<a href=my-scheme:home>mess</span>").to_string();
1281 /// assert_eq!("<a href=\"my-scheme:home\" rel=\"noopener noreferrer\">mess</a>", a);
1282 pub fn add_url_schemes<T: 'a + ?Sized + Borrow<str>, I: IntoIter<Item = &'a T>>(
1283 &mut self,
1284 it: I,
1285 ) -> &mut Self {
1286 self.url_schemes.extend(it.into_iter().map(Borrow::borrow));
1287 self
1288 }
1289
/// Remove already-whitelisted URL schemes.
///
/// Does nothing if the URL scheme is already gone.
1293 ///
1294 /// # Examples
1295 ///
1296 /// let a = ammonia::Builder::default()
1297 /// .rm_url_schemes(&["ftp"])
1298 /// .clean("<a href=\"ftp://ftp.mozilla.org/\"></a>").to_string();
1299 /// assert_eq!("<a rel=\"noopener noreferrer\"></a>", a);
1300 pub fn rm_url_schemes<'b, T: 'b + ?Sized + Borrow<str>, I: IntoIter<Item = &'b T>>(
1301 &mut self,
1302 it: I,
1303 ) -> &mut Self {
1304 for i in it {
1305 self.url_schemes.remove(i.borrow());
1306 }
1307 self
1308 }
1309
1310 /// Returns a copy of the set of whitelisted URL schemes.
1311 ///
1312 /// # Examples
1313 /// use maplit::hashset;
1314 ///
1315 /// let url_schemes = hashset!["my-scheme-1", "my-scheme-2"];
1316 ///
1317 /// let mut b = ammonia::Builder::default();
1318 /// b.url_schemes(Clone::clone(&url_schemes));
1319 /// assert_eq!(url_schemes, b.clone_url_schemes());
pub fn clone_url_schemes(&self) -> HashSet<&'a str> {
    // Copies the set of `&'a str` references; the strings themselves are not duplicated.
    self.url_schemes.clone()
}
1323
1324 /// Configures the behavior for relative URLs: pass-through, resolve-with-base, or deny.
1325 ///
1326 /// # Examples
1327 ///
1328 /// use ammonia::{Builder, UrlRelative};
1329 ///
1330 /// let a = Builder::new().url_relative(UrlRelative::PassThrough)
1331 /// .clean("<a href=/>Home</a>")
1332 /// .to_string();
1333 ///
1334 /// // See `link_rel` for information on the rel="noopener noreferrer" attribute
1335 /// // in the cleaned HTML.
1336 /// assert_eq!(
1337 /// a,
1338 /// "<a href=\"/\" rel=\"noopener noreferrer\">Home</a>");
1339 ///
1340 /// # Defaults
1341 ///
1342 /// ```notest
1343 /// UrlRelative::PassThrough
1344 /// ```
pub fn url_relative(&mut self, value: UrlRelative<'a>) -> &mut Self {
    // Overwrites the previous policy. `clean_child` drops relative URL
    // attributes only when the policy is `UrlRelative::Deny`.
    self.url_relative = value;
    self
}
1349
1350 /// Allows rewriting of all attributes using a callback.
1351 ///
1352 /// The callback takes name of the element, attribute and its value.
1353 /// Returns `None` to remove the attribute, or a value to use.
1354 ///
1355 /// Rewriting of attributes with URLs is done before `url_relative()`.
1356 ///
1357 /// # Panics
1358 ///
1359 /// If more than one callback is set.
1360 ///
1361 /// # Examples
1362 ///
1363 /// ```rust
1364 /// use ammonia::Builder;
1365 /// let a = Builder::new()
1366 /// .attribute_filter(|element, attribute, value| {
1367 /// match (element, attribute) {
1368 /// ("img", "src") => None,
1369 /// _ => Some(value.into())
1370 /// }
1371 /// })
1372 /// .link_rel(None)
1373 /// .clean("<a href=/><img alt=Home src=foo></a>")
1374 /// .to_string();
1375 /// assert_eq!(a,
1376 /// r#"<a href="/"><img alt="Home"></a>"#);
1377 /// ```
1378 pub fn attribute_filter<'cb, CallbackFn>(&mut self, callback: CallbackFn) -> &mut Self
1379 where
1380 CallbackFn: for<'u> Fn(&str, &str, &'u str) -> Option<Cow<'u, str>> + Send + Sync + 'static,
1381 {
1382 assert!(
1383 self.attribute_filter.is_none(),
1384 "attribute_filter can be set only once"
1385 );
1386 self.attribute_filter = Some(Box::new(callback));
1387 self
1388 }
1389
1390 /// Returns `true` if the relative URL resolver is set to `Deny`.
1391 ///
1392 /// # Examples
1393 ///
1394 /// use ammonia::{Builder, UrlRelative};
1395 /// let mut a = Builder::default();
1396 /// a.url_relative(UrlRelative::Deny);
1397 /// assert!(a.is_url_relative_deny());
1398 /// a.url_relative(UrlRelative::PassThrough);
1399 /// assert!(!a.is_url_relative_deny());
pub fn is_url_relative_deny(&self) -> bool {
    // Only the variant is inspected; the stored policy is not moved or modified.
    matches!(self.url_relative, UrlRelative::Deny)
}
1403
1404 /// Returns `true` if the relative URL resolver is set to `PassThrough`.
1405 ///
1406 /// # Examples
1407 ///
1408 /// use ammonia::{Builder, UrlRelative};
1409 /// let mut a = Builder::default();
1410 /// a.url_relative(UrlRelative::Deny);
1411 /// assert!(!a.is_url_relative_pass_through());
1412 /// a.url_relative(UrlRelative::PassThrough);
1413 /// assert!(a.is_url_relative_pass_through());
pub fn is_url_relative_pass_through(&self) -> bool {
    // Only the variant is inspected; the stored policy is not moved or modified.
    matches!(self.url_relative, UrlRelative::PassThrough)
}
1417
1418 /// Returns `true` if the relative URL resolver is set to `Custom`.
1419 ///
1420 /// # Examples
1421 ///
1422 /// use ammonia::{Builder, UrlRelative};
1423 /// use std::borrow::Cow;
1424 /// fn test(a: &str) -> Option<Cow<str>> { None }
1425 /// # fn main() {
1426 /// let mut a = Builder::default();
1427 /// a.url_relative(UrlRelative::Custom(Box::new(test)));
1428 /// assert!(a.is_url_relative_custom());
1429 /// a.url_relative(UrlRelative::PassThrough);
1430 /// assert!(!a.is_url_relative_custom());
1431 /// a.url_relative(UrlRelative::Deny);
1432 /// assert!(!a.is_url_relative_custom());
1433 /// # }
pub fn is_url_relative_custom(&self) -> bool {
    // The wildcard ignores the boxed evaluator: any `Custom(..)` payload counts.
    matches!(self.url_relative, UrlRelative::Custom(_))
}
1437
1438 /// Configures a `rel` attribute that will be added on links.
1439 ///
1440 /// If `rel` is in the generic or tag attributes, this must be set to `None`.
1441 /// Common `rel` values to include:
1442 ///
1443 /// * `noopener`: This prevents [a particular type of XSS attack],
1444 /// and should usually be turned on for untrusted HTML.
1445 /// * `noreferrer`: This prevents the browser from [sending the source URL]
1446 /// to the website that is linked to.
1447 /// * `nofollow`: This prevents search engines from [using this link for
1448 /// ranking], which disincentivizes spammers.
1449 ///
1450 /// To turn on rel-insertion, call this function with a space-separated list.
1451 /// Ammonia does not parse rel-attributes;
1452 /// it just puts the given string into the attribute directly.
1453 ///
1454 /// [a particular type of XSS attack]: https://mathiasbynens.github.io/rel-noopener/
1455 /// [sending the source URL]: https://en.wikipedia.org/wiki/HTTP_referer
1456 /// [using this link for ranking]: https://en.wikipedia.org/wiki/Nofollow
1457 ///
1458 /// # Examples
1459 ///
1460 /// use ammonia::Builder;
1461 ///
1462 /// let a = Builder::new().link_rel(None)
1463 /// .clean("<a href=https://rust-lang.org/>Rust</a>")
1464 /// .to_string();
1465 /// assert_eq!(
1466 /// a,
1467 /// "<a href=\"https://rust-lang.org/\">Rust</a>");
1468 ///
1469 /// # Defaults
1470 ///
1471 /// ```notest
1472 /// Some("noopener noreferrer")
1473 /// ```
pub fn link_rel(&mut self, value: Option<&'a str>) -> &mut Self {
    // Nothing is validated here. `clean_dom` asserts at cleaning time that
    // `rel` is not whitelisted (generically or for `a`) whenever this is `Some`.
    self.link_rel = value;
    self
}
1478
1479 /// Returns the settings for links' `rel` attribute, if one is set.
1480 ///
1481 /// # Examples
1482 ///
1483 /// use ammonia::{Builder, UrlRelative};
1484 /// let mut a = Builder::default();
1485 /// a.link_rel(Some("a b"));
1486 /// assert_eq!(a.get_link_rel(), Some("a b"));
1487 pub fn get_link_rel(&self) -> Option<&str> {
1488 self.link_rel
1489 }
1490
1491 /// Sets the CSS classes that are allowed on specific tags.
1492 ///
/// The value is structured as a map from tag names to a set of class names.
1494 ///
1495 /// If the `class` attribute is itself whitelisted for a tag, then adding entries to
1496 /// this map will cause a panic.
1497 ///
1498 /// # Examples
1499 ///
1500 /// use ammonia::Builder;
1501 /// use maplit::{hashmap, hashset};
1502 ///
1503 /// # fn main() {
1504 /// let allowed_classes = hashmap![
1505 /// "code" => hashset!["rs", "ex", "c", "cxx", "js"]
1506 /// ];
1507 /// let a = Builder::new()
1508 /// .allowed_classes(allowed_classes)
1509 /// .clean("<code class=rs>fn main() {}</code>")
1510 /// .to_string();
1511 /// assert_eq!(
1512 /// a,
1513 /// "<code class=\"rs\">fn main() {}</code>");
1514 /// # }
1515 ///
1516 /// # Defaults
1517 ///
1518 /// The set of allowed classes is empty by default.
pub fn allowed_classes(&mut self, value: HashMap<&'a str, HashSet<&'a str>>) -> &mut Self {
    // Replaces the whole map without validation. The conflict with a
    // whitelisted `class` attribute is only asserted later, in `clean_dom`.
    self.allowed_classes = value;
    self
}
1523
/// Add additional whitelisted classes without overwriting old ones.
1525 ///
1526 /// # Examples
1527 ///
1528 /// let a = ammonia::Builder::default()
1529 /// .add_allowed_classes("a", &["onebox"])
1530 /// .clean("<a href=/ class=onebox>mess</span>").to_string();
1531 /// assert_eq!("<a href=\"/\" class=\"onebox\" rel=\"noopener noreferrer\">mess</a>", a);
1532 pub fn add_allowed_classes<
1533 T: 'a + ?Sized + Borrow<str>,
1534 U: 'a + ?Sized + Borrow<str>,
1535 I: IntoIter<Item = &'a T>,
1536 >(
1537 &mut self,
1538 tag: &'a U,
1539 it: I,
1540 ) -> &mut Self {
1541 self.allowed_classes
1542 .entry(tag.borrow())
1543 .or_default()
1544 .extend(it.into_iter().map(Borrow::borrow));
1545 self
1546 }
1547
/// Remove already-whitelisted classes from a tag.
///
/// Does nothing if the tag or the class is already gone.
1551 ///
1552 /// # Examples
1553 ///
1554 /// let a = ammonia::Builder::default()
1555 /// .add_allowed_classes("span", &["active"])
1556 /// .rm_allowed_classes("span", &["active"])
1557 /// .clean("<span class=active>").to_string();
1558 /// assert_eq!("<span class=\"\"></span>", a);
1559 pub fn rm_allowed_classes<
1560 'b,
1561 'c,
1562 T: 'b + ?Sized + Borrow<str>,
1563 U: 'c + ?Sized + Borrow<str>,
1564 I: IntoIter<Item = &'b T>,
1565 >(
1566 &mut self,
1567 tag: &'c U,
1568 it: I,
1569 ) -> &mut Self {
1570 if let Some(tag) = self.allowed_classes.get_mut(tag.borrow()) {
1571 for i in it {
1572 tag.remove(i.borrow());
1573 }
1574 }
1575 self
1576 }
1577
1578 /// Returns a copy of the set of whitelisted class attributes.
1579 ///
1580 /// # Examples
1581 ///
1582 /// use maplit::{hashmap, hashset};
1583 ///
1584 /// let allowed_classes = hashmap![
1585 /// "my-tag" => hashset!["my-class-1", "my-class-2"]
1586 /// ];
1587 ///
1588 /// let mut b = ammonia::Builder::default();
1589 /// b.allowed_classes(Clone::clone(&allowed_classes));
1590 /// assert_eq!(allowed_classes, b.clone_allowed_classes());
pub fn clone_allowed_classes(&self) -> HashMap<&'a str, HashSet<&'a str>> {
    // Clones the map and every per-tag set; the `&'a str` entries are copied as references.
    self.allowed_classes.clone()
}
1594
1595 /// Configures the handling of HTML comments.
1596 ///
1597 /// If this option is false, comments will be preserved.
1598 ///
1599 /// # Examples
1600 ///
1601 /// use ammonia::Builder;
1602 ///
1603 /// let a = Builder::new().strip_comments(false)
1604 /// .clean("<!-- yes -->")
1605 /// .to_string();
1606 /// assert_eq!(
1607 /// a,
1608 /// "<!-- yes -->");
1609 ///
1610 /// # Defaults
1611 ///
1612 /// `true`
pub fn strip_comments(&mut self, value: bool) -> &mut Self {
    // Consulted per comment node in `clean_child`: a comment is kept only when this is `false`.
    self.strip_comments = value;
    self
}
1617
1618 /// Returns `true` if comment stripping is turned on.
1619 ///
1620 /// # Examples
1621 ///
1622 /// let mut a = ammonia::Builder::new();
1623 /// a.strip_comments(true);
1624 /// assert!(a.will_strip_comments());
1625 /// a.strip_comments(false);
1626 /// assert!(!a.will_strip_comments());
pub fn will_strip_comments(&self) -> bool {
    // Plain getter for the flag set by `strip_comments`.
    self.strip_comments
}
1630
1631 /// Prefixes all "id" attribute values with a given string. Note that the tag and
1632 /// attribute themselves must still be whitelisted.
1633 ///
1634 /// # Examples
1635 ///
1636 /// use ammonia::Builder;
1637 /// use maplit::hashset;
1638 ///
1639 /// # fn main() {
1640 /// let attributes = hashset!["id"];
1641 /// let a = Builder::new()
1642 /// .generic_attributes(attributes)
1643 /// .id_prefix(Some("safe-"))
1644 /// .clean("<b id=42>")
1645 /// .to_string();
1646 /// assert_eq!(a, "<b id=\"safe-42\"></b>");
1647 /// # }
///
1650 /// # Defaults
1651 ///
1652 /// `None`
pub fn id_prefix(&mut self, value: Option<&'a str>) -> &mut Self {
    // Stored as-is; `clean_dom` hands it to `adjust_node_attributes` for every kept node.
    self.id_prefix = value;
    self
}
1657
1658 /// Only allows the specified properties in `style` attributes.
1659 ///
1660 /// Irrelevant if `style` is not an allowed attribute.
1661 ///
1662 /// Note that if style filtering is enabled style properties will be normalised e.g.
1663 /// invalid declarations and @rules will be removed, with only syntactically valid
1664 /// declarations kept.
1665 ///
1666 /// # Examples
1667 ///
1668 /// use ammonia::Builder;
1669 /// use maplit::hashset;
1670 ///
1671 /// # fn main() {
1672 /// let attributes = hashset!["style"];
1673 /// let properties = hashset!["color"];
1674 /// let a = Builder::new()
1675 /// .generic_attributes(attributes)
1676 /// .filter_style_properties(properties)
1677 /// .clean("<p style=\"font-weight: heavy; color: red\">my html</p>")
1678 /// .to_string();
1679 /// assert_eq!(a, "<p style=\"color:red\">my html</p>");
1680 /// # }
pub fn filter_style_properties(&mut self, value: HashSet<&'a str>) -> &mut Self {
    // Always stored as `Some`, even when `value` is empty.
    self.style_properties = Some(value);
    self
}
1685
1686 /// Constructs a [`Builder`] instance configured with the [default options].
1687 ///
1688 /// # Examples
1689 ///
1690 /// use ammonia::{Builder, Url, UrlRelative};
1691 /// # use std::error::Error;
1692 ///
1693 /// # fn do_main() -> Result<(), Box<dyn Error>> {
1694 /// let input = "<!-- comments will be stripped -->This is an <a href=.>Ammonia</a> example using <a href=struct.Builder.html#method.new onclick=xss>the <code onmouseover=xss>new()</code> function</a>.";
1695 /// let output = "This is an <a href=\"https://docs.rs/ammonia/1.0/ammonia/\" rel=\"noopener noreferrer\">Ammonia</a> example using <a href=\"https://docs.rs/ammonia/1.0/ammonia/struct.Builder.html#method.new\" rel=\"noopener noreferrer\">the <code>new()</code> function</a>.";
1696 ///
1697 /// let result = Builder::new() // <--
1698 /// .url_relative(UrlRelative::RewriteWithBase(Url::parse("https://docs.rs/ammonia/1.0/ammonia/")?))
1699 /// .clean(input)
1700 /// .to_string();
1701 /// assert_eq!(result, output);
1702 /// # Ok(())
1703 /// # }
1704 /// # fn main() { do_main().unwrap() }
1705 ///
1706 /// [default options]: fn.clean.html
1707 /// [`Builder`]: struct.Builder.html
pub fn new() -> Self {
    // Identical to `Builder::default()`; provided as the conventional constructor name.
    Self::default()
}
1711
1712 /// Constructs a [`Builder`] instance configured with no allowed tags.
1713 ///
1714 /// # Examples
1715 ///
1716 /// use ammonia::{Builder, Url, UrlRelative};
1717 /// # use std::error::Error;
1718 ///
1719 /// # fn do_main() -> Result<(), Box<dyn Error>> {
1720 /// let input = "<!-- comments will be stripped -->This is an <a href=.>Ammonia</a> example using <a href=struct.Builder.html#method.new onclick=xss>the <code onmouseover=xss>empty()</code> function</a>.";
1721 /// let output = "This is an Ammonia example using the empty() function.";
1722 ///
1723 /// let result = Builder::empty() // <--
1724 /// .url_relative(UrlRelative::RewriteWithBase(Url::parse("https://docs.rs/ammonia/1.0/ammonia/")?))
1725 /// .clean(input)
1726 /// .to_string();
1727 /// assert_eq!(result, output);
1728 /// # Ok(())
1729 /// # }
1730 /// # fn main() { do_main().unwrap() }
1731 ///
1732 /// [default options]: fn.clean.html
1733 /// [`Builder`]: struct.Builder.html
1734 pub fn empty() -> Self {
1735 Self {
1736 tags: hashset![],
1737 ..Self::default()
1738 }
1739 }
1740
1741 /// Sanitizes an HTML fragment in a string according to the configured options.
1742 ///
1743 /// # Examples
1744 ///
1745 /// use ammonia::{Builder, Url, UrlRelative};
1746 /// # use std::error::Error;
1747 ///
1748 /// # fn do_main() -> Result<(), Box<dyn Error>> {
1749 /// let input = "<!-- comments will be stripped -->This is an <a href=.>Ammonia</a> example using <a href=struct.Builder.html#method.new onclick=xss>the <code onmouseover=xss>new()</code> function</a>.";
1750 /// let output = "This is an <a href=\"https://docs.rs/ammonia/1.0/ammonia/\" rel=\"noopener noreferrer\">Ammonia</a> example using <a href=\"https://docs.rs/ammonia/1.0/ammonia/struct.Builder.html#method.new\" rel=\"noopener noreferrer\">the <code>new()</code> function</a>.";
1751 ///
1752 /// let result = Builder::new()
1753 /// .url_relative(UrlRelative::RewriteWithBase(Url::parse("https://docs.rs/ammonia/1.0/ammonia/")?))
1754 /// .clean(input)
1755 /// .to_string(); // <--
1756 /// assert_eq!(result, output);
1757 /// # Ok(())
1758 /// # }
1759 /// # fn main() { do_main().unwrap() }
1760 pub fn clean(&self, src: &str) -> Document {
1761 let parser = Self::make_parser();
1762 let dom = parser.one(src);
1763 self.clean_dom(dom)
1764 }
1765
1766 /// Sanitizes an HTML fragment from a reader according to the configured options.
1767 ///
1768 /// The input should be in UTF-8 encoding, otherwise the decoding is lossy, just
1769 /// like when using [`String::from_utf8_lossy`].
1770 ///
1771 /// To avoid consuming the reader, a mutable reference can be passed to this method.
1772 ///
1773 /// # Examples
1774 ///
1775 /// use ammonia::Builder;
1776 /// # use std::error::Error;
1777 ///
1778 /// # fn do_main() -> Result<(), Box<dyn Error>> {
1779 /// let a = Builder::new()
1780 /// .clean_from_reader(&b"<!-- no -->"[..])? // notice the `b`
1781 /// .to_string();
1782 /// assert_eq!(a, "");
1783 /// # Ok(()) }
1784 /// # fn main() { do_main().unwrap() }
1785 ///
1786 /// [`String::from_utf8_lossy`]: https://doc.rust-lang.org/std/string/struct.String.html#method.from_utf8_lossy
1787 pub fn clean_from_reader<R>(&self, mut src: R) -> io::Result<Document>
1788 where
1789 R: io::Read,
1790 {
1791 let parser = Self::make_parser().from_utf8();
1792 let dom = parser.read_from(&mut src)?;
1793 Ok(self.clean_dom(dom))
1794 }
1795
1796 /// Clean a post-parsing DOM.
1797 ///
1798 /// This is not a public API because RcDom isn't really stable.
1799 /// We want to be able to take breaking changes to html5ever itself
1800 /// without having to break Ammonia's API.
1801 fn clean_dom(&self, dom: RcDom) -> Document {
1802 let mut stack = Vec::new();
1803 let mut removed = Vec::new();
1804 let link_rel = self
1805 .link_rel
1806 .map(|link_rel| format_tendril!("{}", link_rel));
1807 if link_rel.is_some() {
1808 assert!(self.generic_attributes.get("rel").is_none());
1809 assert!(self
1810 .tag_attributes
1811 .get("a")
1812 .and_then(|a| a.get("rel"))
1813 .is_none());
1814 }
1815 assert!(self.allowed_classes.is_empty() || !self.generic_attributes.contains("class"));
1816 for tag_name in self.allowed_classes.keys() {
1817 assert!(self
1818 .tag_attributes
1819 .get(tag_name)
1820 .and_then(|a| a.get("class"))
1821 .is_none());
1822 }
1823 for tag_name in &self.clean_content_tags {
1824 assert!(!self.tags.contains(tag_name), "`{tag_name}` appears in `clean_content_tags` and in `tags` at the same time");
1825 assert!(!self.tag_attributes.contains_key(tag_name), "`{tag_name}` appears in `clean_content_tags` and in `tag_attributes` at the same time");
1826 }
1827 let body = {
1828 let children = dom.document.children.borrow();
1829 children[0].clone()
1830 };
1831 stack.extend(
1832 mem::take(&mut *body.children.borrow_mut())
1833 .into_iter()
1834 .rev(),
1835 );
1836 // This design approach is used to prevent pathological content from producing
1837 // a stack overflow. The `stack` contains to-be-cleaned nodes, while `remove`,
1838 // of course, contains nodes that need to be dropped (we can't just drop them,
1839 // because they could have a very deep child tree).
1840 while let Some(mut node) = stack.pop() {
1841 let parent = node.parent
1842 .replace(None).expect("a node in the DOM will have a parent, except the root, which is not processed")
1843 .upgrade().expect("a node's parent will be pointed to by its parent (or the root pointer), and will not be dropped");
1844 if self.clean_node_content(&node) || !self.check_expected_namespace(&parent, &node) {
1845 removed.push(node);
1846 continue;
1847 }
1848 let pass = self.clean_child(&mut node);
1849 if pass {
1850 self.adjust_node_attributes(&mut node, &link_rel, self.id_prefix);
1851 dom.append(&parent.clone(), NodeOrText::AppendNode(node.clone()));
1852 } else {
1853 for sub in node.children.borrow_mut().iter_mut() {
1854 sub.parent.replace(Some(Rc::downgrade(&parent)));
1855 }
1856 }
1857 stack.extend(
1858 mem::take(&mut *node.children.borrow_mut())
1859 .into_iter()
1860 .rev(),
1861 );
1862 if !pass {
1863 removed.push(node);
1864 }
1865 }
1866 // Now, imperatively clean up all of the child nodes.
1867 // Otherwise, we could wind up with a DoS, either caused by a memory leak,
1868 // or caused by a stack overflow.
1869 while let Some(node) = removed.pop() {
1870 removed.extend_from_slice(&mem::take(&mut *node.children.borrow_mut())[..]);
1871 }
1872 Document(dom)
1873 }
1874
1875 /// Returns `true` if a node and all its content should be removed.
1876 fn clean_node_content(&self, node: &Handle) -> bool {
1877 match node.data {
1878 NodeData::Text { .. }
1879 | NodeData::Comment { .. }
1880 | NodeData::Doctype { .. }
1881 | NodeData::Document
1882 | NodeData::ProcessingInstruction { .. } => false,
1883 NodeData::Element { ref name, .. } => self.clean_content_tags.contains(&*name.local),
1884 }
1885 }
1886
1887 /// Remove unwanted attributes, and check if the node should be kept or not.
1888 ///
1889 /// The root node doesn't need cleaning because we create the root node ourselves,
1890 /// and it doesn't get serialized, and ... it just exists to give the parser
1891 /// a context (in this case, a div-like block context).
1892 fn clean_child(&self, child: &mut Handle) -> bool {
1893 match child.data {
1894 NodeData::Text { .. } => true,
1895 NodeData::Comment { .. } => !self.strip_comments,
1896 NodeData::Doctype { .. }
1897 | NodeData::Document
1898 | NodeData::ProcessingInstruction { .. } => false,
1899 NodeData::Element {
1900 ref name,
1901 ref attrs,
1902 ..
1903 } => {
1904 if self.tags.contains(&*name.local) {
1905 let attr_filter = |attr: &html5ever::Attribute| {
1906 let whitelisted = self.generic_attributes.contains(&*attr.name.local)
1907 || self.generic_attribute_prefixes.as_ref().map(|prefixes| {
1908 prefixes.iter().any(|&p| attr.name.local.starts_with(p))
1909 }) == Some(true)
1910 || self
1911 .tag_attributes
1912 .get(&*name.local)
1913 .map(|ta| ta.contains(&*attr.name.local))
1914 == Some(true)
1915 || self
1916 .tag_attribute_values
1917 .get(&*name.local)
1918 .and_then(|tav| tav.get(&*attr.name.local))
1919 .map(|vs| {
1920 let attr_val = attr.value.to_lowercase();
1921 vs.iter().any(|v| v.to_lowercase() == attr_val)
1922 })
1923 == Some(true);
1924 if !whitelisted {
1925 // If the class attribute is not whitelisted,
1926 // but there is a whitelisted set of allowed_classes,
1927 // do not strip out the class attribute.
1928 // Banned classes will be filtered later.
1929 &*attr.name.local == "class"
1930 && self.allowed_classes.contains_key(&*name.local)
1931 } else if is_url_attr(&name.local, &attr.name.local) {
1932 let url = Url::parse(&attr.value);
1933 if let Ok(url) = url {
1934 self.url_schemes.contains(url.scheme())
1935 } else if url == Err(url::ParseError::RelativeUrlWithoutBase) {
1936 !matches!(self.url_relative, UrlRelative::Deny)
1937 } else {
1938 false
1939 }
1940 } else {
1941 true
1942 }
1943 };
1944 attrs.borrow_mut().retain(attr_filter);
1945 true
1946 } else {
1947 false
1948 }
1949 }
1950 }
1951 }
1952
1953 // Check for unexpected namespace changes.
1954 //
1955 // The issue happens if developers added to the list of allowed tags any
1956 // tag which is parsed in RCDATA state, PLAINTEXT state or RAWTEXT state,
1957 // that is:
1958 //
1959 // * title
1960 // * textarea
1961 // * xmp
1962 // * iframe
1963 // * noembed
1964 // * noframes
1965 // * plaintext
1966 // * noscript
1967 // * style
1968 // * script
1969 //
1970 // An example in the wild is Plume, that allows iframe [1]. So in next
1971 // examples I'll assume the following policy:
1972 //
1973 // Builder::new()
1974 // .add_tags(&["iframe"])
1975 //
1976 // In HTML namespace `<iframe>` is parsed specially; that is, its content is
1977 // treated as text. For instance, the following html:
1978 //
1979 // <iframe><a>test
1980 //
1981 // Is parsed into the following DOM tree:
1982 //
1983 // iframe
1984 // └─ #text: <a>test
1985 //
1986 // So iframe cannot have any children other than a text node.
1987 //
1988 // The same is not true, though, in "foreign content"; that is, within
1989 // <svg> or <math> tags. The following html:
1990 //
1991 // <svg><iframe><a>test
1992 //
1993 // is parsed differently:
1994 //
//     svg
//     └─ iframe
//        └─ a
//           └─ #text: test
1999 //
2000 // So in SVG namespace iframe can have children.
2001 //
2002 // Ammonia disallows <svg> but it keeps its content after deleting it. And
2003 // the parser internally keeps track of the namespace of the element. So
2004 // assume we have the following snippet:
2005 //
2006 // <svg><iframe><a title="</iframe><img src onerror=alert(1)>">test
2007 //
2008 // It is parsed into:
2009 //
//     svg
//     └─ iframe
//        └─ a title="</iframe><img src onerror=alert(1)>"
//           └─ #text: test
2014 //
2015 // This DOM tree is harmless from ammonia point of view because the piece
2016 // of code that looks like XSS is in a title attribute. Hence, the
2017 // resulting "safe" HTML from ammonia would be:
2018 //
2019 // <iframe><a title="</iframe><img src onerror=alert(1)>" rel="noopener
2020 // noreferrer">test</a></iframe>
2021 //
2022 // However, at this point, the information about namespace is lost, which
2023 // means that the browser will parse this snippet into:
2024 //
2025 // ├─ iframe
2026 // │ └─ #text: <a title="
2027 // ├─ img src="" onerror="alert(1)"
2028 // └─ #text: " rel="noopener noreferrer">test
2029 //
2030 // Leading to XSS.
2031 //
2032 // To solve this issue, check for unexpected namespace switches after cleanup.
2033 // Elements which change namespace at an unexpected point are removed.
2034 // This function returns `true` if `child` should be kept, and `false` if it
2035 // should be removed.
2036 //
2037 // [1]: https://github.com/Plume-org/Plume/blob/main/plume-models/src/safe_string.rs#L21
2038 fn check_expected_namespace(&self, parent: &Handle, child: &Handle) -> bool {
2039 let (parent, child) = match (&parent.data, &child.data) {
2040 (NodeData::Element { name: pn, .. }, NodeData::Element { name: cn, .. }) => (pn, cn),
2041 _ => return true,
2042 };
2043 // The only way to switch from html to svg is with the <svg> tag
2044 if parent.ns == ns!(html) && child.ns == ns!(svg) {
2045 child.local == local_name!("svg")
2046 // The only way to switch from html to mathml is with the <math> tag
2047 } else if parent.ns == ns!(html) && child.ns == ns!(mathml) {
2048 child.local == local_name!("math")
2049 // The only way to switch from mathml to svg/html is with a text integration point
2050 } else if parent.ns == ns!(mathml) && child.ns != ns!(mathml) {
2051 // https://html.spec.whatwg.org/#mathml
2052 matches!(
2053 &*parent.local,
2054 "mi" | "mo" | "mn" | "ms" | "mtext" | "annotation-xml"
2055 ) && if child.ns == ns!(html) { is_html_tag(&child.local) } else { true }
2056 // The only way to switch from svg to mathml/html is with an html integration point
2057 } else if parent.ns == ns!(svg) && child.ns != ns!(svg) {
2058 // https://html.spec.whatwg.org/#svg-0
2059 matches!(&*parent.local, "foreignObject")
2060 && if child.ns == ns!(html) { is_html_tag(&child.local) } else { true }
2061 } else if child.ns == ns!(svg) {
2062 is_svg_tag(&child.local)
2063 } else if child.ns == ns!(mathml) {
2064 is_mathml_tag(&child.local)
2065 } else if child.ns == ns!(html) {
2066 is_html_tag(&child.local)
2067 } else {
2068 // There are no other supported ways to switch namespace
2069 parent.ns == child.ns
2070 }
2071 }
2072
2073 /// Add and transform special-cased attributes and elements.
2074 ///
2075 /// This function handles:
2076 ///
2077 /// * relative URL rewriting
2078 /// * adding `<a rel>` attributes
2079 /// * filtering out banned style properties
2080 /// * filtering out banned classes
2081 fn adjust_node_attributes(
2082 &self,
2083 child: &mut Handle,
2084 link_rel: &Option<StrTendril>,
2085 id_prefix: Option<&'a str>,
2086 ) {
2087 if let NodeData::Element {
2088 ref name,
2089 ref attrs,
2090 ..
2091 } = child.data
2092 {
2093 if let Some(set_attrs) = self.set_tag_attribute_values.get(&*name.local) {
2094 let mut attrs = attrs.borrow_mut();
2095 for (&set_name, &set_value) in set_attrs {
2096 // set the value of the attribute if the attribute is already present
2097 if let Some(attr) = attrs.iter_mut().find(|attr| &*attr.name.local == set_name)
2098 {
2099 if &*attr.value != set_value {
2100 attr.value = set_value.into();
2101 }
2102 } else {
2103 // otherwise, add the attribute
2104 let attr = Attribute {
2105 name: QualName::new(None, ns!(), set_name.into()),
2106 value: set_value.into(),
2107 };
2108 attrs.push(attr);
2109 }
2110 }
2111 }
2112 if let Some(ref link_rel) = *link_rel {
2113 if &*name.local == "a" {
2114 attrs.borrow_mut().push(Attribute {
2115 name: QualName::new(None, ns!(), local_name!("rel")),
2116 value: link_rel.clone(),
2117 })
2118 }
2119 }
2120 if let Some(ref id_prefix) = id_prefix {
2121 for attr in &mut *attrs.borrow_mut() {
2122 if &attr.name.local == "id" && !attr.value.starts_with(id_prefix) {
2123 attr.value = format_tendril!("{}{}", id_prefix, attr.value);
2124 }
2125 }
2126 }
2127 if let Some(ref attr_filter) = self.attribute_filter {
2128 let mut drop_attrs = Vec::new();
2129 let mut attrs = attrs.borrow_mut();
2130 for (i, attr) in &mut attrs.iter_mut().enumerate() {
2131 let replace_with = if let Some(new) =
2132 attr_filter.filter(&name.local, &attr.name.local, &attr.value)
2133 {
2134 if *new != *attr.value {
2135 Some(format_tendril!("{}", new))
2136 } else {
2137 None // no need to replace the attr if filter returned the same value
2138 }
2139 } else {
2140 drop_attrs.push(i);
2141 None
2142 };
2143 if let Some(replace_with) = replace_with {
2144 attr.value = replace_with;
2145 }
2146 }
2147 for i in drop_attrs.into_iter().rev() {
2148 attrs.swap_remove(i);
2149 }
2150 }
2151 {
2152 let mut drop_attrs = Vec::new();
2153 let mut attrs = attrs.borrow_mut();
2154 for (i, attr) in attrs.iter_mut().enumerate() {
2155 if is_url_attr(&name.local, &attr.name.local) && is_url_relative(&attr.value) {
2156 let new_value = self.url_relative.evaluate(&attr.value);
2157 if let Some(new_value) = new_value {
2158 attr.value = new_value;
2159 } else {
2160 drop_attrs.push(i);
2161 }
2162 }
2163 }
2164 // Swap remove scrambles the vector after the current point.
2165 // We will not do anything except with items before the current point.
2166 // The `rev()` is, as such, necessary for correctness.
2167 // We could use regular `remove(usize)` and a forward iterator,
2168 // but that's slower.
2169 for i in drop_attrs.into_iter().rev() {
2170 attrs.swap_remove(i);
2171 }
2172 }
2173 if let Some(allowed_values) = &self.style_properties {
2174 for attr in &mut *attrs.borrow_mut() {
2175 if &attr.name.local == "style" {
2176 attr.value = style::filter_style_attribute(&attr.value, allowed_values).into();
2177 }
2178 }
2179 }
2180 if let Some(allowed_values) = self.allowed_classes.get(&*name.local) {
2181 for attr in &mut *attrs.borrow_mut() {
2182 if &attr.name.local == "class" {
2183 let mut classes = vec![];
2184 // https://html.spec.whatwg.org/#global-attributes:classes-2
2185 for class in attr.value.split_ascii_whitespace() {
2186 if allowed_values.contains(class) {
2187 classes.push(class.to_owned());
2188 }
2189 }
2190 attr.value = format_tendril!("{}", classes.join(" "));
2191 }
2192 }
2193 }
2194 }
2195 }
2196
2197 /// Initializes an HTML fragment parser.
2198 ///
2199 /// Ammonia conforms to the HTML5 fragment parsing rules,
2200 /// by parsing the given fragment as if it were included in a <div> tag.
2201 fn make_parser() -> html::Parser<RcDom> {
2202 html::parse_fragment(
2203 RcDom::default(),
2204 html::ParseOpts::default(),
2205 QualName::new(None, ns!(html), local_name!("div")),
2206 vec![],
2207 false,
2208 )
2209 }
2210}
2211
/// Given an element name and attribute name, determine if the given attribute contains a URL.
///
/// `href` and `src` count on every element; the remaining attributes only
/// count on the specific elements listed in the match below.
fn is_url_attr(element: &str, attr: &str) -> bool {
    match attr {
        "href" | "src" => true,
        "action" => element == "form",
        "data" => element == "object",
        "formaction" => matches!(element, "button" | "input"),
        "ping" => element == "a",
        "poster" => element == "video",
        _ => false,
    }
}
2222
2223fn is_html_tag(element: &str) -> bool {
2224 (!is_svg_tag(element) && !is_mathml_tag(element))
2225 || matches!(
2226 element,
2227 "title" | "style" | "font" | "a" | "script" | "span"
2228 )
2229}
2230
/// Given an element name, check if it's SVG
///
/// The comparison is exact and case-sensitive (`foreignObject` matches,
/// `foreignobject` does not).
fn is_svg_tag(element: &str) -> bool {
    // https://svgwg.org/svg2-draft/eltindex.html
    matches!(
        element,
        // a
        "a" | "animate" | "animateMotion" | "animateTransform"
        // c - e
        | "circle" | "clipPath" | "defs" | "desc" | "discard" | "ellipse"
        // filter primitives
        | "feBlend" | "feColorMatrix" | "feComponentTransfer" | "feComposite"
        | "feConvolveMatrix" | "feDiffuseLighting" | "feDisplacementMap"
        | "feDistantLight" | "feDropShadow" | "feFlood"
        | "feFuncA" | "feFuncB" | "feFuncG" | "feFuncR"
        | "feGaussianBlur" | "feImage" | "feMerge" | "feMergeNode"
        | "feMorphology" | "feOffset" | "fePointLight" | "feSpecularLighting"
        | "feSpotLight" | "feTile" | "feTurbulence"
        // f - m
        | "filter" | "foreignObject" | "g" | "image" | "line" | "linearGradient"
        | "marker" | "mask" | "metadata" | "mpath"
        // p - r
        | "path" | "pattern" | "polygon" | "polyline" | "radialGradient" | "rect"
        // s
        | "script" | "set" | "stop" | "style" | "svg" | "switch" | "symbol"
        // t - v
        | "text" | "textPath" | "title" | "tspan" | "use" | "view"
    )
}
2301
/// Given an element name, check if it's Math
///
/// The comparison is exact and case-sensitive.
fn is_mathml_tag(element: &str) -> bool {
    // NOTE(review): the link that used to sit here pointed at the SVG element
    // index (https://svgwg.org/svg2-draft/eltindex.html), which does not
    // describe these names. The list looks like the MathML element index
    // (presentation and content markup); confirm the intended reference.
    matches!(
        element,
        // a
        "abs" | "and" | "annotation" | "annotation-xml" | "apply" | "approx"
        | "arccos" | "arccosh" | "arccot" | "arccoth" | "arccsc" | "arccsch"
        | "arcsec" | "arcsech" | "arcsin" | "arcsinh" | "arctan" | "arctanh" | "arg"
        // b
        | "bind" | "bvar"
        // c
        | "card" | "cartesianproduct" | "cbytes" | "ceiling" | "cerror" | "ci" | "cn"
        | "codomain" | "complexes" | "compose" | "condition" | "conjugate"
        | "cos" | "cosh" | "cot" | "coth" | "cs" | "csc" | "csch" | "csymbol" | "curl"
        // d
        | "declare" | "degree" | "determinant" | "diff" | "divergence" | "divide"
        | "domain" | "domainofapplication"
        // e
        | "emptyset" | "eq" | "equivalent" | "eulergamma" | "exists" | "exp"
        | "exponentiale"
        // f
        | "factorial" | "factorof" | "false" | "floor" | "fn" | "forall"
        // g
        | "gcd" | "geq" | "grad" | "gt"
        // i
        | "ident" | "image" | "imaginary" | "imaginaryi" | "implies" | "in"
        | "infinity" | "int" | "integers" | "intersect" | "interval" | "inverse"
        // l
        | "lambda" | "laplacian" | "lcm" | "leq" | "limit" | "list" | "ln" | "log"
        | "logbase" | "lowlimit" | "lt"
        // m
        | "maction" | "maligngroup" | "malignmark" | "math" | "matrix" | "matrixrow"
        | "max" | "mean" | "median" | "menclose" | "merror" | "mfenced" | "mfrac"
        | "mglyph" | "mi" | "min" | "minus" | "mlabeledtr" | "mlongdiv"
        | "mmultiscripts" | "mn" | "mo" | "mode" | "moment" | "momentabout" | "mover"
        | "mpadded" | "mphantom" | "mprescripts" | "mroot" | "mrow" | "ms"
        | "mscarries" | "mscarry" | "msgroup" | "msline" | "mspace" | "msqrt"
        | "msrow" | "mstack" | "mstyle" | "msub" | "msubsup" | "msup" | "mtable"
        | "mtd" | "mtext" | "mtr" | "munder" | "munderover"
        // n
        | "naturalnumbers" | "neq" | "none" | "not" | "notanumber" | "notin"
        | "notprsubset" | "notsubset"
        // o
        | "or" | "otherwise" | "outerproduct"
        // p
        | "partialdiff" | "pi" | "piece" | "piecewise" | "plus" | "power" | "primes"
        | "product" | "prsubset"
        // q - r
        | "quotient" | "rationals" | "real" | "reals" | "reln" | "rem" | "root"
        // s
        | "scalarproduct" | "sdev" | "sec" | "sech" | "selector" | "semantics" | "sep"
        | "set" | "setdiff" | "share" | "sin" | "sinh" | "span" | "subset" | "sum"
        // t - u
        | "tan" | "tanh" | "tendsto" | "times" | "transpose" | "true" | "union"
        | "uplimit"
        // v - x
        | "variance" | "vector" | "vectorproduct" | "xor"
    )
}
2503
2504fn is_url_relative(url: &str) -> bool {
2505 matches!(
2506 Url::parse(url),
2507 Err(url::ParseError::RelativeUrlWithoutBase)
2508 )
2509}
2510
/// Policy for [relative URLs], that is, URLs that do not specify the scheme in full.
///
/// This policy kicks in, if set, for any attribute named `src` or `href`,
/// as well as the `action` attribute of a `form` tag, the `data` attribute of
/// an `object` tag, the `formaction` attribute of a `button` or `input` tag,
/// the `ping` attribute of an `a` tag, and the `poster` attribute of a
/// `video` tag.
///
/// [relative URLs]: struct.Builder.html#method.url_relative
///
/// # Examples
///
/// ## `Deny`
///
/// * `<a href="test">` is a file-relative URL, and will be removed
/// * `<a href="/test">` is a domain-relative URL, and will be removed
/// * `<a href="//example.com/test">` is a scheme-relative URL, and will be removed
/// * `<a href="http://example.com/test">` is an absolute URL, and will be kept
///
/// ## `PassThrough`
///
/// No changes will be made to any URLs, except if a disallowed scheme is used.
///
/// ## `RewriteWithBase`
///
/// If the base is set to `http://notriddle.com/some-directory/some-file`
///
/// * `<a href="test">` will be rewritten to `<a href="http://notriddle.com/some-directory/test">`
/// * `<a href="/test">` will be rewritten to `<a href="http://notriddle.com/test">`
/// * `<a href="//example.com/test">` will be rewritten to `<a href="http://example.com/test">`
/// * `<a href="http://example.com/test">` is an absolute URL, so it will be kept as-is
///
/// ## `Custom`
///
/// Pass the relative URL to a function.
/// If it returns `Some(string)`, then that one gets used.
/// Otherwise, it will remove the attribute (like `Deny` does).
///
///     use std::borrow::Cow;
///     fn is_absolute_path(url: &str) -> bool {
///         let u = url.as_bytes();
///         // `//a/b/c` is "protocol-relative", meaning "a" is a hostname
///         // `/a/b/c` is an absolute path, and what we want to do stuff to.
///         u.get(0) == Some(&b'/') && u.get(1) != Some(&b'/')
///     }
///     fn evaluate(url: &str) -> Option<Cow<str>> {
///         if is_absolute_path(url) {
///             Some(Cow::Owned(String::from("/root") + url))
///         } else {
///             Some(Cow::Borrowed(url))
///         }
///     }
///     fn main() {
///         let a = ammonia::Builder::new()
///             .url_relative(ammonia::UrlRelative::Custom(Box::new(evaluate)))
///             .clean("<a href=/test/path>fixed</a><a href=path>passed</a><a href=http://google.com/>skipped</a>")
///             .to_string();
///         assert_eq!(a, "<a href=\"/root/test/path\" rel=\"noopener noreferrer\">fixed</a><a href=\"path\" rel=\"noopener noreferrer\">passed</a><a href=\"http://google.com/\" rel=\"noopener noreferrer\">skipped</a>");
///     }
///
/// This function is only applied to relative URLs.
/// To filter all of the URLs,
/// use the not-yet-implemented Content Security Policy.
#[non_exhaustive]
pub enum UrlRelative<'a> {
    /// Relative URLs will be completely stripped from the document.
    Deny,
    /// Relative URLs will be passed through unchanged.
    PassThrough,
    /// Relative URLs will be changed into absolute URLs, based on this base URL.
    RewriteWithBase(Url),
    /// Force absolute and relative paths into a particular directory.
    ///
    /// Since the resolver does not affect fully-qualified URLs, it doesn't
    /// prevent users from linking wherever they want. This feature only
    /// serves to make content more portable.
    ///
    /// A URL starting with `//` is joined onto `root` as-is. A URL starting
    /// with a single `/` has that slash removed and is then joined onto
    /// `root`. Any other URL is joined onto the result of joining `path`
    /// onto `root`.
    ///
    /// # Examples
    ///
    /// <table>
    /// <thead>
    /// <tr>
    /// <th>root</th>
    /// <th>path</th>
    /// <th>url</th>
    /// <th>result</th>
    /// </tr>
    /// </thead>
    /// <tbody>
    /// <tr>
    /// <td>https://github.com/rust-ammonia/ammonia/blob/master/</td>
    /// <td>README.md</td>
    /// <td></td>
    /// <td>https://github.com/rust-ammonia/ammonia/blob/master/README.md</td>
    /// </tr><tr>
    /// <td>https://github.com/rust-ammonia/ammonia/blob/master/</td>
    /// <td>README.md</td>
    /// <td>/</td>
    /// <td>https://github.com/rust-ammonia/ammonia/blob/master/</td>
    /// </tr><tr>
    /// <td>https://github.com/rust-ammonia/ammonia/blob/master/</td>
    /// <td>README.md</td>
    /// <td>/CONTRIBUTING.md</td>
    /// <td>https://github.com/rust-ammonia/ammonia/blob/master/CONTRIBUTING.md</td>
    /// </tr><tr>
    /// <td>https://github.com/rust-ammonia/ammonia/blob/master</td>
    /// <td>README.md</td>
    /// <td></td>
    /// <td>https://github.com/rust-ammonia/ammonia/blob/README.md</td>
    /// </tr><tr>
    /// <td>https://github.com/rust-ammonia/ammonia/blob/master</td>
    /// <td>README.md</td>
    /// <td>/</td>
    /// <td>https://github.com/rust-ammonia/ammonia/blob/</td>
    /// </tr><tr>
    /// <td>https://github.com/rust-ammonia/ammonia/blob/master</td>
    /// <td>README.md</td>
    /// <td>/CONTRIBUTING.md</td>
    /// <td>https://github.com/rust-ammonia/ammonia/blob/CONTRIBUTING.md</td>
    /// </tr><tr>
    /// <td>https://github.com/rust-ammonia/ammonia/blob/master/</td>
    /// <td></td>
    /// <td></td>
    /// <td>https://github.com/rust-ammonia/ammonia/blob/master/</td>
    /// </tr><tr>
    /// <td>https://github.com/rust-ammonia/ammonia/blob/master/</td>
    /// <td></td>
    /// <td>/</td>
    /// <td>https://github.com/rust-ammonia/ammonia/blob/master/</td>
    /// </tr><tr>
    /// <td>https://github.com/rust-ammonia/ammonia/blob/master/</td>
    /// <td></td>
    /// <td>/CONTRIBUTING.md</td>
    /// <td>https://github.com/rust-ammonia/ammonia/blob/master/CONTRIBUTING.md</td>
    /// </tr><tr>
    /// <td>https://github.com/</td>
    /// <td>rust-ammonia/ammonia/blob/master/README.md</td>
    /// <td></td>
    /// <td>https://github.com/rust-ammonia/ammonia/blob/master/README.md</td>
    /// </tr><tr>
    /// <td>https://github.com/</td>
    /// <td>rust-ammonia/ammonia/blob/master/README.md</td>
    /// <td>/</td>
    /// <td>https://github.com/</td>
    /// </tr><tr>
    /// <td>https://github.com/</td>
    /// <td>rust-ammonia/ammonia/blob/master/README.md</td>
    /// <td>CONTRIBUTING.md</td>
    /// <td>https://github.com/rust-ammonia/ammonia/blob/master/CONTRIBUTING.md</td>
    /// </tr><tr>
    /// <td>https://github.com/</td>
    /// <td>rust-ammonia/ammonia/blob/master/README.md</td>
    /// <td>/CONTRIBUTING.md</td>
    /// <td>https://github.com/CONTRIBUTING.md</td>
    /// </tr>
    /// </tbody>
    /// </table>
    RewriteWithRoot {
        /// The URL that is treated as the root by the resolver.
        root: Url,
        /// The "current path" used to resolve relative paths.
        path: String,
    },
    /// Rewrite URLs with a custom function.
    Custom(Box<dyn UrlRelativeEvaluate<'a>>),
}
2674
2675impl<'a> UrlRelative<'a> {
2676 fn evaluate(&self, url: &str) -> Option<tendril::StrTendril> {
2677 match self {
2678 UrlRelative::RewriteWithBase(ref url_base) => url_base
2679 .join(url)
2680 .ok()
2681 .and_then(|x| StrTendril::from_str(x.as_str()).ok()),
2682 UrlRelative::RewriteWithRoot { ref root, ref path } => {
2683 (match url.as_bytes() {
2684 // Scheme-relative URL
2685 [b'/', b'/', ..] => root.join(url),
2686 // Path-absolute URL
2687 b"/" => root.join("."),
2688 [b'/', ..] => root.join(&url[1..]),
2689 // Path-relative URL
2690 _ => root.join(path).and_then(|r| r.join(url)),
2691 })
2692 .ok()
2693 .and_then(|x| StrTendril::from_str(x.as_str()).ok())
2694 }
2695 UrlRelative::Custom(ref evaluate) => evaluate
2696 .evaluate(url)
2697 .as_ref()
2698 .map(Cow::as_ref)
2699 .map(StrTendril::from_str)
2700 .and_then(Result::ok),
2701 UrlRelative::PassThrough => StrTendril::from_str(url).ok(),
2702 UrlRelative::Deny => None,
2703 }
2704 }
2705}
2706
2707impl<'a> fmt::Debug for UrlRelative<'a> {
2708 fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
2709 match *self {
2710 UrlRelative::Deny => write!(f, "UrlRelative::Deny"),
2711 UrlRelative::PassThrough => write!(f, "UrlRelative::PassThrough"),
2712 UrlRelative::RewriteWithBase(ref base) => {
2713 write!(f, "UrlRelative::RewriteWithBase({})", base)
2714 }
2715 UrlRelative::RewriteWithRoot { ref root, ref path } => {
2716 write!(
2717 f,
2718 "UrlRelative::RewriteWithRoot {{ root: {root}, path: {path} }}"
2719 )
2720 }
2721 UrlRelative::Custom(_) => write!(f, "UrlRelative::Custom"),
2722 }
2723 }
2724}
2725
/// Types that implement this trait can be used to convert a relative URL into an absolute URL.
///
/// This evaluator is only called when the URL is relative; absolute URLs are not evaluated.
///
/// See [`url_relative`][url_relative] for more details.
///
/// [url_relative]: struct.Builder.html#method.url_relative
pub trait UrlRelativeEvaluate<'a>: Send + Sync + 'a {
    /// Return `None` to remove the attribute. Return `Some(str)` to replace it with a new string.
    ///
    /// `url` is the relative URL exactly as it appears in the attribute value.
    fn evaluate<'url>(&self, url: &'url str) -> Option<Cow<'url, str>>;
}
2737impl<'a, T> UrlRelativeEvaluate<'a> for T
2738where
2739 T: Fn(&str) -> Option<Cow<'_, str>> + Send + Sync + 'a,
2740{
2741 fn evaluate<'url>(&self, url: &'url str) -> Option<Cow<'url, str>> {
2742 self(url)
2743 }
2744}
2745
2746impl fmt::Debug for dyn AttributeFilter {
2747 fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
2748 f.write_str("AttributeFilter")
2749 }
2750}
2751
/// Types that implement this trait can be used to remove or rewrite arbitrary attributes.
///
/// See [`attribute_filter`][attribute_filter] for more details.
///
/// [attribute_filter]: struct.Builder.html#method.attribute_filter
pub trait AttributeFilter: Send + Sync {
    /// Return `None` to remove the attribute. Return `Some(str)` to replace it with a new string.
    ///
    /// The arguments are, in order, the name of the element, the name of the
    /// attribute on that element, and the attribute's current value.
    fn filter<'a>(&self, element: &str, attribute: &str, value: &'a str) -> Option<Cow<'a, str>>;
}
2761
2762impl<T> AttributeFilter for T
2763where
2764 T: for<'a> Fn(&str, &str, &'a str) -> Option<Cow<'a, str>> + Send + Sync + 'static,
2765{
2766 fn filter<'a>(&self, element: &str, attribute: &str, value: &'a str) -> Option<Cow<'a, str>> {
2767 self(element, attribute, value)
2768 }
2769}
2770
/// A sanitized HTML document.
///
/// The `Document` type is an opaque struct representing an HTML fragment that was sanitized by
/// `ammonia`. It can be converted to a [`String`] or written to a [`Write`] instance. This allows
/// users to avoid buffering the serialized representation to a [`String`] when desired.
///
/// This type is opaque to insulate the caller from breaking changes in the `html5ever` interface.
///
/// Note that this type wraps an `html5ever` DOM tree. `ammonia` does not support streaming, so
/// the complete fragment needs to be stored in memory during processing.
///
/// [`String`]: https://doc.rust-lang.org/nightly/std/string/struct.String.html
/// [`Write`]: https://doc.rust-lang.org/nightly/std/io/trait.Write.html
///
/// # Examples
///
///     use ammonia::Builder;
///
///     let input = "<!-- comments will be stripped -->This is an Ammonia example.";
///     let output = "This is an Ammonia example.";
///
///     let document = Builder::new()
///         .clean(input);
///     assert_eq!(document.to_string(), output);
pub struct Document(RcDom);
2796
2797impl Document {
2798 /// Serializes a `Document` instance to a writer.
2799 ///
2800 /// This method writes the sanitized HTML to a [`Write`] instance, avoiding a buffering step.
2801 ///
2802 /// To avoid consuming the writer, a mutable reference can be passed, like in the example below.
2803 ///
2804 /// Note that the in-memory representation of `Document` is larger than the serialized
2805 /// `String`.
2806 ///
2807 /// [`Write`]: https://doc.rust-lang.org/nightly/std/io/trait.Write.html
2808 ///
2809 /// # Examples
2810 ///
2811 /// use ammonia::Builder;
2812 ///
2813 /// let input = "Some <style></style>HTML here";
2814 /// let expected = b"Some HTML here";
2815 ///
2816 /// let document = Builder::new()
2817 /// .clean(input);
2818 ///
2819 /// let mut sanitized = Vec::new();
2820 /// document.write_to(&mut sanitized)
2821 /// .expect("Writing to a string should not fail (except on OOM)");
2822 /// assert_eq!(sanitized, expected);
2823 pub fn write_to<W>(&self, writer: W) -> io::Result<()>
2824 where
2825 W: io::Write,
2826 {
2827 let opts = Self::serialize_opts();
2828 let inner: SerializableHandle = self.0.document.children.borrow()[0].clone().into();
2829 serialize(writer, &inner, opts)
2830 }
2831
2832 /// Exposes the `Document` instance as an [`rcdom::Handle`].
2833 ///
2834 /// This method returns the inner object backing the `Document` instance. This allows
2835 /// making further changes to the DOM without introducing redundant serialization and
2836 /// parsing.
2837 ///
2838 /// Note that this method should be considered unstable and sits outside of the semver
2839 /// stability guarantees. It may change, break, or go away at any time, either because
2840 /// of `html5ever` changes or `ammonia` implementation changes.
2841 ///
2842 /// For this method to be accessible, a `cfg` flag is required. The easiest way is to
2843 /// use the `RUSTFLAGS` environment variable:
2844 ///
2845 /// ```text
2846 /// RUSTFLAGS='--cfg ammonia_unstable' cargo build
2847 /// ```
2848 ///
2849 /// on Unix-like platforms, or
2850 ///
2851 /// ```text
2852 /// set RUSTFLAGS=--cfg ammonia_unstable
2853 /// cargo build
2854 /// ```
2855 ///
2856 /// on Windows.
2857 ///
2858 /// This requirement also applies to crates that transitively depend on crates that use
2859 /// this flag.
2860 ///
2861 /// # Examples
2862 ///
2863 /// use ammonia::Builder;
2864 /// use maplit::hashset;
2865 /// use html5ever::serialize::{serialize, SerializeOpts};
2866 ///
2867 /// # use std::error::Error;
2868 /// # fn do_main() -> Result<(), Box<dyn Error>> {
2869 /// let input = "<a>one link</a> and <a>one more</a>";
2870 /// let expected = "<a>one more</a> and <a>one link</a>";
2871 ///
2872 /// let document = Builder::new()
2873 /// .link_rel(None)
2874 /// .clean(input);
2875 ///
2876 /// let mut node = document.to_dom_node();
2877 /// node.children.borrow_mut().reverse();
2878 ///
2879 /// let mut buf = Vec::new();
2880 /// serialize(&mut buf, &node, SerializeOpts::default())?;
2881 /// let output = String::from_utf8(buf)?;
2882 ///
2883 /// assert_eq!(output, expected);
2884 /// # Ok(())
2885 /// # }
2886 /// # fn main() { do_main().unwrap() }
2887 #[cfg(ammonia_unstable)]
2888 pub fn to_dom_node(&self) -> Handle {
2889 self.0.document.children.borrow()[0].clone()
2890 }
2891
2892 fn serialize_opts() -> SerializeOpts {
2893 SerializeOpts::default()
2894 }
2895}
2896
2897impl Clone for Document {
2898 fn clone(&self) -> Self {
2899 let parser = Builder::make_parser();
2900 let dom = parser.one(&self.to_string()[..]);
2901 Document(dom)
2902 }
2903}
2904
2905/// Convert a `Document` to stringified HTML.
2906///
2907/// Since [`Document`] implements [`Display`], it can be converted to a [`String`] using the
2908/// standard [`ToString::to_string`] method. This is the simplest way to use `ammonia`.
2909///
2910/// [`Document`]: ammonia::Document
2911/// [`Display`]: std::fmt::Display
2912/// [`ToString::to_string`]: std::string::ToString
2913///
2914/// # Examples
2915///
2916/// use ammonia::Builder;
2917///
2918/// let input = "Some <style></style>HTML here";
2919/// let output = "Some HTML here";
2920///
2921/// let document = Builder::new()
2922/// .clean(input);
2923/// assert_eq!(document.to_string(), output);
2924impl Display for Document {
2925 fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
2926 let opts = Self::serialize_opts();
2927 let mut ret_val = Vec::new();
2928 let inner: SerializableHandle = self.0.document.children.borrow()[0].clone().into();
2929 serialize(&mut ret_val, &inner, opts)
2930 .expect("Writing to a string shouldn't fail (expect on OOM)");
2931 String::from_utf8(ret_val)
2932 .expect("html5ever only supports UTF8")
2933 .fmt(f)
2934 }
2935}
2936
2937impl fmt::Debug for Document {
2938 fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
2939 write!(f, "Document({})", self)
2940 }
2941}
2942
2943impl From<Document> for String {
2944 fn from(document: Document) -> Self {
2945 document.to_string()
2946 }
2947}
2948
2949#[cfg(test)]
2950mod test {
2951 use super::*;
#[test]
fn deeply_nested_whitelisted() {
    // Smoke test: 60,000 unclosed, nested `<b>` tags must be cleaned without panicking.
    let nested = "<b>".repeat(60_000);
    clean(&nested);
}
#[test]
fn deeply_nested_blacklisted() {
    // Smoke test: 60,000 unclosed, nested `<b-b>` tags must be cleaned without panicking.
    let nested = "<b-b>".repeat(60_000);
    clean(&nested);
}
#[test]
fn deeply_nested_alternating() {
    // Smoke test: deep nesting that alternates between the `<b-b>` tag used by
    // the blacklisted test and the `<b>` tag used by the whitelisted test.
    // Repeating `<b-b>` alone would only duplicate `deeply_nested_blacklisted`.
    clean(&"<b-b><b>".repeat(35_000));
}
#[test]
fn included_angles() {
    // A bare `<` followed by a space is text, not a tag. On output the
    // serializer escapes it, so the sanitized string carries the entity.
    let fragment = "1 < 2";
    let result = clean(fragment);
    assert_eq!(result, "1 &lt; 2");
}
#[test]
fn remove_script() {
    // The script element and its content are removed, but the text on either
    // side is left untouched: "an " and " example" end up adjacent, which
    // leaves two spaces between the words.
    let fragment = "an <script>evil()</script> example";
    let result = clean(fragment);
    assert_eq!(result, "an  example");
}
#[test]
fn ignore_link() {
    // A harmless absolute link is kept; only the `rel` attribute is added.
    let cleaned = clean("a <a href=\"http://www.google.com\">good</a> example");
    assert_eq!(
        cleaned,
        r#"a <a href="http://www.google.com" rel="noopener noreferrer">good</a> example"#
    );
}
#[test]
fn remove_unsafe_link() {
    // The `onclick` handler is stripped while the link itself survives.
    let cleaned = clean(r#"an <a onclick="evil()" href="http://www.google.com">evil</a> example"#);
    let expected =
        r#"an <a href="http://www.google.com" rel="noopener noreferrer">evil</a> example"#;
    assert_eq!(cleaned, expected);
}
#[test]
fn remove_js_link() {
    // A `javascript:` href is removed; the `<a>` element and its text remain.
    let cleaned = clean(r#"an <a href="javascript:evil()">evil</a> example"#);
    assert_eq!(cleaned, r#"an <a rel="noopener noreferrer">evil</a> example"#);
}
#[test]
fn tag_rebalance() {
    // An unclosed tag comes back with its closing tag added.
    let cleaned = clean("<b>AWESOME!");
    assert_eq!(cleaned, "<b>AWESOME!</b>");
}
#[test]
fn allow_url_relative() {
    // With `PassThrough`, a relative `href` is kept as written.
    let cleaned = Builder::new()
        .url_relative(UrlRelative::PassThrough)
        .clean("<a href=test>Test</a>")
        .to_string();
    let expected = r#"<a href="test" rel="noopener noreferrer">Test</a>"#;
    assert_eq!(cleaned, expected);
}
#[test]
fn rewrite_url_relative() {
    // With `RewriteWithBase`, a relative `href` is resolved against the base URL.
    let base = Url::parse("http://example.com/").unwrap();
    let cleaned = Builder::new()
        .url_relative(UrlRelative::RewriteWithBase(base))
        .clean("<a href=test>Test</a>")
        .to_string();
    let expected = r#"<a href="http://example.com/test" rel="noopener noreferrer">Test</a>"#;
    assert_eq!(cleaned, expected);
}
/// The `href` in this input is removed rather than rewritten against the
/// base, and cleaning does not panic.
#[test]
fn rewrite_url_relative_with_invalid_url() {
    // Reduced from https://github.com/Bauke/ammonia-crash-test
    let input = r##"<a href="\\"https://example.com\\"">test</a>"##;
    let base = Url::parse("http://example.com/").unwrap();
    let document = Builder::new()
        .url_relative(UrlRelative::RewriteWithBase(base))
        .clean(input);
    assert_eq!(
        document.to_string(),
        r##"<a rel="noopener noreferrer">test</a>"##
    );
}
/// An attribute filter that returns every value unchanged leaves the output
/// as it would be without a filter; the filter also sees the added `rel`.
#[test]
fn attribute_filter_nop() {
    let document = Builder::new()
        .attribute_filter(|elem, attr, value| {
            assert_eq!("a", elem);
            let known_pair = matches!(
                (attr, value),
                ("href", "test") | ("rel", "noopener noreferrer")
            );
            assert!(known_pair, "{}", value);
            Some(Cow::Borrowed(value))
        })
        .clean("<a href=test>Test</a>");
    assert_eq!(
        document.to_string(),
        r#"<a href="test" rel="noopener noreferrer">Test</a>"#
    );
}
3066
/// Returning `None` from the attribute filter removes that attribute.
#[test]
fn attribute_filter_drop() {
    let document = Builder::new()
        .attribute_filter(|elem, attr, value| {
            assert_eq!("img", elem);
            match (attr, value) {
                ("alt", "test") => Some(Cow::Borrowed(value)),
                ("src", "imgtest") => None,
                _ => panic!("unexpected"),
            }
        })
        .clean("Test<img alt=test src=imgtest>");
    assert_eq!(document.to_string(), r#"Test<img alt="test">"#);
}
3083
/// When the attribute filter returns an absolute URL, the configured
/// `RewriteWithBase` base does not appear in the output.
#[test]
fn url_filter_absolute() {
    let unused_base = Url::parse("http://wrong.invalid/").unwrap();
    let document = Builder::new()
        .attribute_filter(|elem, attr, value| {
            assert_eq!("img", elem);
            match (attr, value) {
                ("alt", "test") => None,
                ("src", "imgtest") => Some(Cow::Owned(format!(
                    "https://example.com/images/{value}"
                ))),
                _ => panic!("unexpected"),
            }
        })
        .url_relative(UrlRelative::RewriteWithBase(unused_base))
        .clean("Test<img alt=test src=imgtest>");
    assert_eq!(
        document.to_string(),
        r#"Test<img src="https://example.com/images/imgtest">"#
    );
}
3108
/// When the attribute filter returns a relative URL, that value is resolved
/// against the `RewriteWithBase` base.
#[test]
fn url_filter_relative() {
    let base = Url::parse("https://example.com/base/#").unwrap();
    let document = Builder::new()
        .attribute_filter(|elem, attr, value| {
            assert_eq!("img", elem);
            match (attr, value) {
                ("alt", "test") => Some(Cow::Borrowed("altalt")),
                ("src", "imgtest") => Some(Cow::Borrowed("rewrite")),
                _ => panic!("unexpected"),
            }
        })
        .url_relative(UrlRelative::RewriteWithBase(base))
        .clean("Test<img alt=test src=imgtest>");
    assert_eq!(
        document.to_string(),
        r#"Test<img alt="altalt" src="https://example.com/base/rewrite">"#
    );
}
3131
/// URL rewriting still happens when `link_rel(None)` turns off the `rel`
/// attribute.
#[test]
fn rewrite_url_relative_no_rel() {
    let base = Url::parse("http://example.com/").unwrap();
    let mut sanitizer = Builder::new();
    sanitizer
        .url_relative(UrlRelative::RewriteWithBase(base))
        .link_rel(None);
    let html = sanitizer.clean("<a href=test>Test</a>").to_string();
    assert_eq!(html, r#"<a href="http://example.com/test">Test</a>"#);
}
/// With `UrlRelative::Deny` a relative `href` is removed.
#[test]
fn deny_url_relative() {
    let mut sanitizer = Builder::new();
    sanitizer.url_relative(UrlRelative::Deny);
    let html = sanitizer.clean("<a href=test>Test</a>").to_string();
    assert_eq!(html, r#"<a rel="noopener noreferrer">Test</a>"#);
}
/// A `rel` supplied in the input is replaced by `noopener noreferrer`.
#[test]
fn replace_rel() {
    let input = r#"<a href=test rel="garbage">Test</a>"#;
    let document = Builder::new()
        .url_relative(UrlRelative::PassThrough)
        .clean(input);
    assert_eq!(
        document.to_string(),
        r#"<a href="test" rel="noopener noreferrer">Test</a>"#
    );
}
/// With `link_rel(None)`, a `rel` from the input is still removed.
#[test]
fn consider_rel_still_banned() {
    let input = r#"<a href=test rel="garbage">Test</a>"#;
    let document = Builder::new()
        .url_relative(UrlRelative::PassThrough)
        .link_rel(None)
        .clean(input);
    assert_eq!(document.to_string(), r#"<a href="test">Test</a>"#);
}
/// A `javascript:` value in `data` is removed from `<object>` but kept on
/// `<span>`, even though `data` is allowed generically.
#[test]
fn object_data() {
    let input = concat!(
        r#"<span data="javascript:evil()">Test</span>"#,
        r#"<object data="javascript:evil()"></object>M"#,
    );
    let mut sanitizer = Builder::new();
    sanitizer
        .tags(hashset!["span", "object"])
        .generic_attributes(hashset!["data"]);
    let html = sanitizer.clean(input).to_string();
    assert_eq!(
        html,
        r#"<span data="javascript:evil()">Test</span><object></object>M"#
    );
}
/// `border` is removed from `<table>`, and a `<tbody>` appears around the row.
#[test]
fn remove_attributes() {
    let document = Builder::new().clean(r#"<table border="1"><tr></tr></table>"#);
    let html = document.to_string();
    assert_eq!(html, "<table><tbody><tr></tr></tbody></table>");
}
/// A double quote inside an attribute value is serialized as `&quot;`, so it
/// cannot end the double-quoted attribute early.
#[test]
fn quotes_in_attrs() {
    let fragment = "<b title='\"'>contents</b>";
    let result = clean(fragment);
    // The expected string needs the entity; a bare `"` here would end the
    // Rust string literal early.
    assert_eq!(result, "<b title=\"&quot;\">contents</b>");
}
/// Allowing `rel` as a generic attribute while `link_rel` is set must panic.
#[test]
#[should_panic]
fn panic_if_rel_is_allowed_and_replaced_generic() {
    let mut sanitizer = Builder::new();
    sanitizer
        .link_rel(Some("noopener noreferrer"))
        .generic_attributes(hashset!["rel"]);
    sanitizer.clean("something");
}
/// Allowing `rel` on `<a>` while `link_rel` is set must panic.
#[test]
#[should_panic]
fn panic_if_rel_is_allowed_and_replaced_a() {
    let mut sanitizer = Builder::new();
    sanitizer
        .link_rel(Some("noopener noreferrer"))
        .tag_attributes(hashmap!["a" => hashset!["rel"]]);
    sanitizer.clean("something");
}
/// Allowing `rel` on `<span>` does not panic even though `link_rel` is set.
#[test]
fn no_panic_if_rel_is_allowed_and_replaced_span() {
    let mut sanitizer = Builder::new();
    sanitizer
        .link_rel(Some("noopener noreferrer"))
        .tag_attributes(hashmap!["span" => hashset!["rel"]]);
    sanitizer.clean(r#"<span rel="what">s</span>"#);
}
/// With `link_rel(None)`, allowing `rel` generically does not panic.
#[test]
fn no_panic_if_rel_is_allowed_and_not_replaced_generic() {
    let mut sanitizer = Builder::new();
    sanitizer
        .link_rel(None)
        .generic_attributes(hashset!["rel"]);
    sanitizer.clean(r#"<a rel="what">s</a>"#);
}
/// With `link_rel(None)`, allowing `rel` on `<a>` does not panic.
#[test]
fn no_panic_if_rel_is_allowed_and_not_replaced_a() {
    let mut sanitizer = Builder::new();
    sanitizer
        .link_rel(None)
        .tag_attributes(hashmap!["a" => hashset!["rel"]]);
    sanitizer.clean(r#"<a rel="what">s</a>"#);
}
/// `<br>` is serialized without a closing tag.
#[test]
fn dont_close_void_elements() {
    let cleaned = clean("<br>");
    assert_eq!(cleaned, "<br>");
}
/// Listing `class` in `tag_attributes` for a tag that also has
/// `allowed_classes` must panic.
#[should_panic]
#[test]
fn panic_on_allowed_classes_tag_attributes() {
    let input = r#"<p class="foo bar"><a class="baz bleh">Hey</a></p>"#;
    let mut sanitizer = Builder::new();
    sanitizer
        .link_rel(None)
        .tag_attributes(hashmap![
            "p" => hashset!["class"],
            "a" => hashset!["class"],
        ])
        .allowed_classes(hashmap![
            "p" => hashset!["foo", "bar"],
            "a" => hashset!["baz"],
        ]);
    sanitizer.clean(input);
}
/// Listing `class` in `generic_attributes` together with `allowed_classes`
/// must panic.
#[should_panic]
#[test]
fn panic_on_allowed_classes_generic_attributes() {
    let input = r#"<p class="foo bar"><a class="baz bleh">Hey</a></p>"#;
    let mut sanitizer = Builder::new();
    sanitizer
        .link_rel(None)
        .generic_attributes(hashset!["class", "href", "some-foo"])
        .allowed_classes(hashmap![
            "p" => hashset!["foo", "bar"],
            "a" => hashset!["baz"],
        ]);
    sanitizer.clean(input);
}
/// Only the class names listed for a tag in `allowed_classes` survive.
#[test]
fn remove_non_allowed_classes() {
    let input = r#"<p class="foo bar"><a class="baz bleh">Hey</a></p>"#;
    let mut sanitizer = Builder::new();
    sanitizer.link_rel(None).allowed_classes(hashmap![
        "p" => hashset!["foo", "bar"],
        "a" => hashset!["baz"],
    ]);
    let html = sanitizer.clean(input).to_string();
    assert_eq!(html, r#"<p class="foo bar"><a class="baz">Hey</a></p>"#);
}
/// Allowing `class` on `<div>` through `tag_attributes` does not change how
/// `allowed_classes` filters `<p>` and `<a>`.
#[test]
fn remove_non_allowed_classes_with_tag_class() {
    let input = r#"<p class="foo bar"><a class="baz bleh">Hey</a></p>"#;
    let mut sanitizer = Builder::new();
    sanitizer
        .link_rel(None)
        .tag_attributes(hashmap!["div" => hashset!["class"]])
        .allowed_classes(hashmap![
            "p" => hashset!["foo", "bar"],
            "a" => hashset!["baz"],
        ]);
    let html = sanitizer.clean(input).to_string();
    assert_eq!(html, r#"<p class="foo bar"><a class="baz">Hey</a></p>"#);
}
/// Class lists are split on ASCII whitespace only.
#[test]
fn allowed_classes_ascii_whitespace() {
    // https://infra.spec.whatwg.org/#ascii-whitespace defines ASCII
    // whitespace as TAB (\t), LF (\n), FF (\x0C), CR (\x0D) and SPACE (\x20).
    // VT (\x0B) and Unicode whitespace are not separators, so the last two
    // tokens do not match the allowed names "f" and "g".
    let input = "<p class=\"a\tb\nc\x0Cd\re f\x0B g\u{2000}\">";
    let mut sanitizer = Builder::new();
    sanitizer.allowed_classes(hashmap![
        "p" => hashset!["a", "b", "c", "d", "e", "f", "g"],
    ]);
    let html = sanitizer.clean(input).to_string();
    assert_eq!(html, r#"<p class="a b c d e"></p>"#);
}
/// An attribute whose value is not in its `tag_attribute_values` set is
/// removed.
#[test]
fn remove_non_allowed_attributes_with_tag_attribute_values() {
    let input = r#"<p data-label="baz" name="foo"></p>"#;
    let mut sanitizer = Builder::new();
    sanitizer
        .tag_attribute_values(hashmap![
            "p" => hashmap!["data-label" => hashset!["bar"]],
        ])
        .tag_attributes(hashmap!["p" => hashset!["name"]]);
    let html = sanitizer.clean(input).to_string();
    assert_eq!(html, r#"<p name="foo"></p>"#);
}
/// An attribute whose value is in its `tag_attribute_values` set is kept.
#[test]
fn keep_allowed_attributes_with_tag_attribute_values() {
    let input = r#"<p data-label="bar" name="foo"></p>"#;
    let mut sanitizer = Builder::new();
    sanitizer
        .tag_attribute_values(hashmap![
            "p" => hashmap!["data-label" => hashset!["bar"]],
        ])
        .tag_attributes(hashmap!["p" => hashset!["name"]]);
    let html = sanitizer.clean(input).to_string();
    assert_eq!(html, r#"<p data-label="bar" name="foo"></p>"#);
}
/// `tag_attribute_values` matching ignores ASCII case: `CHECKBOX` matches
/// the allowed `checkbox`, and the original spelling is kept.
#[test]
fn tag_attribute_values_case_insensitive() {
    let input = r#"<input type="CHECKBOX" name="foo">"#;
    let mut sanitizer = Builder::new();
    sanitizer
        .tags(hashset!["input"])
        .tag_attribute_values(hashmap![
            "input" => hashmap!["type" => hashset!["checkbox"]],
        ])
        .tag_attributes(hashmap!["input" => hashset!["name"]]);
    let html = sanitizer.clean(input).to_string();
    assert_eq!(html, r#"<input type="CHECKBOX" name="foo">"#);
}
/// `set_tag_attribute_value` adds the attribute when the input lacks it.
#[test]
fn set_tag_attribute_values() {
    let input = r#"<a href="https://example.com/">Link</a>"#;
    let mut sanitizer = Builder::new();
    sanitizer
        .link_rel(None)
        .add_tag_attributes("a", &["target"])
        .set_tag_attribute_value("a", "target", "_blank");
    let html = sanitizer.clean(input).to_string();
    assert_eq!(
        html,
        r#"<a href="https://example.com/" target="_blank">Link</a>"#
    );
}
/// `set_tag_attribute_value` overwrites an existing value in place, keeping
/// the attribute's position.
#[test]
fn update_existing_set_tag_attribute_values() {
    let input = r#"<a target="bad" href="https://example.com/">Link</a>"#;
    let mut sanitizer = Builder::new();
    sanitizer
        .link_rel(None)
        .add_tag_attributes("a", &["target"])
        .set_tag_attribute_value("a", "target", "_blank");
    let html = sanitizer.clean(input).to_string();
    assert_eq!(
        html,
        r#"<a target="_blank" href="https://example.com/">Link</a>"#
    );
}
/// Setting an attribute value for a tag that is not allowed does not make
/// that tag appear in the output.
#[test]
fn unwhitelisted_set_tag_attribute_values() {
    let mut sanitizer = Builder::new();
    sanitizer.set_tag_attribute_value("my-elem", "my-attr", "val");
    let html = sanitizer.clean("<span>hi</span><my-elem>").to_string();
    assert_eq!(html, "<span>hi</span>");
}
/// A `javascript:` URL spelled entirely with hexadecimal character
/// references is still recognized and removed from `href`.
#[test]
fn remove_entity_link() {
    // Character-reference encoding of `javascript:alert('XSS')`. Keeping
    // the input encoded is the point of the test: the plain spelling is
    // already covered by `remove_js_link`.
    let fragment = "<a href=\"&#x6A&#x61&#x76&#x61&#x73&#x63&#x72&#x69&#x70&#x74&#x3A&#x61\
                    &#x6C&#x65&#x72&#x74&#x28&#x27&#x58&#x53&#x53&#x27&#x29\">Click me!</a>";
    let result = clean(fragment);
    assert_eq!(
        result.to_string(),
        "<a rel=\"noopener noreferrer\">Click me!</a>"
    );
}
/// A `UrlRelative::Custom` evaluator can drop, rewrite, or pass through
/// relative URLs; the absolute URL in the input comes out unchanged.
#[test]
fn remove_relative_url_evaluate() {
    fn evaluate(url: &str) -> Option<Cow<'_, str>> {
        // `//a/b/c` is "protocol-relative", meaning "a" is a hostname;
        // `/a/b/c` is an absolute path, and that is what gets re-rooted.
        let absolute_path = url.starts_with('/') && !url.starts_with("//");
        // URLs beginning with "ba" are the ones this evaluator bans.
        let banned = url.starts_with("ba");
        match (absolute_path, banned) {
            (true, _) => Some(Cow::Owned(format!("/root{url}"))),
            (false, true) => None,
            (false, false) => Some(Cow::Borrowed(url)),
        }
    }
    let input = "<a href=banned>banned</a><a href=/test/path>fixed</a><a href=path>passed</a><a href=http://google.com/>skipped</a>";
    let expected = "<a rel=\"noopener noreferrer\">banned</a><a href=\"/root/test/path\" rel=\"noopener noreferrer\">fixed</a><a href=\"path\" rel=\"noopener noreferrer\">passed</a><a href=\"http://google.com/\" rel=\"noopener noreferrer\">skipped</a>";
    let actual = Builder::new()
        .url_relative(UrlRelative::Custom(Box::new(evaluate)))
        .clean(input)
        .to_string();
    assert_eq!(actual, expected);
}
/// When the custom evaluator drops an `href`, the added `rel` appears where
/// the `href` was, relative to the other attributes.
#[test]
fn remove_relative_url_evaluate_b() {
    fn evaluate(url: &str) -> Option<Cow<'_, str>> {
        // `//a/b/c` is "protocol-relative", meaning "a" is a hostname;
        // `/a/b/c` is an absolute path, and that is what gets re-rooted.
        let absolute_path = url.starts_with('/') && !url.starts_with("//");
        // URLs beginning with "ba" are the ones this evaluator bans.
        let banned = url.starts_with("ba");
        match (absolute_path, banned) {
            (true, _) => Some(Cow::Owned(format!("/root{url}"))),
            (false, true) => None,
            (false, false) => Some(Cow::Borrowed(url)),
        }
    }
    let input = "<a href=banned>banned</a><a href=banned title=test>banned</a><a title=test href=banned>banned</a>";
    let expected = "<a rel=\"noopener noreferrer\">banned</a><a rel=\"noopener noreferrer\" title=\"test\">banned</a><a title=\"test\" rel=\"noopener noreferrer\">banned</a>";
    let actual = Builder::new()
        .url_relative(UrlRelative::Custom(Box::new(evaluate)))
        .clean(input)
        .to_string();
    assert_eq!(actual, expected);
}
/// An absolute URL comes out unchanged even though the custom evaluator
/// would replace any URL it is given with `invalid`.
#[test]
fn remove_relative_url_evaluate_c() {
    // Don't run on absolute URLs.
    fn evaluate(_: &str) -> Option<Cow<'_, str>> {
        Some(Cow::Owned("invalid".to_owned()))
    }
    let mut sanitizer = Builder::new();
    sanitizer.url_relative(UrlRelative::Custom(Box::new(evaluate)));
    let html = sanitizer
        .clean(r#"<a href="https://www.google.com/">google</a>"#)
        .to_string();
    assert_eq!(
        html,
        r#"<a href="https://www.google.com/" rel="noopener noreferrer">google</a>"#
    );
}
/// Unknown elements are removed but their text, including text inside
/// nested unknown elements, is kept.
#[test]
fn clean_children_of_bad_element() {
    let html = Builder::new()
        .clean("<bad><evil>a</evil>b</bad>")
        .to_string();
    assert_eq!(html, "ab");
}
/// `clean_from_reader` accepts a byte reader and sanitizes it the same way
/// `remove_script` expects for string input.
#[test]
fn reader_input() {
    let fragment = b"an <script>evil()</script> example";
    let result = Builder::new().clean_from_reader(&fragment[..]);
    assert!(result.is_ok());
    // Both spaces around the removed `<script>` remain, giving a double space.
    assert_eq!(result.unwrap().to_string(), "an  example");
}
/// An incomplete UTF-8 sequence in reader input becomes U+FFFD instead of
/// causing an error.
#[test]
fn reader_non_utf8() {
    let bytes: &[u8] = b"non-utf8 \xF0\x90\x80string";
    let outcome = Builder::new().clean_from_reader(bytes);
    assert!(outcome.is_ok());
    let html = outcome.unwrap().to_string();
    assert_eq!(html, "non-utf8 \u{fffd}string");
}
/// Formatting the cleaned document with `{}` yields the sanitized HTML.
#[test]
fn display_impl() {
    let document = Builder::new().link_rel(None).clean("a <a>link</a>");
    let shown = format!("{document}");
    assert_eq!(shown, "a <a>link</a>");
}
/// Formatting the cleaned document with `{:?}` wraps the HTML in
/// `Document(...)`.
#[test]
fn debug_impl() {
    let document = Builder::new().link_rel(None).clean("a <a>link</a>");
    let shown = format!("{document:?}");
    assert_eq!(shown, "Document(a <a>link</a>)");
}
/// Smoke test: `to_dom_node` can be called on a cleaned document. Only
/// compiled when `ammonia_unstable` is set.
#[cfg(ammonia_unstable)]
#[test]
fn to_dom_node() {
    let document = Builder::new().link_rel(None).clean("a <a>link</a>");
    let _node = document.to_dom_node();
}
/// `String::from` on a cleaned document gives the sanitized HTML, here with
/// the unclosed `<a>` closed.
#[test]
fn string_from_document() {
    let document = Builder::new().link_rel(None).clean("a <a>link");
    let html = String::from(document);
    assert_eq!(html, "a <a>link</a>");
}
/// Compile-time probe: accepts any value whose type is `Sync` and does
/// nothing with it.
fn require_sync<T>(_value: T)
where
    T: Sync,
{
}
/// Compile-time probe: accepts any value whose type is `Send` and does
/// nothing with it.
fn require_send<T>(_value: T)
where
    T: Send,
{
}
/// `Builder` must be both `Sync` and `Send`; the check happens at compile
/// time through the bounds on the two helpers.
#[test]
fn require_sync_and_send() {
    let for_sync = Builder::new();
    let for_send = Builder::new();
    require_sync(for_sync);
    require_send(for_send);
}
/// `id_prefix` is prepended to an allowed `id`; the `id` on `<b>`, where it
/// is not allowed, is removed.
#[test]
fn id_prefixed() {
    let input = r#"<a id="hello"></a><b id="hello"></a>"#;
    let mut sanitizer = Builder::new();
    sanitizer
        .tag_attributes(hashmap!["a" => hashset!["id"]])
        .id_prefix(Some("prefix-"));
    let html = String::from(sanitizer.clean(input));
    assert_eq!(
        html,
        r#"<a id="prefix-hello" rel="noopener noreferrer"></a><b></b>"#
    );
}
/// An `id` that already starts with the prefix is not prefixed again.
#[test]
fn id_already_prefixed() {
    let input = r#"<a id="prefix-hello"></a>"#;
    let mut sanitizer = Builder::new();
    sanitizer
        .tag_attributes(hashmap!["a" => hashset!["id"]])
        .id_prefix(Some("prefix-"));
    let html = String::from(sanitizer.clean(input));
    assert_eq!(
        html,
        r#"<a id="prefix-hello" rel="noopener noreferrer"></a>"#
    );
}
/// A tag listed in `clean_content_tags` is removed along with everything
/// inside it.
#[test]
fn clean_content_tags() {
    let input = r#"<script type="text/javascript"><a>Hello!</a></script>"#;
    let mut sanitizer = Builder::new();
    sanitizer.clean_content_tags(hashset!["script"]);
    let html = String::from(sanitizer.clean(input));
    assert_eq!(html, "");
}
/// Only the `clean_content_tags` element and its content disappear; its
/// siblings are kept.
#[test]
fn only_clean_content_tags() {
    let input = "<em>This is</em><script><a>Hello!</a></script><p>still here!</p>";
    let mut sanitizer = Builder::new();
    sanitizer.clean_content_tags(hashset!["script"]);
    let html = String::from(sanitizer.clean(input));
    assert_eq!(html, "<em>This is</em><p>still here!</p>");
}
/// Removing `<a>` and its attributes from the allowed sets combines with
/// `clean_content_tags` without changing the result for this input.
#[test]
fn clean_removed_default_tag() {
    let input = "<em>This is</em><script><a>Hello!</a></script><p>still here!</p>";
    let mut sanitizer = Builder::new();
    sanitizer
        .rm_tags(hashset!["a"])
        .rm_tag_attributes("a", hashset!["href", "hreflang"])
        .clean_content_tags(hashset!["script"]);
    let html = String::from(sanitizer.clean(input));
    assert_eq!(html, "<em>This is</em><p>still here!</p>");
}
/// Cleaning must panic when `a` is removed from the allowed tags and then
/// listed in `clean_content_tags`.
#[test]
#[should_panic]
fn panic_on_clean_content_tag_attribute() {
    let mut sanitizer = Builder::new();
    sanitizer
        .rm_tags(std::iter::once("a"))
        .clean_content_tags(hashset!["a"]);
    sanitizer.clean("");
}
/// Cleaning must panic when `a` is listed in `clean_content_tags` on an
/// otherwise default builder.
#[test]
#[should_panic]
fn panic_on_clean_content_tag() {
    let mut sanitizer = Builder::new();
    sanitizer.clean_content_tags(hashset!["a"]);
    sanitizer.clean("");
}
3618
/// `clean_text` escapes angle brackets and spaces as character references.
#[test]
fn clean_text_test() {
    // The expected value is the escaped form: `<` as `&lt;`, `>` as `&gt;`
    // and each space as `&#32;`.
    assert_eq!(
        clean_text("<this> is <a test function"),
        "&lt;this&gt;&#32;is&#32;&lt;a&#32;test&#32;function"
    );
}
3626
/// `clean_text` turns TAB, LF, FF and SPACE into numeric character
/// references rather than passing them through.
#[test]
fn clean_text_spaces_test() {
    // One reference per input character, in order: \x09, \x0a, \x0c, \x20.
    assert_eq!(clean_text("\x09\x0a\x0c\x20"), "&#9;&#10;&#12;&#32;");
}
3631
/// Content inside `<svg>` is removed when `svg` is not an allowed tag, and
/// an `<iframe>` nested in `<svg>` is removed even though `iframe` is allowed.
#[test]
fn ns_svg() {
    // Sanitizes `input` with the default tags plus `extra_tags`.
    fn clean_with(extra_tags: &[&'static str], input: &str) -> String {
        String::from(Builder::new().add_tags(extra_tags).clean(input))
    }

    // https://github.com/cure53/DOMPurify/pull/495
    let nested_payload =
        r##"<svg><iframe><a title="</iframe><img src onerror=alert(1)>">test"##;
    assert_eq!(clean_with(&["iframe"], nested_payload), "");

    assert_eq!(
        clean_with(
            &["iframe"],
            "<svg><iframe>remove me</iframe></svg><iframe>keep me</iframe>"
        ),
        "<iframe>keep me</iframe>"
    );

    assert_eq!(
        clean_with(
            &["iframe"],
            "<svg><a>remove me</a></svg><iframe>keep me</iframe>"
        ),
        "<iframe>keep me</iframe>"
    );

    assert_eq!(
        clean_with(
            &["iframe", "svg"],
            "<svg><a>keep me</a></svg><iframe>keep me</iframe>"
        ),
        "<svg><a rel=\"noopener noreferrer\">keep me</a></svg><iframe>keep me</iframe>"
    );
}
3654
/// Mutation-XSS regression test: the `<path>` subtree under the SVG
/// `<foreignObject>` is removed entirely, so the `<!--` text inside `<xmp>`
/// cannot combine with `-->` on reparse.
#[test]
fn ns_svg_2() {
    // The `--&gt;&lt;img ...&gt;` part must stay entity-encoded so that it
    // belongs to the first `<img` tag; written as raw `--><img ...>` it
    // would tokenize as a separate element and change what is tested.
    let fragment = "<svg><foreignObject><table><path><xmp><!--</xmp><img title'--&gt;&lt;img src=1 onerror=alert(1)&gt;'>";
    let result = Builder::default()
        .strip_comments(false)
        .add_tags(&["svg", "foreignObject", "table", "path", "xmp"])
        .clean(fragment);
    assert_eq!(
        result.to_string(),
        "<svg><foreignObject><table></table></foreignObject></svg>"
    );
}
3667
/// `<mglyph>` is kept only directly inside `<mtext>`; on its own or nested
/// in a `<div>` it is removed.
#[test]
fn ns_mathml() {
    // Sanitizes `input` with `math`, `mtext` and `mglyph` added to the
    // default tags.
    fn clean_math(input: &str) -> String {
        String::from(
            Builder::new()
                .add_tags(&["math", "mtext", "mglyph"])
                .clean(input),
        )
    }

    // https://github.com/cure53/DOMPurify/pull/495
    assert_eq!(clean_math("<mglyph></mglyph>"), "");
    assert_eq!(
        clean_math("<math><mtext><div><mglyph>"),
        "<math><mtext><div></div></mtext></math>"
    );
    assert_eq!(
        clean_math("<math><mtext><mglyph>"),
        "<math><mtext><mglyph></mglyph></mtext></math>"
    );
}
3699
/// Mutation-XSS regression test: the `<mglyph>` subtree next to the
/// `<table>` inside `<mtext>` is removed entirely, taking the `<xmp>` text
/// and the `<img>` with it.
#[test]
fn ns_mathml_2() {
    // The `title` value must stay entity-encoded (`--&gt;&lt;img ...&gt;`)
    // so that it is a single attribute value; written as raw `--><img ...>`
    // it would tokenize as a separate element and change what is tested.
    let fragment = "<math><mtext><table><mglyph><xmp><!--</xmp><img title='--&gt;&lt;img src=1 onerror=alert(1)&gt;'>";
    let result = Builder::default()
        .strip_comments(false)
        .add_tags(&["math", "mtext", "table", "mglyph", "xmp"])
        .clean(fragment);
    assert_eq!(
        result.to_string(),
        "<math><mtext><table></table></mtext></math>"
    );
}
3712
3713
/// XML processing instructions inside `<svg>` never reach the output, and
/// markup after a prematurely closed instruction is sanitized like any
/// other content.
#[test]
fn xml_processing_instruction() {
    // https://blog.slonser.info/posts/dompurify-node-type-confusion/
    let fragment = r##"<svg><?xml-stylesheet src='slonser' ?></svg>"##;
    let result = String::from(Builder::new().clean(fragment));
    assert_eq!(result.to_string(), "");

    let fragment = r##"<svg><?xml-stylesheet src='slonser' ?></svg>"##;
    let result = String::from(Builder::new().add_tags(&["svg"]).clean(fragment));
    assert_eq!(result.to_string(), "<svg></svg>");

    let fragment = r##"<svg><?xml-stylesheet ><img src=x onerror="alert('Ammonia bypassed!!!')"> ?></svg>"##;
    let result = String::from(Builder::new().add_tags(&["svg"]).clean(fragment));
    // The leftover ` ?>` is plain text, so its `>` is serialized as `&gt;`.
    assert_eq!(result.to_string(), "<svg></svg><img src=\"x\"> ?&gt;");
}
3729
/// Walks the prefix set through set/add/remove: duplicate adds and repeated
/// removes do not change the count, and removing the last prefix resets the
/// field to `None`.
#[test]
fn generic_attribute_prefixes() {
    let data_prefix = ["data-"];
    let code_prefix = ["code-"];
    let mut builder = Builder::new();
    assert!(builder.generic_attribute_prefixes.is_none());

    let mut initial: HashSet<&'_ str> = HashSet::new();
    initial.insert("data-");
    builder.generic_attribute_prefixes(initial);
    assert!(builder.generic_attribute_prefixes.is_some());
    assert_eq!(
        builder.generic_attribute_prefixes.as_ref().map(HashSet::len),
        Some(1)
    );

    // Adding a prefix that is already present leaves the count unchanged.
    builder.add_generic_attribute_prefixes(&data_prefix);
    assert_eq!(
        builder.generic_attribute_prefixes.as_ref().map(HashSet::len),
        Some(1)
    );

    builder.add_generic_attribute_prefixes(&code_prefix);
    assert_eq!(
        builder.generic_attribute_prefixes.as_ref().map(HashSet::len),
        Some(2)
    );

    // Removing the same prefix twice only has an effect the first time.
    builder.rm_generic_attribute_prefixes(&code_prefix);
    assert_eq!(
        builder.generic_attribute_prefixes.as_ref().map(HashSet::len),
        Some(1)
    );
    builder.rm_generic_attribute_prefixes(&code_prefix);
    assert_eq!(
        builder.generic_attribute_prefixes.as_ref().map(HashSet::len),
        Some(1)
    );

    builder.rm_generic_attribute_prefixes(&data_prefix);
    assert!(builder.generic_attribute_prefixes.is_none());
}
3752
/// Attributes matching an allowed generic prefix are kept in addition to
/// the ones allowed by name.
#[test]
fn generic_attribute_prefixes_clean() {
    let input = r#"<a data-1 data-2 code-1 code-2><a>Hello!</a></a>"#;

    // No prefixes: only the explicitly allowed `data-1` survives.
    let mut by_name_only = Builder::new();
    by_name_only.add_tag_attributes("a", &["data-1"]);
    assert_eq!(
        String::from(by_name_only.clean(input)),
        r#"<a data-1="" rel="noopener noreferrer"></a><a rel="noopener noreferrer">Hello!</a>"#
    );

    // The `data-` prefix additionally lets `data-2` through.
    let mut with_data_prefix = Builder::new();
    with_data_prefix
        .add_tag_attributes("a", &["data-1"])
        .add_generic_attribute_prefixes(&["data-"]);
    assert_eq!(
        String::from(with_data_prefix.clean(input)),
        r#"<a data-1="" data-2="" rel="noopener noreferrer"></a><a rel="noopener noreferrer">Hello!</a>"#
    );

    // Both prefixes: all four attributes are kept.
    let mut with_both_prefixes = Builder::new();
    with_both_prefixes
        .add_tag_attributes("a", &["data-1", "code-1"])
        .add_generic_attribute_prefixes(&["data-", "code-"]);
    assert_eq!(
        String::from(with_both_prefixes.clean(input)),
        r#"<a data-1="" data-2="" code-1="" code-2="" rel="noopener noreferrer"></a><a rel="noopener noreferrer">Hello!</a>"#
    );
}
/// A `<` followed by a space is not treated as HTML.
#[test]
fn lesser_than_isnt_html() {
    let detected = is_html("1 < 2");
    assert!(!detected);
}
/// A `<` directly followed by a digit is not treated as HTML.
#[test]
fn dense_lesser_than_isnt_html() {
    let detected = is_html("1<2");
    assert!(!detected);
}
/// `<2>` is not treated as HTML.
#[test]
fn what_about_number_elements() {
    let detected = is_html("foo<2>bar");
    assert!(!detected);
}
/// Rust turbofish syntax contains `<u8>`, which is detected as HTML.
#[test]
fn turbofish_is_html_sadly() {
    let detected = is_html("Vec::<u8>::new()");
    assert!(detected);
}
/// A trailing `<g>` is detected as HTML.
#[test]
fn stop_grinning() {
    let detected = is_html("did you really believe me? <g>");
    assert!(detected);
}
/// A lone `<b>` is detected as HTML.
#[test]
fn dont_be_bold() {
    let detected = is_html("<b>");
    assert!(detected);
}
3816
/// Table-driven check of `UrlRelative::RewriteWithRoot`: each case gives the
/// root URL, the document path, the `href` in the input and the `href`
/// expected in the output.
#[test]
fn rewrite_with_root() {
    // Root URL with and without a trailing slash, plus the bare site root.
    const ROOT_DIR: &str = "https://github.com/rust-ammonia/ammonia/blob/master/";
    const ROOT_FILE: &str = "https://github.com/rust-ammonia/ammonia/blob/master";
    const SITE: &str = "https://github.com/";
    // Document paths relative to the root.
    const README: &str = "README.md";
    const DEEP_README: &str = "rust-ammonia/ammonia/blob/master/README.md";

    // (root, path, url, result)
    let cases = [
        (
            ROOT_DIR,
            README,
            "",
            "https://github.com/rust-ammonia/ammonia/blob/master/README.md",
        ),
        (
            ROOT_DIR,
            README,
            "/",
            "https://github.com/rust-ammonia/ammonia/blob/master/",
        ),
        (
            ROOT_DIR,
            README,
            "/CONTRIBUTING.md",
            "https://github.com/rust-ammonia/ammonia/blob/master/CONTRIBUTING.md",
        ),
        (
            ROOT_FILE,
            README,
            "",
            "https://github.com/rust-ammonia/ammonia/blob/README.md",
        ),
        (
            ROOT_FILE,
            README,
            "/",
            "https://github.com/rust-ammonia/ammonia/blob/",
        ),
        (
            ROOT_FILE,
            README,
            "/CONTRIBUTING.md",
            "https://github.com/rust-ammonia/ammonia/blob/CONTRIBUTING.md",
        ),
        (
            ROOT_DIR,
            "",
            "",
            "https://github.com/rust-ammonia/ammonia/blob/master/",
        ),
        (
            ROOT_DIR,
            "",
            "/",
            "https://github.com/rust-ammonia/ammonia/blob/master/",
        ),
        (
            ROOT_DIR,
            "",
            "/CONTRIBUTING.md",
            "https://github.com/rust-ammonia/ammonia/blob/master/CONTRIBUTING.md",
        ),
        (
            SITE,
            DEEP_README,
            "",
            "https://github.com/rust-ammonia/ammonia/blob/master/README.md",
        ),
        (SITE, DEEP_README, "/", "https://github.com/"),
        (
            SITE,
            DEEP_README,
            "CONTRIBUTING.md",
            "https://github.com/rust-ammonia/ammonia/blob/master/CONTRIBUTING.md",
        ),
        (
            SITE,
            DEEP_README,
            "/CONTRIBUTING.md",
            "https://github.com/CONTRIBUTING.md",
        ),
    ];

    for (root, path, url, result) in cases {
        let input = format!(r#"<a href="{url}">test</a>"#);
        let expected = format!(r#"<a href="{result}" rel="noopener noreferrer">test</a>"#);
        let policy = UrlRelative::RewriteWithRoot {
            root: Url::parse(root).unwrap(),
            path: path.to_string(),
        };
        let actual = Builder::new()
            .url_relative(policy)
            .clean(&input)
            .to_string();
        // Print the failing case before asserting so the table row is
        // identifiable in the test output.
        if expected != actual {
            println!(
                "failed to check ({root}, {path}, {url}, {result})\n{r} != {a}",
                r = expected,
                a = actual
            );
            assert_eq!(expected, actual);
        }
    }
}
3918}