From 0611aac45fae5a8f23d86f388b4b47b23cb7fd2a Mon Sep 17 00:00:00 2001
From: Nicole Tietz-Sokolskaya <me@ntietz.com>
Date: Mon, 3 Jun 2024 18:15:52 +0000
Subject: [PATCH] Escape HTML while parsing Markdown documents to remove XSS
 vulnerabilities (#4)

Here, I opted to use the Markdown parser's detection of HTML so that we don't add another library. This does limit users somewhat, because it means that *no* inline HTML is allowed, but I think this is acceptable: this is a platform for project management, not general-purpose publishing, so inline HTML is probably not necessary. There is a clear upgrade path in the future to add sanitizing instead of escaping tags, if we want.

This approach also gives us a clear place to plug in detection of extra things, like custom `@` tags or other features.

Reviewed-on: https://git.kittencollective.com/nicole/pique/pulls/4
---
 src/models/documents.rs | 15 ++++++++++++++-
 1 file changed, 14 insertions(+), 1 deletion(-)

diff --git a/src/models/documents.rs b/src/models/documents.rs
index 5674e8a..40ca3f5 100644
--- a/src/models/documents.rs
+++ b/src/models/documents.rs
@@ -25,8 +25,21 @@ impl Document {
 
         let parser = markdown::Parser::new_ext(&self.content, options);
 
+        // If we just process things as they are, we are vulnerable to XSS
+        // attacks, since users can inject any HTML they'd like. To prevent
+        // this, we convert any parsed HTML to just text. In the future, we can
+        // instead sanitize the HTML using something like
+        // [ammonia](https://crates.io/crates/ammonia) to make the HTML safer.
+        // Draws inspiration from
+        // [pulldown-cmark/pulldown-cmark#608](https://github.com/pulldown-cmark/pulldown-cmark/issues/608)
+        let escaped = parser.into_iter().map(|event| match event {
+            markdown::Event::Html(html) => markdown::Event::Text(html),
+            markdown::Event::InlineHtml(html) => markdown::Event::Text(html),
+            _ => event,
+        });
+
         let mut html_output = String::new();
-        markdown::html::push_html(&mut html_output, parser);
+        markdown::html::push_html(&mut html_output, escaped);
 
         html_output
     }