From 38086b981663e7c1d5434ecf79be92d0df111917 Mon Sep 17 00:00:00 2001 From: "google-labs-jules[bot]" <161369871+google-labs-jules[bot]@users.noreply.github.com> Date: Sun, 1 Mar 2026 06:11:08 +0000 Subject: [PATCH 1/4] =?UTF-8?q?=F0=9F=9B=A1=EF=B8=8F=20Sentinel:=20[HIGH]?= =?UTF-8?q?=20Fix=20XSS=20via=20un-sanitized=20go-readability=20output?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Co-authored-by: lucasew <15693688+lucasew@users.noreply.github.com> --- .jules/sentinel.md | 1 + api/index.go | 38 ++++++++++++++++++++++++++++++++++---- api/xss_test.go | 39 +++++++++++++++++++++++++++++++++++++++ go.mod | 3 +++ go.sum | 6 ++++++ 5 files changed, 83 insertions(+), 4 deletions(-) create mode 100644 api/xss_test.go diff --git a/.jules/sentinel.md b/.jules/sentinel.md index 2c991fb..2fe1f70 100644 --- a/.jules/sentinel.md +++ b/.jules/sentinel.md @@ -13,3 +13,4 @@ **Learning:** `ip.IsPrivate()` and `ip.IsLoopback()` are not sufficient to block all local traffic. The concept of "Unspecified" addresses (all zeros) must also be explicitly handled when validating IPs for SSRF protection in Go. **Prevention:** When implementing a safe dialer to prevent SSRF, always include `ip.IsUnspecified()` in the list of blocked IP characteristics, in addition to private, loopback, and link-local addresses. +- 2024-03-01: [XSS via go-readability] go-readability output is not safe for direct rendering and must be sanitized using bluemonday before outputting HTML. diff --git a/api/index.go b/api/index.go index bf63012..5215fd7 100644 --- a/api/index.go +++ b/api/index.go @@ -26,6 +26,7 @@ import ( "codeberg.org/readeck/go-readability/v2" "github.com/mattn/godown" + "github.com/microcosm-cc/bluemonday" "golang.org/x/net/html" ) @@ -78,6 +79,15 @@ var ( */ ReadabilityParser = readability.NewParser() + /** + * HTMLSanitizer is the shared instance of the bluemonday HTML sanitizer. + * + * It uses the UGCPolicy to allow common, safe HTML tags and attributes + * while stripping out potentially malicious content like scripts and + * event handlers (e.g., onclick, onerror) to prevent XSS attacks. + */ + HTMLSanitizer = bluemonday.UGCPolicy() + // httpClient used for fetching remote articles with timeouts and redirect policy httpClient = &http.Client{ Transport: &http.Transport{ @@ -310,19 +320,29 @@ func Handler(w http.ResponseWriter, r *http.Request) { */ type formatHandler func(w http.ResponseWriter, article readability.Article, buf *bytes.Buffer) +/** + * sanitizeHTML strips malicious content from an HTML string. + */ +func sanitizeHTML(html string) string { + return HTMLSanitizer.Sanitize(html) +} + /** * formatHTML renders the article using the standard HTML template. * This is the default view for human consumption. */ func formatHTML(w http.ResponseWriter, article readability.Article, contentBuf *bytes.Buffer) { w.Header().Set("Content-Type", "text/html; charset=utf-8") + + sanitizedContent := sanitizeHTML(contentBuf.String()) + // inject safe HTML content data := struct { Title string Content template.HTML }{ Title: article.Title(), - Content: template.HTML(contentBuf.String()), + Content: template.HTML(sanitizedContent), } if err := DefaultTemplate.Execute(w, data); err != nil { // at this point, we can't write a JSON error, so we log it @@ -336,7 +356,11 @@ func formatHTML(w http.ResponseWriter, article readability.Article, contentBuf * */ func formatMarkdown(w http.ResponseWriter, _ readability.Article, buf *bytes.Buffer) { w.Header().Set("Content-Type", "text/markdown") - if err := godown.Convert(w, buf, nil); err != nil { + + sanitizedContent := sanitizeHTML(buf.String()) + sanitizedBuf := bytes.NewBufferString(sanitizedContent) + + if err := godown.Convert(w, sanitizedBuf, nil); err != nil { log.Printf("error converting to markdown: %v", err) } } @@ -347,9 +371,12 @@ func formatMarkdown(w http.ResponseWriter, _ readability.Article, buf *bytes.Buf */ func formatJSON(w http.ResponseWriter, article readability.Article, buf *bytes.Buffer) { w.Header().Set("Content-Type", "application/json") + + sanitizedContent := sanitizeHTML(buf.String()) + if err := json.NewEncoder(w).Encode(map[string]string{ "title": article.Title(), - "content": buf.String(), + "content": sanitizedContent, }); err != nil { log.Printf("error encoding json: %v", err) } @@ -360,7 +387,10 @@ func formatJSON(w http.ResponseWriter, article readability.Article, buf *bytes.B */ func formatText(w http.ResponseWriter, _ readability.Article, buf *bytes.Buffer) { w.Header().Set("Content-Type", "text/plain; charset=utf-8") - if _, err := w.Write(buf.Bytes()); err != nil { + + sanitizedContent := sanitizeHTML(buf.String()) + + if _, err := w.Write([]byte(sanitizedContent)); err != nil { log.Printf("error writing text response: %v", err) } } diff --git a/api/xss_test.go b/api/xss_test.go new file mode 100644 index 0000000..2d093b6 --- /dev/null +++ b/api/xss_test.go @@ -0,0 +1,39 @@ +package handler + +import ( + "bytes" + "strings" + "testing" +) + +// mockArticle implements readability.Article for testing +type mockArticle struct { + title string + html string +} + +func (m mockArticle) Title() string { + return m.title +} + +func (m mockArticle) RenderHTML(w *bytes.Buffer) error { + w.WriteString(m.html) + return nil +} + +func TestXSSHTMLSanitization(t *testing.T) { + // A mock article with malicious content + maliciousHTML := `

Hello

` + article := mockArticle{title: "Test", html: maliciousHTML} + + buf := &bytes.Buffer{} + if err := article.RenderHTML(buf); err != nil { + t.Fatalf("failed to render html: %v", err) + } + + sanitizedHTML := sanitizeHTML(buf.String()) + + if strings.Contains(sanitizedHTML, "