From 7f5e8c9c5fe4521149fe8c0313363226fcdded70 Mon Sep 17 00:00:00 2001 From: "google-labs-jules[bot]" <161369871+google-labs-jules[bot]@users.noreply.github.com> Date: Fri, 13 Feb 2026 07:22:27 +0000 Subject: [PATCH] Refactor: modularize API handler into internal packages Decomposed the monolithic `api/index.go` into domain-specific internal packages to improve maintainability, testability, and separation of concerns. - `internal/transport`: HTTP client configuration and SSRF protection (`NewSafeClient`). - `internal/article`: Article fetching and parsing logic (`Fetch`). - `internal/request`: Request parsing, validation, and URL reconstruction (`NormalizeURL`, `ReconstructURL`, `GetFormat`). - `internal/formatter`: Output formatting and rendering (`Render`, `Template`). - Updated `api/index.go` to serve as a clean orchestration layer using these packages. - Migrated and split tests into their respective package test files. - Added integration test in `api/index_test.go` to verify wiring. This refactoring adheres to the Single Responsibility Principle and makes the codebase easier to navigate and extend. 
Co-authored-by: lucasew <15693688+lucasew@users.noreply.github.com> --- api/index.go | 444 +----------------------------- api/index_test.go | 123 +-------- api/llm_test.go | 59 ---- api/reconstruct_test.go | 103 ------- internal/article/fetch.go | 100 +++++++ internal/article/fetch_test.go | 47 ++++ internal/formatter/render.go | 109 ++++++++ internal/request/utils.go | 155 +++++++++++ internal/request/utils_test.go | 170 ++++++++++++ internal/transport/client.go | 64 +++++ internal/transport/client_test.go | 54 ++++ 11 files changed, 719 insertions(+), 709 deletions(-) delete mode 100644 api/llm_test.go delete mode 100644 api/reconstruct_test.go create mode 100644 internal/article/fetch.go create mode 100644 internal/article/fetch_test.go create mode 100644 internal/formatter/render.go create mode 100644 internal/request/utils.go create mode 100644 internal/request/utils_test.go create mode 100644 internal/transport/client.go create mode 100644 internal/transport/client_test.go diff --git a/api/index.go b/api/index.go index 378c310..7f2ab57 100644 --- a/api/index.go +++ b/api/index.go @@ -11,261 +11,20 @@ import ( "bytes" "context" "encoding/json" - "errors" - "fmt" - "html/template" - "io" "log" - "math/rand" - "net" "net/http" - "net/url" - "strings" - "syscall" "time" - "codeberg.org/readeck/go-readability/v2" - "github.com/mattn/godown" - "golang.org/x/net/html" + "github.com/lucasew/readability-web/internal/article" + "github.com/lucasew/readability-web/internal/formatter" + "github.com/lucasew/readability-web/internal/request" + "github.com/lucasew/readability-web/internal/transport" ) const ( - maxRedirects = 5 - httpClientTimeout = 10 * time.Second - maxBodySize = int64(2 * 1024 * 1024) // 2 MiB - dialerTimeout = 30 * time.Second - dialerKeepAlive = 30 * time.Second - handlerTimeout = 5 * time.Second + handlerTimeout = 5 * time.Second ) -/** - * Template is the raw HTML template string used for rendering the article. 
- * - * It provides a minimal HTML5 structure and includes the Sakura CSS library - * for a clean, typography-focused reading experience without distractions. - * The template expects a struct with Title and Content fields. - */ -const Template = ` - - - - - - - - - -

{{.Title}}

- {{.Content}} - - -` - -var ( - /** - * DefaultTemplate is the parsed Go template instance. - * - * It is initialized at startup to avoid the overhead of parsing the template - * on every request, ensuring faster response times. - */ - DefaultTemplate = template.Must(template.New("article").Parse(Template)) - - /** - * ReadabilityParser is the shared instance of the readability parser. - * - * It is reusable and thread-safe, allowing concurrent processing of multiple - * requests without the need to create new parser instances. - */ - ReadabilityParser = readability.NewParser() - - // httpClient used for fetching remote articles with timeouts and redirect policy - httpClient = &http.Client{ - Transport: &http.Transport{ - DialContext: newSafeDialer().DialContext, - }, - Timeout: httpClientTimeout, - CheckRedirect: func(_ *http.Request, via []*http.Request) error { - if len(via) >= maxRedirects { - return fmt.Errorf("stopped after %d redirects", maxRedirects) - } - return nil - }, - } -) - -/** - * newSafeDialer creates a custom net.Dialer that prevents Server-Side Request Forgery (SSRF). - * - * It validates the resolved IP address before connecting, ensuring that it is not: - * - A private network address (e.g., 192.168.x.x, 10.x.x.x) - * - A loopback address (e.g., 127.0.0.1) - * - An unspecified address (e.g., 0.0.0.0) - * - * This validation happens *after* DNS resolution but *before* the connection is established. - * This prevents Time-of-Check Time-of-Use (TOCTOU) attacks where a domain could - * resolve to a safe IP during check but switch to a private IP during connection. - * - * This is critical for preventing the application from accessing internal services or metadata services - * (like AWS EC2 metadata) running on the same network. 
- */ -func newSafeDialer() *net.Dialer { - dialer := &net.Dialer{ - Timeout: dialerTimeout, - KeepAlive: dialerKeepAlive, - Control: func(_, address string, _ syscall.RawConn) error { - host, _, err := net.SplitHostPort(address) - if err != nil { - return err - } - ips, err := net.LookupIP(host) - if err != nil { - return err - } - for _, ip := range ips { - if ip.IsPrivate() || ip.IsLoopback() || ip.IsLinkLocalUnicast() || ip.IsLinkLocalMulticast() || ip.IsUnspecified() { - return errors.New("refusing to connect to private network address") - } - } - return nil - }, - } - return dialer -} - -/** - * userAgentPool contains a list of real browser User-Agent strings. - * - * We rotate through these to mimic legitimate traffic, as many websites block requests - * from default HTTP clients (like Go-http-client) or known bot User-Agents. - * This list requires periodic maintenance to stay current with browser versions. - */ -var userAgentPool = []string{ - "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/134.0.0.0 Safari/537.36 Edg/134.0.0.0", - "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/133.0.0.0 Safari/537.36", - "Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/133.0.0.0 Safari/537.36", - "Mozilla/5.0 (Windows NT 10.0; Win64; x64; rv:135.0) Gecko/20100101 Firefox/135.0", - "Mozilla/5.0 (iPhone; CPU iPhone OS 18_3 like Mac OS X) AppleWebKit/605.1.15 (KHTML, like Gecko) Version/18.3 Mobile/15E148 Safari/604.1", -} - -/** - * llmUserAgents contains a list of substring identifiers for known LLM bots and crawlers. - * - * This list is used to detect requests from AI agents (like GPTBot, Claude, etc.) - * so the application can automatically serve a token-efficient format (Markdown) - * instead of full HTML. 
- */ -var llmUserAgents = []string{ - "gptbot", - "chatgpt", - "claude", - "googlebot", - "bingbot", - "anthropic", - "perplexity", - "claudebot", - "github-copilot", -} - -/** - * getRandomUserAgent returns a random User-Agent string from the pool. - * - * Rotating User-Agents helps to evade simple anti-bot measures that block requests - * based on static or default Go HTTP client User-Agents. - */ -func getRandomUserAgent() string { - return userAgentPool[rand.Intn(len(userAgentPool))] -} - -/** - * fetchAndParse retrieves the content from the target URL and parses it using the readability library. - * - * Key behaviors: - * - Spoofs User-Agent and other browser headers to avoid blocking. - * - Forwards Accept-Language from the client to respect language preferences. - * - Sets security headers (Sec-Fetch-*) to look like a navigation request. - * - Limits the response body size to maxBodySize to prevent Out-Of-Memory (OOM) crashes on large pages. - * - Uses a custom httpClient with SSRF protection. 
- */ -func fetchAndParse(ctx context.Context, link *url.URL, r *http.Request) (readability.Article, error) { - req, err := http.NewRequestWithContext(ctx, "GET", link.String(), nil) - if err != nil { - return readability.Article{}, err - } - - // Always spoof everything to look like a real browser - ua := getRandomUserAgent() - req.Header.Set("User-Agent", ua) - req.Header.Set("Accept", "text/html,application/xhtml+xml,application/xml;q=0.9,image/avif,image/webp,image/apng,*/*;q=0.8") - - // Fallback headers from client request - if lang := r.Header.Get("Accept-Language"); lang != "" { - req.Header.Set("Accept-Language", lang) - } else { - req.Header.Set("Accept-Language", "en-US,en;q=0.9") - } - - req.Header.Set("Cache-Control", "no-cache") - req.Header.Set("Pragma", "no-cache") - req.Header.Set("Sec-Ch-Ua-Mobile", "?0") - req.Header.Set("Sec-Fetch-Dest", "document") - req.Header.Set("Sec-Fetch-Mode", "navigate") - req.Header.Set("Sec-Fetch-Site", "none") - req.Header.Set("Sec-Fetch-User", "?1") - req.Header.Set("Upgrade-Insecure-Requests", "1") - - res, err := httpClient.Do(req) - if err != nil { - return readability.Article{}, err - } - defer res.Body.Close() - - // limit body size to prevent OOM - reader := io.LimitReader(res.Body, maxBodySize) - node, err := html.Parse(reader) - if err != nil { - return readability.Article{}, err - } - - return ReadabilityParser.ParseDocument(node, link) -} - -/** - * normalizeAndValidateURL cleans and validates the user-provided URL. - * - * It handles common normalization issues, such as: - * - Missing scheme (defaults to https://). - * - Malformed schemes caused by some proxies (e.g., http:/example.com -> http://example.com). - * - * It also restricts the scheme to 'http' or 'https' to prevent usage of other protocols like 'file://' or 'gopher://'. 
- */ -func normalizeAndValidateURL(rawLink string) (*url.URL, error) { - if rawLink == "" { - return nil, errors.New("url parameter is empty") - } - - // Fix browser/proxy normalization of :// to :/ - if strings.HasPrefix(rawLink, "http:/") && !strings.HasPrefix(rawLink, "http://") { - rawLink = "http://" + rawLink[6:] - } else if strings.HasPrefix(rawLink, "https:/") && !strings.HasPrefix(rawLink, "https://") { - rawLink = "https://" + rawLink[7:] - } - - // add scheme if missing - if !strings.Contains(rawLink, "://") { - // default to https if no scheme provided - rawLink = fmt.Sprintf("https://%s", rawLink) - } - link, err := url.Parse(rawLink) - if err != nil { - return nil, fmt.Errorf("invalid URL: %w", err) - } - // only allow http(s) - if link.Scheme != "http" && link.Scheme != "https" { - return nil, errors.New("unsupported URL scheme") - } - return link, nil -} - /** * securityHeadersMiddleware applies a baseline of security headers to every response. * @@ -300,183 +59,6 @@ func Handler(w http.ResponseWriter, r *http.Request) { securityHeadersMiddleware(http.HandlerFunc(handler)).ServeHTTP(w, r) } -/** - * formatHandler defines the function signature for handling different output formats. - * - * Implementations are responsible for: - * 1. Setting the appropriate Content-Type header. - * 2. Encoding the article content (HTML, JSON, Markdown, etc.) into the response writer. - * 3. Handling any encoding errors (logging them, as headers are already written). - */ -type formatHandler func(w http.ResponseWriter, article readability.Article, buf *bytes.Buffer) - -/** - * formatHTML renders the article using the standard HTML template. - * This is the default view for human consumption. 
- */ -func formatHTML(w http.ResponseWriter, article readability.Article, contentBuf *bytes.Buffer) { - w.Header().Set("Content-Type", "text/html; charset=utf-8") - // inject safe HTML content - data := struct { - Title string - Content template.HTML - }{ - Title: article.Title(), - Content: template.HTML(contentBuf.String()), - } - if err := DefaultTemplate.Execute(w, data); err != nil { - // at this point, we can't write a JSON error, so we log it - log.Printf("error executing HTML template: %v", err) - } -} - -/** - * formatMarkdown converts the article content to Markdown. - * Useful for LLMs or note-taking applications. - */ -func formatMarkdown(w http.ResponseWriter, _ readability.Article, buf *bytes.Buffer) { - w.Header().Set("Content-Type", "text/markdown") - if err := godown.Convert(w, buf, nil); err != nil { - log.Printf("error converting to markdown: %v", err) - } -} - -/** - * formatJSON returns the raw title and HTML content in a JSON object. - * Useful for programmatic consumption where the client wants to handle rendering. - */ -func formatJSON(w http.ResponseWriter, article readability.Article, buf *bytes.Buffer) { - w.Header().Set("Content-Type", "application/json") - if err := json.NewEncoder(w).Encode(map[string]string{ - "title": article.Title(), - "content": buf.String(), - }); err != nil { - log.Printf("error encoding json: %v", err) - } -} - -/** - * formatText returns the plain text content, stripped of HTML tags. - */ -func formatText(w http.ResponseWriter, _ readability.Article, buf *bytes.Buffer) { - w.Header().Set("Content-Type", "text/plain; charset=utf-8") - if _, err := w.Write(buf.Bytes()); err != nil { - log.Printf("error writing text response: %v", err) - } -} - -/** - * formatters maps format names (including aliases) to their respective handler functions. - * - * This design allows for easy extensibility of output formats. New formats can be - * added by implementing a formatHandler and registering it here. 
- */ -var formatters = map[string]formatHandler{ - "html": formatHTML, - "md": formatMarkdown, - "markdown": formatMarkdown, - "json": formatJSON, - "text": formatText, - "txt": formatText, -} - -/** - * isLLM attempts to detect if the request is originated from a known LLM crawler or tool. - * - * It checks the User-Agent string against a list of known identifiers (e.g., GPTBot, Claude). - * This allows the application to default to a machine-friendly format (Markdown) automatically. - */ -func isLLM(r *http.Request) bool { - ua := strings.ToLower(r.UserAgent()) - for _, s := range llmUserAgents { - if strings.Contains(ua, s) { - return true - } - } - return false -} - -/** - * getFormat determines the desired output format based on request signals. - * - * Priority order: - * 1. Query parameter 'format' (explicit override). - * 2. Accept Header (content negotiation). - * 3. LLM Detection (auto-switch to Markdown for bots). - * 4. Default to 'html'. - */ -func getFormat(r *http.Request) string { - // 1. Priority: Query parameter - format := r.URL.Query().Get("format") - if format != "" { - return format - } - - // 2. Priority: Accept Header - accept := strings.ToLower(r.Header.Get("Accept")) - if strings.Contains(accept, "application/json") { - return "json" - } - if strings.Contains(accept, "text/markdown") || strings.Contains(accept, "text/x-markdown") { - return "md" - } - if strings.Contains(accept, "text/plain") { - return "text" - } - if strings.Contains(accept, "text/html") { - return "html" - } - - // 3. Priority: LLM Detection (defaults to markdown) - if isLLM(r) { - return "md" - } - - return "html" -} - -/** - * reconstructTargetURL handles query parameter extraction quirks caused by Vercel rewrites. - * - * When Vercel rewrites a path like `/api/extract?url=http://example.com?foo=bar`, - * the `url` query parameter might be cleanly separated from `foo=bar`. 
- * This function merges stray query parameters back into the target URL to ensure - * the full original URL is processed. - */ -func reconstructTargetURL(r *http.Request) string { - rawLink := r.URL.Query().Get("url") - if rawLink == "" { - return "" - } - - // Reconstruct URL if it was split by query parameters during rewrite - u, err := url.Parse(rawLink) - if err != nil { - return rawLink - } - - targetQuery := u.Query() - originalQuery := r.URL.Query() - hasChanges := false - for k, vs := range originalQuery { - // Skip 'url' and 'format' as they are control parameters for this API, - // not part of the target website's query string. - // Including them would cause recursion or invalid target URLs. - if k == "url" || k == "format" { - continue - } - hasChanges = true - for _, v := range vs { - targetQuery.Add(k, v) - } - } - if hasChanges { - u.RawQuery = targetQuery.Encode() - return u.String() - } - return rawLink -} - /** * handler implements the core request processing pipeline. * @@ -489,12 +71,12 @@ func reconstructTargetURL(r *http.Request) string { * 6. Format: Outputs the result in the requested format (HTML, Markdown, JSON, etc.). 
*/ func handler(w http.ResponseWriter, r *http.Request) { - rawLink := reconstructTargetURL(r) + rawLink := request.ReconstructURL(r) - format := getFormat(r) + format := request.GetFormat(r) log.Printf("request: %s %s", format, rawLink) - link, err := normalizeAndValidateURL(rawLink) + link, err := request.NormalizeURL(rawLink) if err != nil { log.Printf("error normalizing URL %q: %v", rawLink, err) writeError(w, http.StatusBadRequest, "Invalid URL provided") @@ -504,7 +86,8 @@ func handler(w http.ResponseWriter, r *http.Request) { ctx, cancel := context.WithTimeout(r.Context(), handlerTimeout) defer cancel() - article, err := fetchAndParse(ctx, link, r) + safeClient := transport.NewSafeClient() + art, err := article.Fetch(ctx, link, r, safeClient) if err != nil { log.Printf("error fetching or parsing URL %q: %v", rawLink, err) writeError(w, http.StatusUnprocessableEntity, "Failed to process URL") @@ -512,17 +95,16 @@ func handler(w http.ResponseWriter, r *http.Request) { } contentBuf := &bytes.Buffer{} - if err := article.RenderHTML(contentBuf); err != nil { + if err := art.RenderHTML(contentBuf); err != nil { writeError(w, http.StatusInternalServerError, "failed to render article content") return } - formatter, found := formatters[format] - if !found { + if err := formatter.Render(w, art, contentBuf, format); err != nil { + log.Printf("error rendering response: %v", err) writeError(w, http.StatusBadRequest, "invalid format") return } - formatter(w, article, contentBuf) } /** diff --git a/api/index_test.go b/api/index_test.go index e606bf1..7f817eb 100644 --- a/api/index_test.go +++ b/api/index_test.go @@ -1,128 +1,19 @@ package handler import ( - "context" "net/http" "net/http/httptest" - "net/url" - "strings" "testing" ) -func TestNormalizeAndValidateURL(t *testing.T) { - tests := []struct { - raw string - want string // expected host (with scheme) - shouldErr bool - }{ - {"", "", true}, - {"example.com", "https://example.com", false}, - {"http://foo.bar", 
"http://foo.bar", false}, - {"https:/go.dev/play", "https://go.dev", false}, - {"http:/example.com", "http://example.com", false}, - {"ftp://foo.bar", "", true}, - } - for _, tt := range tests { - u, err := normalizeAndValidateURL(tt.raw) - if tt.shouldErr { - if err == nil { - t.Errorf("normalizeAndValidateURL(%q) expected error, got none", tt.raw) - } - continue - } - if err != nil { - t.Errorf("normalizeAndValidateURL(%q) unexpected error: %v", tt.raw, err) - continue - } - got := u.Scheme + "://" + u.Host - if got != tt.want { - t.Errorf("normalizeAndValidateURL(%q) = %q; want %q", tt.raw, got, tt.want) - } - } -} - -func TestFetchAndParse(t *testing.T) { - // Serve a minimal HTML page - htmlBody := `Test Title

Hello World

` - srv := httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, _ *http.Request) { - if _, err := w.Write([]byte(htmlBody)); err != nil { - t.Errorf("failed to write response: %v", err) - } - })) - defer srv.Close() - - // Override httpClient to use server's client - oldClient := httpClient - httpClient = srv.Client() - defer func() { httpClient = oldClient }() - - u, err := url.Parse(srv.URL) - if err != nil { - t.Fatalf("failed to parse server URL: %v", err) - } - ctx := context.Background() - req := httptest.NewRequest("GET", "/", nil) - art, err := fetchAndParse(ctx, u, req) - if err != nil { - t.Fatalf("fetchAndParse returned error: %v", err) - } - if art.Title() != "Test Title" { - t.Errorf("Article.Title() = %q; want %q", art.Title(), "Test Title") - } - - var content strings.Builder - err = art.RenderHTML(&content) - if err != nil { - t.Fatalf("failed to render article content: %v", err) - } - - if !strings.Contains(content.String(), "

Hello World") { - t.Errorf("Article.Content missing expected paragraph, got: %q", content.String()) - } -} - -/** - * TestSSRFProtection confirms that the custom dialer correctly blocks connections - * to private and loopback IP addresses. - * - * This is a critical security control to prevent the application from being used - * as a proxy to attack internal infrastructure (SSRF). - */ -func TestSSRFProtection(t *testing.T) { - // a dummy server that should never be reached - srv := httptest.NewServer(http.HandlerFunc(func(_ http.ResponseWriter, _ *http.Request) { - t.Fatal("dialer did not block private IP, connection was made") - })) - defer srv.Close() +func TestHandler_InvalidURL(t *testing.T) { + req := httptest.NewRequest("GET", "/api?url=", nil) + w := httptest.NewRecorder() - // get loopback address of the server - // srv.URL will be something like http://127.0.0.1:54321 - // we want to test if the dialer blocks the connection to 127.0.0.1 - // so, we don't use the server's client, we use our own httpClient - req, err := http.NewRequest("GET", srv.URL, nil) - if err != nil { - t.Fatalf("failed to create request: %v", err) - } + Handler(w, req) - _, err = httpClient.Do(req) - if err == nil { - t.Fatal("expected an error when dialing a private IP, but got none") - } - // check if the error is the one we expect from our dialer - // the error is wrapped, so we need to check for the substring - if !strings.Contains(err.Error(), "refusing to connect to private network address") { - t.Errorf("expected error to contain 'refusing to connect to private network address', but got: %v", err) - } - - // Test Unspecified IP (0.0.0.0) bypass attempt - // We manually construct a URL with 0.0.0.0 and a port (it doesn't need to be open for the check to fire) - unspecifiedURL := "http://0.0.0.0:8080" - reqUnspecified, _ := http.NewRequest("GET", unspecifiedURL, nil) - _, err = httpClient.Do(reqUnspecified) - if err == nil { - t.Fatal("expected an error when dialing 0.0.0.0, but 
got none") - } - if !strings.Contains(err.Error(), "refusing to connect to private network address") { - t.Errorf("expected error for 0.0.0.0 to contain 'refusing to connect to private network address', but got: %v", err) + resp := w.Result() + if resp.StatusCode != http.StatusBadRequest { + t.Errorf("Handler() status = %v; want %v", resp.StatusCode, http.StatusBadRequest) } } diff --git a/api/llm_test.go b/api/llm_test.go deleted file mode 100644 index 1079558..0000000 --- a/api/llm_test.go +++ /dev/null @@ -1,59 +0,0 @@ -package handler - -import ( - "net/http/httptest" - "testing" -) - -/** - * TestIsLLM verifies the detection of Large Language Model (LLM) bots. - * - * This ensures that when an LLM (like GPTBot) accesses the service, it - * automatically receives Markdown content, which is more token-efficient - * and easier for the model to process than full HTML. - */ -func TestIsLLM(t *testing.T) { - tests := []struct { - ua string - want bool - }{ - {"Mozilla/5.0 (compatible; GPTBot/1.0; +https://openai.com/gptbot)", true}, - {"ChatGPT-User/1.0", true}, - {"Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36", false}, - } - - for _, tt := range tests { - req := httptest.NewRequest("GET", "/", nil) - req.Header.Set("User-Agent", tt.ua) - if got := isLLM(req); got != tt.want { - t.Errorf("isLLM(UA=%q) = %v; want %v", tt.ua, tt.want, got) - } - } -} - -func TestGetFormat(t *testing.T) { - tests := []struct { - urlStr string - ua string - accept string - want string - }{ - {"/api?url=...&format=json", "", "", "json"}, - {"/api?url=...", "ChatGPT-User/1.0", "", "md"}, - {"/api?url=...", "Mozilla/5.0", "", "html"}, - {"/api?url=...", "Mozilla/5.0", "application/json", "json"}, - {"/api?url=...", "Mozilla/5.0", "text/markdown", "md"}, - {"/api?url=...", "Mozilla/5.0", "text/plain", "text"}, - // Query param should override Accept - {"/api?url=...&format=txt", "Mozilla/5.0", "application/json", "txt"}, - } - - for _, tt := range tests { - req := 
httptest.NewRequest("GET", tt.urlStr, nil) - req.Header.Set("User-Agent", tt.ua) - req.Header.Set("Accept", tt.accept) - if got := getFormat(req); got != tt.want { - t.Errorf("getFormat(%q, UA=%q, Accept=%q) = %q; want %q", tt.urlStr, tt.ua, tt.accept, got, tt.want) - } - } -} diff --git a/api/reconstruct_test.go b/api/reconstruct_test.go deleted file mode 100644 index c510265..0000000 --- a/api/reconstruct_test.go +++ /dev/null @@ -1,103 +0,0 @@ -package handler - -import ( - "net/http" - "net/url" - "reflect" - "testing" -) - -/** - * TestReconstructTargetURL verifies the logic for reassembling URLs that have been - * split by Vercel's rewrite rules. - * - * When Vercel rewrites a request like `/api?url=http://example.com?foo=bar`, - * it parses the query string *before* passing it to the Go handler. This often - * results in `url=http://example.com` and `foo=bar` being treated as separate - * parameters, rather than `foo=bar` being part of the `url` value. - * - * The reconstruction logic detects these "stray" parameters and merges them - * back into the target URL to ensure the fetcher requests the correct resource. 
- */ -func TestReconstructTargetURL(t *testing.T) { - tests := []struct { - name string - query string - expected string - }{ - { - name: "simple url", - query: "?url=http://example.com", - expected: "http://example.com", - }, - { - name: "url with encoded params", - query: "?url=http%3A%2F%2Fexample.com%3Ffoo%3Dbar", - expected: "http://example.com?foo=bar", - }, - { - name: "split params", - query: "?url=http://example.com&foo=bar&baz=qux", - expected: "http://example.com?foo=bar&baz=qux", - }, - { - name: "split params with existing params", - query: "?url=http://example.com?a=b&c=d", - expected: "http://example.com?a=b&c=d", - }, - { - name: "mixed params", - query: "?url=http%3A%2F%2Fexample.com%3Fa%3Db&c=d", - expected: "http://example.com?a=b&c=d", - }, - { - name: "ignore format param", - query: "?url=http://example.com&format=json&foo=bar", - expected: "http://example.com?foo=bar", - }, - { - name: "empty url", - query: "?format=json", - expected: "", - }, - } - - for _, tt := range tests { - t.Run(tt.name, func(t *testing.T) { - u, _ := url.Parse("http://localhost/api" + tt.query) - r := &http.Request{URL: u} - got := reconstructTargetURL(r) - - if got == "" && tt.expected == "" { - return - } - - gotU, _ := url.Parse(got) - expU, _ := url.Parse(tt.expected) - - if gotU == nil || expU == nil { - if got != tt.expected { - t.Errorf("reconstructTargetURL() = %v, want %v", got, tt.expected) - } - return - } - - if gotU.Scheme != expU.Scheme || gotU.Host != expU.Host || gotU.Path != expU.Path { - t.Errorf("reconstructTargetURL() base mismatch = %v, want %v", got, tt.expected) - } - - gotQ := gotU.Query() - expQ := expU.Query() - - if len(gotQ) != len(expQ) { - t.Errorf("reconstructTargetURL() query length mismatch = %v, want %v", gotQ, expQ) - } - - for k, v := range expQ { - if !reflect.DeepEqual(gotQ[k], v) { - t.Errorf("reconstructTargetURL() param %s mismatch = %v, want %v", k, gotQ[k], v) - } - } - }) - } -} diff --git a/internal/article/fetch.go 
b/internal/article/fetch.go new file mode 100644 index 0000000..d1699e3 --- /dev/null +++ b/internal/article/fetch.go @@ -0,0 +1,100 @@ +package article + +import ( + "context" + "io" + "math/rand" + "net/http" + "net/url" + + "codeberg.org/readeck/go-readability/v2" + "golang.org/x/net/html" +) + +const maxBodySize = int64(2 * 1024 * 1024) // 2 MiB + +/** + * ReadabilityParser is the shared instance of the readability parser. + * + * It is reusable and thread-safe, allowing concurrent processing of multiple + * requests without the need to create new parser instances. + */ +var ReadabilityParser = readability.NewParser() + +/** + * userAgentPool contains a list of real browser User-Agent strings. + * + * We rotate through these to mimic legitimate traffic, as many websites block requests + * from default HTTP clients (like Go-http-client) or known bot User-Agents. + * This list requires periodic maintenance to stay current with browser versions. + */ +var userAgentPool = []string{ + "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/134.0.0.0 Safari/537.36 Edg/134.0.0.0", + "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/133.0.0.0 Safari/537.36", + "Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/133.0.0.0 Safari/537.36", + "Mozilla/5.0 (Windows NT 10.0; Win64; x64; rv:135.0) Gecko/20100101 Firefox/135.0", + "Mozilla/5.0 (iPhone; CPU iPhone OS 18_3 like Mac OS X) AppleWebKit/605.1.15 (KHTML, like Gecko) Version/18.3 Mobile/15E148 Safari/604.1", +} + +/** + * getRandomUserAgent returns a random User-Agent string from the pool. + * + * Rotating User-Agents helps to evade simple anti-bot measures that block requests + * based on static or default Go HTTP client User-Agents. 
+ */ +func getRandomUserAgent() string { + return userAgentPool[rand.Intn(len(userAgentPool))] +} + +/** + * Fetch retrieves the content from the target URL and parses it using the readability library. + * + * Key behaviors: + * - Spoofs User-Agent and other browser headers to avoid blocking. + * - Forwards Accept-Language from the client to respect language preferences. + * - Sets security headers (Sec-Fetch-*) to look like a navigation request. + * - Limits the response body size to maxBodySize to prevent Out-Of-Memory (OOM) crashes on large pages. + * - Uses the provided httpClient which should have SSRF protection configured. + */ +func Fetch(ctx context.Context, link *url.URL, r *http.Request, client *http.Client) (readability.Article, error) { + req, err := http.NewRequestWithContext(ctx, "GET", link.String(), nil) + if err != nil { + return readability.Article{}, err + } + + // Always spoof everything to look like a real browser + ua := getRandomUserAgent() + req.Header.Set("User-Agent", ua) + req.Header.Set("Accept", "text/html,application/xhtml+xml,application/xml;q=0.9,image/avif,image/webp,image/apng,*/*;q=0.8") + + // Fallback headers from client request + if lang := r.Header.Get("Accept-Language"); lang != "" { + req.Header.Set("Accept-Language", lang) + } else { + req.Header.Set("Accept-Language", "en-US,en;q=0.9") + } + + req.Header.Set("Cache-Control", "no-cache") + req.Header.Set("Pragma", "no-cache") + req.Header.Set("Sec-Ch-Ua-Mobile", "?0") + req.Header.Set("Sec-Fetch-Dest", "document") + req.Header.Set("Sec-Fetch-Mode", "navigate") + req.Header.Set("Sec-Fetch-Site", "none") + req.Header.Set("Sec-Fetch-User", "?1") + req.Header.Set("Upgrade-Insecure-Requests", "1") + + res, err := client.Do(req) + if err != nil { + return readability.Article{}, err + } + defer res.Body.Close() + + // limit body size to prevent OOM + reader := io.LimitReader(res.Body, maxBodySize) + node, err := html.Parse(reader) + if err != nil { + return readability.Article{}, 
err + } + + return ReadabilityParser.ParseDocument(node, link) +} diff --git a/internal/article/fetch_test.go b/internal/article/fetch_test.go new file mode 100644 index 0000000..a2c043b --- /dev/null +++ b/internal/article/fetch_test.go @@ -0,0 +1,47 @@ +package article + +import ( + "context" + "net/http" + "net/http/httptest" + "net/url" + "strings" + "testing" +) + +func TestFetch(t *testing.T) { + // Serve a minimal HTML page + htmlBody := `Test Title

Hello World

` + srv := httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, _ *http.Request) { + if _, err := w.Write([]byte(htmlBody)); err != nil { + t.Errorf("failed to write response: %v", err) + } + })) + defer srv.Close() + + u, err := url.Parse(srv.URL) + if err != nil { + t.Fatalf("failed to parse server URL: %v", err) + } + ctx := context.Background() + req := httptest.NewRequest("GET", "/", nil) + + // Use server's client which is configured to talk to the test server + art, err := Fetch(ctx, u, req, srv.Client()) + if err != nil { + t.Fatalf("Fetch returned error: %v", err) + } + if art.Title() != "Test Title" { + t.Errorf("Article.Title() = %q; want %q", art.Title(), "Test Title") + } + + var content strings.Builder + err = art.RenderHTML(&content) + if err != nil { + t.Fatalf("failed to render article content: %v", err) + } + + if !strings.Contains(content.String(), "

Hello World") { + t.Errorf("Article.Content missing expected paragraph, got: %q", content.String()) + } +} diff --git a/internal/formatter/render.go b/internal/formatter/render.go new file mode 100644 index 0000000..862862c --- /dev/null +++ b/internal/formatter/render.go @@ -0,0 +1,109 @@ +package formatter + +import ( + "bytes" + "encoding/json" + "fmt" + "html/template" + "log" + "net/http" + + "codeberg.org/readeck/go-readability/v2" + "github.com/mattn/godown" +) + +/** + * Template is the raw HTML template string used for rendering the article. + * + * It provides a minimal HTML5 structure and includes the Sakura CSS library + * for a clean, typography-focused reading experience without distractions. + * The template expects a struct with Title and Content fields. + */ +const Template = ` + + + + + + + + + +

{{.Title}}

+ {{.Content}} + + +` + +/** + * DefaultTemplate is the parsed Go template instance. + * + * It is initialized at startup to avoid the overhead of parsing the template + * on every request, ensuring faster response times. + */ +var DefaultTemplate = template.Must(template.New("article").Parse(Template)) + +/** + * formatHandler defines the function signature for handling different output formats. + */ +type formatHandler func(w http.ResponseWriter, article readability.Article, buf *bytes.Buffer) + +var formatters = map[string]formatHandler{ + "html": formatHTML, + "md": formatMarkdown, + "markdown": formatMarkdown, + "json": formatJSON, + "text": formatText, + "txt": formatText, +} + +/** + * Render outputs the article in the requested format. + */ +func Render(w http.ResponseWriter, article readability.Article, contentBuf *bytes.Buffer, format string) error { + formatter, found := formatters[format] + if !found { + return fmt.Errorf("invalid format: %s", format) + } + formatter(w, article, contentBuf) + return nil +} + +func formatHTML(w http.ResponseWriter, article readability.Article, contentBuf *bytes.Buffer) { + w.Header().Set("Content-Type", "text/html; charset=utf-8") + // inject safe HTML content + data := struct { + Title string + Content template.HTML + }{ + Title: article.Title(), + Content: template.HTML(contentBuf.String()), + } + if err := DefaultTemplate.Execute(w, data); err != nil { + log.Printf("error executing HTML template: %v", err) + } +} + +func formatMarkdown(w http.ResponseWriter, _ readability.Article, buf *bytes.Buffer) { + w.Header().Set("Content-Type", "text/markdown") + if err := godown.Convert(w, buf, nil); err != nil { + log.Printf("error converting to markdown: %v", err) + } +} + +func formatJSON(w http.ResponseWriter, article readability.Article, buf *bytes.Buffer) { + w.Header().Set("Content-Type", "application/json") + if err := json.NewEncoder(w).Encode(map[string]string{ + "title": article.Title(), + "content": buf.String(), + 
// llmUserAgents lists lowercase User-Agent substrings of known LLM
// crawlers and AI tools (plus major search bots treated the same way).
var llmUserAgents = []string{
	"gptbot",
	"chatgpt",
	"claude",
	"googlebot",
	"bingbot",
	"anthropic",
	"perplexity",
	"claudebot",
	"github-copilot",
}

/**
 * IsLLM reports whether the request appears to originate from a known LLM
 * crawler or tool, via a case-insensitive substring match of the
 * User-Agent against llmUserAgents. This lets the app default such
 * clients to a machine-friendly format (Markdown).
 */
func IsLLM(r *http.Request) bool {
	agent := strings.ToLower(r.UserAgent())
	for _, marker := range llmUserAgents {
		if strings.Contains(agent, marker) {
			return true
		}
	}
	return false
}

/**
 * GetFormat determines the desired output format based on request signals.
 *
 * Priority order:
 *   1. Query parameter 'format' (explicit override).
 *   2. Accept header (content negotiation).
 *   3. LLM detection (auto-switch to Markdown for bots).
 *   4. Default to 'html'.
 */
func GetFormat(r *http.Request) string {
	// 1. Explicit query-parameter override wins outright.
	if explicit := r.URL.Query().Get("format"); explicit != "" {
		return explicit
	}

	// 2. Content negotiation via the Accept header; the table is ordered,
	// so the first matching MIME fragment decides.
	accept := strings.ToLower(r.Header.Get("Accept"))
	negotiable := []struct {
		mime   string
		format string
	}{
		{"application/json", "json"},
		{"text/markdown", "md"},
		{"text/x-markdown", "md"},
		{"text/plain", "text"},
		{"text/html", "html"},
	}
	for _, entry := range negotiable {
		if strings.Contains(accept, entry.mime) {
			return entry.format
		}
	}

	// 3. Known LLM crawlers default to markdown.
	if IsLLM(r) {
		return "md"
	}

	// 4. Everyone else gets HTML.
	return "html"
}

/**
 * ReconstructURL handles query parameter extraction quirks caused by Vercel rewrites.
 *
 * A rewrite of `/api/extract?url=http://example.com?foo=bar` can strip
 * `foo=bar` off the `url` parameter and attach it to the API request
 * instead. This function merges such stray parameters back into the
 * target URL so the full original URL is processed.
 */
func ReconstructURL(r *http.Request) string {
	rawLink := r.URL.Query().Get("url")
	if rawLink == "" {
		return ""
	}

	target, err := url.Parse(rawLink)
	if err != nil {
		// Unparseable target: hand the raw value back untouched.
		return rawLink
	}

	merged := target.Query()
	strays := 0
	for key, values := range r.URL.Query() {
		// 'url' and 'format' steer this API itself; forwarding them would
		// cause recursion or corrupt the target URL.
		if key == "url" || key == "format" {
			continue
		}
		strays++
		for _, value := range values {
			merged.Add(key, value)
		}
	}
	if strays == 0 {
		return rawLink
	}
	target.RawQuery = merged.Encode()
	return target.String()
}

/**
 * NormalizeURL cleans and validates the user-provided URL.
 *
 * Normalizations applied:
 *   - Missing scheme defaults to https://.
 *   - Proxy-mangled schemes are repaired (http:/example.com -> http://example.com).
 *
 * Only 'http' and 'https' schemes are accepted, blocking protocols such
 * as file:// or gopher://.
 */
func NormalizeURL(rawLink string) (*url.URL, error) {
	if rawLink == "" {
		return nil, errors.New("url parameter is empty")
	}

	switch {
	case strings.HasPrefix(rawLink, "http://"), strings.HasPrefix(rawLink, "https://"):
		// Already well-formed; nothing to repair.
	case strings.HasPrefix(rawLink, "http:/"):
		// Some proxies collapse :// into :/ — restore it.
		rawLink = "http://" + rawLink[len("http:/"):]
	case strings.HasPrefix(rawLink, "https:/"):
		rawLink = "https://" + rawLink[len("https:/"):]
	case !strings.Contains(rawLink, "://"):
		// No scheme at all: default to https.
		rawLink = "https://" + rawLink
	}

	link, err := url.Parse(rawLink)
	if err != nil {
		return nil, fmt.Errorf("invalid URL: %w", err)
	}
	if link.Scheme != "http" && link.Scheme != "https" {
		return nil, errors.New("unsupported URL scheme")
	}
	return link, nil
}
"ChatGPT-User/1.0", "", "md"}, + {"/api?url=...", "Mozilla/5.0", "", "html"}, + {"/api?url=...", "Mozilla/5.0", "application/json", "json"}, + {"/api?url=...", "Mozilla/5.0", "text/markdown", "md"}, + {"/api?url=...", "Mozilla/5.0", "text/plain", "text"}, + // Query param should override Accept + {"/api?url=...&format=txt", "Mozilla/5.0", "application/json", "txt"}, + } + + for _, tt := range tests { + req := httptest.NewRequest("GET", tt.urlStr, nil) + req.Header.Set("User-Agent", tt.ua) + req.Header.Set("Accept", tt.accept) + if got := GetFormat(req); got != tt.want { + t.Errorf("GetFormat(%q, UA=%q, Accept=%q) = %q; want %q", tt.urlStr, tt.ua, tt.accept, got, tt.want) + } + } +} + +func TestReconstructTargetURL(t *testing.T) { + tests := []struct { + name string + query string + expected string + }{ + { + name: "simple url", + query: "?url=http://example.com", + expected: "http://example.com", + }, + { + name: "url with encoded params", + query: "?url=http%3A%2F%2Fexample.com%3Ffoo%3Dbar", + expected: "http://example.com?foo=bar", + }, + { + name: "split params", + query: "?url=http://example.com&foo=bar&baz=qux", + expected: "http://example.com?foo=bar&baz=qux", + }, + { + name: "split params with existing params", + query: "?url=http://example.com?a=b&c=d", + expected: "http://example.com?a=b&c=d", + }, + { + name: "mixed params", + query: "?url=http%3A%2F%2Fexample.com%3Fa%3Db&c=d", + expected: "http://example.com?a=b&c=d", + }, + { + name: "ignore format param", + query: "?url=http://example.com&format=json&foo=bar", + expected: "http://example.com?foo=bar", + }, + { + name: "empty url", + query: "?format=json", + expected: "", + }, + } + + for _, tt := range tests { + t.Run(tt.name, func(t *testing.T) { + u, _ := url.Parse("http://localhost/api" + tt.query) + r := &http.Request{URL: u} + got := ReconstructURL(r) + + if got == "" && tt.expected == "" { + return + } + + gotU, _ := url.Parse(got) + expU, _ := url.Parse(tt.expected) + + if gotU == nil || expU 
== nil { + if got != tt.expected { + t.Errorf("ReconstructURL() = %v, want %v", got, tt.expected) + } + return + } + + if gotU.Scheme != expU.Scheme || gotU.Host != expU.Host || gotU.Path != expU.Path { + t.Errorf("ReconstructURL() base mismatch = %v, want %v", got, tt.expected) + } + + gotQ := gotU.Query() + expQ := expU.Query() + + if len(gotQ) != len(expQ) { + t.Errorf("ReconstructURL() query length mismatch = %v, want %v", gotQ, expQ) + } + + for k, v := range expQ { + if !reflect.DeepEqual(gotQ[k], v) { + t.Errorf("ReconstructURL() param %s mismatch = %v, want %v", k, gotQ[k], v) + } + } + }) + } +} + +func TestNormalizeURL(t *testing.T) { + tests := []struct { + raw string + want string // expected host (with scheme) + shouldErr bool + }{ + {"", "", true}, + {"example.com", "https://example.com", false}, + {"http://foo.bar", "http://foo.bar", false}, + {"https:/go.dev/play", "https://go.dev", false}, + {"http:/example.com", "http://example.com", false}, + {"ftp://foo.bar", "", true}, + } + for _, tt := range tests { + u, err := NormalizeURL(tt.raw) + if tt.shouldErr { + if err == nil { + t.Errorf("NormalizeURL(%q) expected error, got none", tt.raw) + } + continue + } + if err != nil { + t.Errorf("NormalizeURL(%q) unexpected error: %v", tt.raw, err) + continue + } + got := u.Scheme + "://" + u.Host + if got != tt.want { + t.Errorf("NormalizeURL(%q) = %q; want %q", tt.raw, got, tt.want) + } + } +} diff --git a/internal/transport/client.go b/internal/transport/client.go new file mode 100644 index 0000000..db5f909 --- /dev/null +++ b/internal/transport/client.go @@ -0,0 +1,64 @@ +package transport + +import ( + "errors" + "fmt" + "net" + "net/http" + "syscall" + "time" +) + +const ( + maxRedirects = 5 + httpClientTimeout = 10 * time.Second + dialerTimeout = 30 * time.Second + dialerKeepAlive = 30 * time.Second +) + +/** + * NewSafeClient creates a custom http.Client that prevents Server-Side Request Forgery (SSRF). 
const (
	maxRedirects      = 5
	httpClientTimeout = 10 * time.Second
	dialerTimeout     = 30 * time.Second
	dialerKeepAlive   = 30 * time.Second
)

/**
 * NewSafeClient creates a custom http.Client that prevents Server-Side Request Forgery (SSRF).
 *
 * The dialer's Control hook validates every address just before a
 * connection is made, refusing:
 *   - Private network addresses (e.g., 192.168.x.x, 10.x.x.x)
 *   - Loopback addresses (e.g., 127.0.0.1)
 *   - Link-local addresses
 *   - Unspecified addresses (e.g., 0.0.0.0)
 *
 * Redirects are followed up to maxRedirects; each hop re-runs the dialer
 * check, and the whole exchange is bounded by httpClientTimeout.
 */
func NewSafeClient() *http.Client {
	return &http.Client{
		Transport: &http.Transport{
			DialContext: newSafeDialer().DialContext,
		},
		Timeout: httpClientTimeout,
		CheckRedirect: func(_ *http.Request, via []*http.Request) error {
			if len(via) >= maxRedirects {
				return fmt.Errorf("stopped after %d redirects", maxRedirects)
			}
			return nil
		},
	}
}

// newSafeDialer builds a net.Dialer whose Control hook rejects private,
// loopback, link-local and unspecified destinations.
func newSafeDialer() *net.Dialer {
	return &net.Dialer{
		Timeout:   dialerTimeout,
		KeepAlive: dialerKeepAlive,
		Control: func(_, address string, _ syscall.RawConn) error {
			host, _, err := net.SplitHostPort(address)
			if err != nil {
				return err
			}
			// Control runs after name resolution, so host is normally an
			// IP literal already: parse it directly instead of routing it
			// through LookupIP (which also closes a window where a
			// re-resolution could return a different address). The lookup
			// remains only as a defensive fallback for non-literal hosts.
			var ips []net.IP
			if ip := net.ParseIP(host); ip != nil {
				ips = []net.IP{ip}
			} else {
				ips, err = net.LookupIP(host)
				if err != nil {
					return err
				}
			}
			for _, ip := range ips {
				if ip.IsPrivate() || ip.IsLoopback() || ip.IsLinkLocalUnicast() || ip.IsLinkLocalMulticast() || ip.IsUnspecified() {
					return errors.New("refusing to connect to private network address")
				}
			}
			return nil
		},
	}
}