Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
42 changes: 36 additions & 6 deletions links/check.go
Original file line number Diff line number Diff line change
Expand Up @@ -75,21 +75,51 @@ func checkHTTPLink(rctx types.ResultContext, client *http.Client, url string) ty
return rctx.Errorf("%s (invalid URL: %v)", url, err)
}
req.Header.Set("User-Agent", "skill-validator/1.0")
req.Header.Set("Accept", "text/html, */*;q=0.1")

resp, err := client.Do(req)
if err != nil {
return rctx.Errorf("%s (request failed: %v)", url, err)
}
defer func() { _ = resp.Body.Close() }()

if resp.StatusCode >= 200 && resp.StatusCode < 300 {
return rctx.Passf("%s (HTTP %d)", url, resp.StatusCode)
// Some sites don't handle HEAD correctly (e.g. SPAs like crates.io return
// 404 for HEAD even though the page exists). Fall back to GET when HEAD
// returns 404 or 405, which is the standard approach used by lychee,
// markdown-link-check, and other link validators.
if resp.StatusCode == http.StatusNotFound || resp.StatusCode == http.StatusMethodNotAllowed {
return checkHTTPLinkGET(rctx, client, url)
}
if resp.StatusCode >= 300 && resp.StatusCode < 400 {
return rctx.Passf("%s (HTTP %d redirect)", url, resp.StatusCode)

return classifyResponse(rctx, url, resp.StatusCode)
}

// checkHTTPLinkGET requests url with the GET method and turns the outcome
// into a types.Result.
//
// A request that cannot be built or sent is reported through rctx.Errorf;
// otherwise the response status code is handed to classifyResponse. The
// response body is never read, only closed.
func checkHTTPLinkGET(rctx types.ResultContext, client *http.Client, url string) types.Result {
	getReq, buildErr := http.NewRequest(http.MethodGet, url, nil)
	if buildErr != nil {
		return rctx.Errorf("%s (invalid URL: %v)", url, buildErr)
	}
	getReq.Header.Set("Accept", "text/html, */*;q=0.1")
	getReq.Header.Set("User-Agent", "skill-validator/1.0")

	res, sendErr := client.Do(getReq)
	if sendErr != nil {
		return rctx.Errorf("%s (request failed: %v)", url, sendErr)
	}
	defer func() {
		// The close error is deliberately dropped: only the status code is used.
		_ = res.Body.Close()
	}()

	status := res.StatusCode
	return classifyResponse(rctx, url, status)
}

func classifyResponse(rctx types.ResultContext, url string, statusCode int) types.Result {
if statusCode >= 200 && statusCode < 300 {
return rctx.Passf("%s (HTTP %d)", url, statusCode)
}
if statusCode >= 300 && statusCode < 400 {
return rctx.Passf("%s (HTTP %d redirect)", url, statusCode)
}
if resp.StatusCode == http.StatusForbidden {
if statusCode == http.StatusForbidden {
return rctx.Infof("%s (HTTP 403 — may block automated requests)", url)
}
return rctx.Errorf("%s (HTTP %d)", url, resp.StatusCode)
return rctx.Errorf("%s (HTTP %d)", url, statusCode)
}
92 changes: 92 additions & 0 deletions links/check_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -98,6 +98,20 @@ func TestCheckLinks_HTTP(t *testing.T) {
mux.HandleFunc("/not-found", func(w http.ResponseWriter, r *http.Request) {
w.WriteHeader(http.StatusNotFound)
})
mux.HandleFunc("/head-404-get-200", func(w http.ResponseWriter, r *http.Request) {
if r.Method == http.MethodHead {
w.WriteHeader(http.StatusNotFound)
return
}
w.WriteHeader(http.StatusOK)
})
mux.HandleFunc("/head-405-get-200", func(w http.ResponseWriter, r *http.Request) {
if r.Method == http.MethodHead {
w.WriteHeader(http.StatusMethodNotAllowed)
return
}
w.WriteHeader(http.StatusOK)
})
mux.HandleFunc("/forbidden", func(w http.ResponseWriter, r *http.Request) {
w.WriteHeader(http.StatusForbidden)
})
Expand Down Expand Up @@ -135,6 +149,20 @@ func TestCheckLinks_HTTP(t *testing.T) {
requireResultContaining(t, results, types.Error, "HTTP 500")
})

t.Run("HEAD 404 falls back to GET 200", func(t *testing.T) {
dir := t.TempDir()
body := "[spa](" + server.URL + "/head-404-get-200)"
results := CheckLinks(t.Context(), dir, body)
requireResultContaining(t, results, types.Pass, "HTTP 200")
})

t.Run("HEAD 405 falls back to GET 200", func(t *testing.T) {
dir := t.TempDir()
body := "[nohead](" + server.URL + "/head-405-get-200)"
results := CheckLinks(t.Context(), dir, body)
requireResultContaining(t, results, types.Pass, "HTTP 200")
})

t.Run("mixed relative and HTTP only checks HTTP", func(t *testing.T) {
dir := t.TempDir()
writeFile(t, dir, "references/guide.md", "content")
Expand Down Expand Up @@ -225,6 +253,70 @@ func TestCheckHTTPLink(t *testing.T) {
requireContains(t, result.Message, "HTTP 403")
})

t.Run("HEAD 404 retries with GET", func(t *testing.T) {
server := httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) {
if r.Method == http.MethodHead {
w.WriteHeader(http.StatusNotFound)
return
}
w.WriteHeader(http.StatusOK)
}))
defer server.Close()

result := checkHTTPLink(types.ResultContext{Category: "Links", File: "SKILL.md"}, client, server.URL)
if result.Level != types.Pass {
t.Errorf("expected Pass after GET fallback, got level=%d message=%q", result.Level, result.Message)
}
})

t.Run("HEAD 405 retries with GET", func(t *testing.T) {
server := httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) {
if r.Method == http.MethodHead {
w.WriteHeader(http.StatusMethodNotAllowed)
return
}
w.WriteHeader(http.StatusOK)
}))
defer server.Close()

result := checkHTTPLink(types.ResultContext{Category: "Links", File: "SKILL.md"}, client, server.URL)
if result.Level != types.Pass {
t.Errorf("expected Pass after GET fallback, got level=%d message=%q", result.Level, result.Message)
}
})

t.Run("SPA requiring Accept text/html resolves via GET fallback", func(t *testing.T) {
server := httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) {
if r.Header.Get("Accept") == "" || r.Method == http.MethodHead {
w.WriteHeader(http.StatusNotFound)
return
}
if strings.Contains(r.Header.Get("Accept"), "text/html") {
w.WriteHeader(http.StatusOK)
return
}
w.WriteHeader(http.StatusNotFound)
}))
defer server.Close()

result := checkHTTPLink(types.ResultContext{Category: "Links", File: "SKILL.md"}, client, server.URL)
if result.Level != types.Pass {
t.Errorf("expected Pass for SPA with Accept header, got level=%d message=%q", result.Level, result.Message)
}
})

t.Run("genuine 404 still errors after GET fallback", func(t *testing.T) {
server := httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) {
w.WriteHeader(http.StatusNotFound)
}))
defer server.Close()

result := checkHTTPLink(types.ResultContext{Category: "Links", File: "SKILL.md"}, client, server.URL)
if result.Level != types.Error {
t.Errorf("expected Error for genuine 404, got level=%d message=%q", result.Level, result.Message)
}
})

t.Run("invalid URL", func(t *testing.T) {
result := checkHTTPLink(types.ResultContext{Category: "Links", File: "SKILL.md"}, client, "http://invalid host with spaces/")
if result.Level != types.Error {
Expand Down
Loading