diff --git a/.gitignore b/.gitignore index 4ddbfd3..3b9fd69 100644 --- a/.gitignore +++ b/.gitignore @@ -4,4 +4,5 @@ __pycache__/ .pytest_cache/ .tox/ /client/go/kwkhtmltopdf_client +/client/go/kwkhtmltoimage_client /server/kwkhtmltopdf_server diff --git a/README.md b/README.md index 66d6145..a5e711f 100644 --- a/README.md +++ b/README.md @@ -31,26 +31,30 @@ the above server defined in the `KWKHTMLTOPDF_SERVER_URL` environment variable. There are two clients: -* a go client (preferred) -* a python client, which only depends on the `requests` library. +- go clients (preferred): + - PDF: `client/go/pdf/kwkhtmltopdf_client.go` + - Image: `client/go/image/kwkhtmltoimage_client.go` +- a python client, which only depends on the `requests` library. It should work with any python version supported by `requests`. ## Quick start ### Run the server -``` +```sh $ docker run --rm -p 8080:8080 ghcr.io/acsone/kwkhtmltopdf:0.12.6.1-latest ``` or -``` +```sh $ go run server/kwkhtmltopdf_server.go ``` The server should now listen on http://localhost:8080. +Available endpoints: `/` (PDF), `/pdf` (PDF), `/image` (Image), `/status` (Health check), `/metrics` (Prometheus). + #### Note for Apple Silicon users The docker image is built for amd64. If you are on Apple Silicon, @@ -63,22 +67,29 @@ Any of the following should generate a printout of the wkhtmltopdf home page to #### Using the built binary -``` -$ go build -o client/go/kwkhtmltopdf_client client/go/kwkhtmltopdf_client.go +```sh +$ go build -o client/go/kwkhtmltopdf_client client/go/pdf/kwkhtmltopdf_client.go +$ go build -o client/go/kwkhtmltoimage_client client/go/image/kwkhtmltoimage_client.go $ env KWKHTMLTOPDF_SERVER_URL=http://localhost:8080 \ client/go/kwkhtmltopdf_client https://wkhtmltopdf.org /tmp/test.pdf + +$ env KWKHTMLTOPDF_SERVER_URL=http://localhost:8080 \ + client/go/kwkhtmltoimage_client https://wkhtmltopdf.org /tmp/test.png ``` #### Using the Go client -``` +```sh $ env KWKHTMLTOPDF_SERVER_URL=http://localhost:8080 \ - go run client/go/kwkhtmltopdf_client.go https://wkhtmltopdf.org /tmp/test.pdf + go run client/go/pdf/kwkhtmltopdf_client.go https://wkhtmltopdf.org /tmp/test.pdf + +$ env KWKHTMLTOPDF_SERVER_URL=http://localhost:8080 \ + go run client/go/image/kwkhtmltoimage_client.go https://wkhtmltopdf.org /tmp/test.png ``` #### Using the Python client -``` +```sh $ env KWKHTMLTOPDF_SERVER_URL=http://localhost:8080 \ client/python/kwkhtmltopdf_client.py https://wkhtmltopdf.org /tmp/test.pdf ``` diff --git a/client/go/image/kwkhtmltoimage_client.go b/client/go/image/kwkhtmltoimage_client.go new file mode 100644 index 0000000..3cba133 --- /dev/null +++ b/client/go/image/kwkhtmltoimage_client.go @@ -0,0 +1,22 @@ +// Copyright (c) 2019 ACSONE SA/NV +// Distributed under the MIT License (http://opensource.org/licenses/MIT) + +package main + +import ( + "os" + + "github.com/acsone/kwkhtmltopdf/client/go/kwkhtmlclient" +) + +func main() { + serverURL, err := kwkhtmlclient.ServerURLFromEnv() + if err == nil { + err = kwkhtmlclient.Run(serverURL, "/image", os.Args[1:], os.Stdout) + } + if err != nil { + os.Stderr.WriteString(err.Error()) + os.Stderr.WriteString("\n") + os.Exit(-1) + } +} diff --git a/client/go/kwkhtmltopdf_client.go b/client/go/kwkhtmlclient/client.go similarity index 68% rename from client/go/kwkhtmltopdf_client.go rename to client/go/kwkhtmlclient/client.go index 81bd9ae..641f11f 100644 --- a/client/go/kwkhtmltopdf_client.go +++ b/client/go/kwkhtmlclient/client.go @@ -1,7 +1,7 @@ // Copyright (c) 2019 ACSONE SA/NV // Distributed under the MIT License (http://opensource.org/licenses/MIT) -package main +package kwkhtmlclient import ( "bytes" @@ -15,6 +15,16 @@ import ( const chunkSize = 32 * 1024 +var ErrServerURLNotSet = errors.New("KWKHTMLTOPDF_SERVER_URL not set") + +func ServerURLFromEnv() (string, error) { + serverURL := os.Getenv("KWKHTMLTOPDF_SERVER_URL") + if serverURL == "" { + return "", ErrServerURLNotSet + } + return serverURL, nil +} + func addOption(w *multipart.Writer, option string) error { return w.WriteField("option", option) } @@ -33,35 +43,36 @@ func addFile(w *multipart.Writer, filename string) error { return err } -func do() error { - var err error - var out *os.File - - serverURL := os.Getenv("KWKHTMLTOPDF_SERVER_URL") +// Run performs a request against the given endpoint (e.g. "/pdf" or "/image") +// on the server at serverURL. +// +// The behavior matches the original single-file Go client: +// - if args is empty, "-h" is sent +// - if the last argument looks like an output file, it is created and used +// - file arguments are sent as multipart file parts +func Run(serverURL, endpointPath string, args []string, stdout io.Writer) error { if serverURL == "" { - return errors.New("KWKHTMLTOPDF_SERVER_URL not set") + return ErrServerURLNotSet } - - // detect if last argument is output file, and create it - args := os.Args[1:] if len(args) == 0 { args = []string{"-h"} } + + out := stdout if len(args) >= 2 && !strings.HasPrefix(args[len(args)-1], "-") && !strings.HasPrefix(args[len(args)-2], "-") { - out, err = os.Create(args[len(args)-1]) + file, err := os.Create(args[len(args)-1]) if err != nil { return err } - defer out.Close() + defer file.Close() + out = file args = args[:len(args)-1] - } else { - out = os.Stdout } - // prepare request var postBuf bytes.Buffer w := multipart.NewWriter(&postBuf) for _, arg := range args { + var err error if arg == "-" { return errors.New("stdin/stdout input is not implemented") } else if strings.HasPrefix(arg, "-") { @@ -72,7 +83,7 @@ func do() error { err = addOption(w, arg) } else if strings.HasPrefix(arg, "file://") { err = addFile(w, arg[7:]) - } else if _, err := os.Stat(arg); err == nil { + } else if _, statErr := os.Stat(arg); statErr == nil { // TODO: better way to detect file arguments err = addFile(w, arg) } else { @@ -82,10 +93,10 @@ func do() error { return err } } - w.Close() + _ = w.Close() - // post request - resp, err := http.Post(serverURL, w.FormDataContentType(), &postBuf) + endpoint := serverURL + endpointPath + resp, err := http.Post(endpoint, w.FormDataContentType(), &postBuf) if err != nil { return err } @@ -94,7 +105,6 @@ func do() error { return errors.New("server error, consult server log for details") } - // read response respBuf := make([]byte, chunkSize) for { nr, er := resp.Body.Read(respBuf) @@ -114,12 +124,3 @@ func do() error { return nil } - -func main() { - err := do() - if err != nil { - os.Stderr.WriteString(err.Error()) - os.Stderr.WriteString("\n") - os.Exit(-1) - } -} diff --git a/client/go/kwkhtmlclient/client_test.go b/client/go/kwkhtmlclient/client_test.go new file mode 100644 index 0000000..f5d33d3 --- /dev/null +++ b/client/go/kwkhtmlclient/client_test.go @@ -0,0 +1,139 @@ +package kwkhtmlclient + +import ( + "bytes" + "io" + "net/http" + "net/http/httptest" + "os" + "path/filepath" + "strings" + "testing" +) + +func TestRun_ServerURLNotSet(t *testing.T) { + var out bytes.Buffer + err := Run("", "/pdf", []string{"-h"}, &out) + if err != ErrServerURLNotSet { + t.Fatalf("expected ErrServerURLNotSet, got %v", err) + } +} + +func TestRun_SendsOptionsAndWritesStdout(t *testing.T) { + var gotPath string + var gotOptions []string + + ts := httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) { + gotPath = r.URL.Path + if err := r.ParseMultipartForm(10 << 20); err != nil { + http.Error(w, err.Error(), http.StatusBadRequest) + return + } + gotOptions = append([]string(nil), r.MultipartForm.Value["option"]...) + w.WriteHeader(http.StatusOK) + _, _ = w.Write([]byte("ok")) + })) + defer ts.Close() + + var out bytes.Buffer + if err := Run(ts.URL, "/pdf", []string{"-h"}, &out); err != nil { + t.Fatalf("Run returned error: %v", err) + } + if gotPath != "/pdf" { + t.Fatalf("expected path /pdf, got %q", gotPath) + } + if out.String() != "ok" { + t.Fatalf("expected stdout %q, got %q", "ok", out.String()) + } + if len(gotOptions) != 1 || gotOptions[0] != "-h" { + t.Fatalf("expected options [-h], got %v", gotOptions) + } +} + +func TestRun_SendsFileArgumentAsMultipartFile(t *testing.T) { + inPath := filepath.Join(t.TempDir(), "input.html") + if err := os.WriteFile(inPath, []byte("hello"), 0o644); err != nil { + t.Fatalf("WriteFile: %v", err) + } + + var gotFilename string + var gotContent []byte + + ts := httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) { + if err := r.ParseMultipartForm(10 << 20); err != nil { + http.Error(w, err.Error(), http.StatusBadRequest) + return + } + files := r.MultipartForm.File["file"] + if len(files) != 1 { + http.Error(w, "expected one file", http.StatusBadRequest) + return + } + gotFilename = files[0].Filename + f, err := files[0].Open() + if err != nil { + http.Error(w, err.Error(), http.StatusBadRequest) + return + } + defer f.Close() + gotContent, _ = io.ReadAll(f) + + w.WriteHeader(http.StatusOK) + _, _ = w.Write([]byte("ok")) + })) + defer ts.Close() + + var out bytes.Buffer + if err := Run(ts.URL, "/pdf", []string{inPath}, &out); err != nil { + t.Fatalf("Run returned error: %v", err) + } + if out.String() != "ok" { + t.Fatalf("expected stdout %q, got %q", "ok", out.String()) + } + if !strings.HasSuffix(gotFilename, filepath.Base(inPath)) { + t.Fatalf("expected filename to end with %q, got %q", filepath.Base(inPath), gotFilename) + } + if string(gotContent) != "hello" { + t.Fatalf("expected file content %q, got %q", "hello", string(gotContent)) + } +} + +func TestRun_WritesToOutputFile(t *testing.T) { + outPath := filepath.Join(t.TempDir(), "out.bin") + + ts := httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) { + if r.URL.Path != "/image" { + http.Error(w, "wrong path", http.StatusBadRequest) + return + } + w.WriteHeader(http.StatusOK) + _, _ = w.Write([]byte("data")) + })) + defer ts.Close() + + // Two trailing non-dash args: last one is treated as output file. + args := []string{"https://example.invalid", outPath} + var stdout bytes.Buffer + if err := Run(ts.URL, "/image", args, &stdout); err != nil { + t.Fatalf("Run returned error: %v", err) + } + if stdout.Len() != 0 { + t.Fatalf("expected no stdout output, got %q", stdout.String()) + } + + b, err := os.ReadFile(outPath) + if err != nil { + t.Fatalf("ReadFile: %v", err) + } + if string(b) != "data" { + t.Fatalf("expected output file %q, got %q", "data", string(b)) + } +} + +func TestRun_StdinNotImplemented(t *testing.T) { + var stdout bytes.Buffer + err := Run("http://example.invalid", "/pdf", []string{"-"}, &stdout) + if err == nil || err.Error() != "stdin/stdout input is not implemented" { + t.Fatalf("expected stdin/stdout error, got %v", err) + } +} diff --git a/client/go/pdf/kwkhtmltopdf_client.go b/client/go/pdf/kwkhtmltopdf_client.go new file mode 100644 index 0000000..4c09646 --- /dev/null +++ b/client/go/pdf/kwkhtmltopdf_client.go @@ -0,0 +1,22 @@ +// Copyright (c) 2019 ACSONE SA/NV +// Distributed under the MIT License (http://opensource.org/licenses/MIT) + +package main + +import ( + "os" + + "github.com/acsone/kwkhtmltopdf/client/go/kwkhtmlclient" +) + +func main() { + serverURL, err := kwkhtmlclient.ServerURLFromEnv() + if err == nil { + err = kwkhtmlclient.Run(serverURL, "/pdf", os.Args[1:], os.Stdout) + } + if err != nil { + os.Stderr.WriteString(err.Error()) + os.Stderr.WriteString("\n") + os.Exit(-1) + } +} diff --git a/go.mod b/go.mod new file mode 100644 index 0000000..71a912c --- /dev/null +++ b/go.mod @@ -0,0 +1,3 @@ +module github.com/acsone/kwkhtmltopdf + +go 1.18 diff --git a/server/kwkhtmltopdf_server.go b/server/kwkhtmltopdf_server.go index 43f08ac..8602940 100644 --- a/server/kwkhtmltopdf_server.go +++ b/server/kwkhtmltopdf_server.go @@ -10,6 +10,8 @@ import ( "os" "os/exec" "path/filepath" + "strings" + "time" ) // TODO ignore opts? @@ -29,6 +31,14 @@ func wkhtmltopdfBin() string { return "wkhtmltopdf" } +func wkhtmltoimageBin() string { + bin := os.Getenv("KWKHTMLTOIMAGE_BIN") + if bin != "" { + return bin + } + return "wkhtmltoimage" +} + func isDocOption(arg string) bool { switch arg { case @@ -68,6 +78,73 @@ func httpAbort(w http.ResponseWriter, err error) { c.Close() } +type metricsResponseWriter struct { + http.ResponseWriter + bytes int64 +} + +func (w *metricsResponseWriter) WriteHeader(statusCode int) { + w.ResponseWriter.WriteHeader(statusCode) +} + +func (w *metricsResponseWriter) Write(p []byte) (int, error) { + n, err := w.ResponseWriter.Write(p) + w.bytes += int64(n) + return n, err +} + +var ( + conversionsTotal = NewCounterVec( + CounterOpts{ + Name: "kwkhtmltopdf_conversions_total", + Help: "Total number of conversions attempted.", + }, + []string{"type", "domain", "result"}, + ) + conversionDurationSeconds = NewHistogramVec( + HistogramOpts{ + Name: "kwkhtmltopdf_conversion_duration_seconds", + Help: "Conversion duration in seconds.", + Buckets: DefBuckets, + }, + []string{"type", "domain", "result"}, + ) + conversionOutputBytesTotal = NewCounterVec( + CounterOpts{ + Name: "kwkhtmltopdf_conversion_output_bytes_total", + Help: "Total number of bytes written in conversion responses.", + }, + []string{"type", "domain", "result"}, + ) +) + +func extractCookieDomainFromReportCookieJar(path string) (string, error) { + // Read only a limited amount: cookie jar files are expected to be small. + data, err := os.ReadFile(path) + if err != nil { + return "", err + } + // Some cookie jars can be multiline; search globally. + content := string(data) + idx := strings.Index(content, "domain=") + if idx < 0 { + return "", nil + } + rest := content[idx+len("domain="):] + // Domain value ends at ';' or whitespace/newline. + end := len(rest) + if semi := strings.IndexByte(rest, ';'); semi >= 0 { + end = semi + } + if ws := strings.IndexAny(rest, " \t\r\n"); ws >= 0 && ws < end { + end = ws + } + domain := strings.TrimSpace(rest[:end]) + // Be defensive: normalize weird casing/spaces. + domain = strings.Trim(domain, ".") + return domain, nil +} + func redactArgs(args []string) []string { redacted := make([]string, 0, len(args)) i := 0 @@ -84,28 +161,47 @@ func redactArgs(args []string) []string { } func handler(w http.ResponseWriter, r *http.Request) { + mw := &metricsResponseWriter{ResponseWriter: w} + result := "success" + conversionType := "" + domainLabel := "unknown" + conversionStarted := false + conversionStart := time.Time{} + defer func() { + if conversionStarted { + conversionsTotal.WithLabelValues(conversionType, domainLabel, result).Inc() + conversionDurationSeconds.WithLabelValues(conversionType, domainLabel, result).Observe(time.Since(conversionStart).Seconds()) + conversionOutputBytesTotal.WithLabelValues(conversionType, domainLabel, result).Add(float64(mw.bytes)) + } + }() if r.URL.Path == "/status" { - w.WriteHeader(http.StatusOK) + mw.WriteHeader(http.StatusOK) + return + } else if r.URL.Path == "/metrics" || r.URL.Path == "/metrics/" { + MetricsHandler(mw, r) return } else { // don't log status log.Printf("%s %s", r.Method, r.URL.Path) } if r.Method != http.MethodPost { - httpError(w, errors.New("http method not allowed: "+r.Method), http.StatusMethodNotAllowed) + result = "error" + httpError(mw, errors.New("http method not allowed: "+r.Method), http.StatusMethodNotAllowed) return } - if r.URL.Path != "/" && r.URL.Path != "/pdf" { - // handle / and /pdf, keep the rest for future use - httpError(w, errors.New("path not found: "+r.URL.Path), http.StatusNotFound) + if r.URL.Path != "/" && r.URL.Path != "/pdf" && r.URL.Path != "/image" { + // handle /, /pdf, and /image, keep the rest for future use + result = "error" + httpError(mw, errors.New("path not found: "+r.URL.Path), http.StatusNotFound) return } // temp dir for files tmpdir, err := ioutil.TempDir("", "kwk") if err != nil { - httpError(w, err, http.StatusNotFound) + result = "error" + httpError(mw, err, http.StatusNotFound) return } defer os.RemoveAll(tmpdir) @@ -113,7 +209,8 @@ func handler(w http.ResponseWriter, r *http.Request) { // parse request reader, err := r.MultipartReader() if err != nil { - httpError(w, err, http.StatusBadRequest) + result = "error" + httpError(mw, err, http.StatusBadRequest) return } var docOutput bool @@ -124,7 +221,8 @@ func handler(w http.ResponseWriter, r *http.Request) { break } if err != nil { - httpError(w, err, http.StatusBadRequest) + result = "error" + httpError(mw, err, http.StatusBadRequest) return } if part.FormName() == "option" { @@ -143,53 +241,87 @@ func handler(w http.ResponseWriter, r *http.Request) { // TODO what if multiple files with same basename? file, err := os.Create(path) if err != nil { - httpError(w, err, http.StatusBadRequest) + result = "error" + httpError(mw, err, http.StatusBadRequest) return } _, err = io.Copy(file, part) file.Close() if err != nil { - httpError(w, err, http.StatusBadRequest) + result = "error" + httpError(mw, err, http.StatusBadRequest) return } + if domainLabel == "unknown" { + base := filepath.Base(part.FileName()) + if strings.HasPrefix(base, "report.cookie_jar") { + domain, derr := extractCookieDomainFromReportCookieJar(path) + if derr != nil { + log.Println("failed to read cookie jar domain:", derr) + } else if domain != "" { + domainLabel = domain + } + } + } args = append(args, path) } else { - httpError(w, errors.New("unpexpected part name: "+part.FormName()), http.StatusBadRequest) + result = "error" + httpError(mw, errors.New("unpexpected part name: "+part.FormName()), http.StatusBadRequest) return } } + // determine if this is an image request + isImageRequest := r.URL.Path == "/image" + if docOutput { - w.Header().Add("Content-Type", "text/plain") + conversionType = "doc" + mw.Header().Add("Content-Type", "text/plain") + } else if isImageRequest { + conversionType = "image" + mw.Header().Add("Content-Type", "image/png") + args = append(args, "-") } else { - w.Header().Add("Content-Type", "application/pdf") + conversionType = "pdf" + mw.Header().Add("Content-Type", "application/pdf") args = append(args, "-") } + conversionStarted = true + conversionStart = time.Now() var redactedArgs = redactArgs(args) log.Println(redactedArgs, "starting") - cmd := exec.Command(wkhtmltopdfBin(), args...) + var cmd *exec.Cmd + if isImageRequest { + cmd = exec.Command(wkhtmltoimageBin(), args...) + } else { + cmd = exec.Command(wkhtmltopdfBin(), args...) + } cmdStdout, err := cmd.StdoutPipe() if err != nil { - httpError(w, err, http.StatusInternalServerError) + result = "error" + httpError(mw, err, http.StatusInternalServerError) return } cmd.Stderr = os.Stderr err = cmd.Start() if err != nil { - httpError(w, err, http.StatusInternalServerError) + result = "error" + httpError(mw, err, http.StatusInternalServerError) return } - w.WriteHeader(http.StatusOK) - _, err = io.Copy(w, cmdStdout) + mw.WriteHeader(http.StatusOK) + _, err = io.Copy(mw, cmdStdout) if err != nil { + result = "abort" httpAbort(w, err) return } err = cmd.Wait() if err != nil { + result = "abort" httpAbort(w, err) return } @@ -198,7 +330,11 @@ func handler(w http.ResponseWriter, r *http.Request) { } func main() { + http.HandleFunc("/metrics", MetricsHandler) http.HandleFunc("/", handler) + http.HandleFunc("/pdf", handler) + http.HandleFunc("/image", handler) log.Println("kwkhtmltopdf server listening on port 8080") + log.Println("Available endpoints: / (PDF), /pdf (PDF), /image (Image), /status (Health check), /metrics (Prometheus)") log.Fatal(http.ListenAndServe(":8080", nil)) } diff --git a/server/kwkhtmltopdf_server_test.go b/server/kwkhtmltopdf_server_test.go new file mode 100644 index 0000000..33d7975 --- /dev/null +++ b/server/kwkhtmltopdf_server_test.go @@ -0,0 +1,40 @@ +package main + +import ( + "net/http" + "net/http/httptest" + "testing" +) + +func testServerMux() *http.ServeMux { + mux := http.NewServeMux() + mux.HandleFunc("/metrics", MetricsHandler) + mux.HandleFunc("/", handler) + mux.HandleFunc("/pdf", handler) + mux.HandleFunc("/image", handler) + return mux +} + +func TestMetricsEndpoint_AllowsGet(t *testing.T) { + mux := testServerMux() + + req := httptest.NewRequest(http.MethodGet, "/metrics", nil) + rr := httptest.NewRecorder() + mux.ServeHTTP(rr, req) + + if rr.Code != http.StatusOK { + t.Fatalf("expected status 200 for /metrics, got %d", rr.Code) + } +} + +func TestMetricsEndpointTrailingSlash_AllowsGet(t *testing.T) { + mux := testServerMux() + + req := httptest.NewRequest(http.MethodGet, "/metrics/", nil) + rr := httptest.NewRecorder() + mux.ServeHTTP(rr, req) + + if rr.Code != http.StatusOK { + t.Fatalf("expected status 200 for /metrics/, got %d", rr.Code) + } +} diff --git a/server/metrics.go b/server/metrics.go new file mode 100644 index 0000000..73b979e --- /dev/null +++ b/server/metrics.go @@ -0,0 +1,322 @@ +package main + +import ( + "fmt" + "io" + "math" + "net/http" + "sort" + "strconv" + "strings" + "sync" + "sync/atomic" +) + +// Minimal Prometheus text exposition implementation. +// Supports the subset of features used by this server (CounterVec, HistogramVec). + +var DefBuckets = []float64{0.1, 0.25, 0.5, 1, 2.5, 5, 10, 30, 60, 120, 240} + +type CounterOpts struct { + Name string + Help string +} + +type HistogramOpts struct { + Name string + Help string + Buckets []float64 +} + +type promMetric interface { + writeProm(w io.Writer) +} + +type registry struct { + mu sync.Mutex + metrics []promMetric +} + +var defaultRegistry = ®istry{} + +func (r *registry) register(m promMetric) { + r.mu.Lock() + r.metrics = append(r.metrics, m) + r.mu.Unlock() +} + +func (r *registry) writeAll(w io.Writer) { + r.mu.Lock() + metrics := append([]promMetric(nil), r.metrics...) + r.mu.Unlock() + for _, m := range metrics { + m.writeProm(w) + } +} + +func MetricsHandler(w http.ResponseWriter, _ *http.Request) { + w.Header().Set("Content-Type", "text/plain; version=0.0.4; charset=utf-8") + defaultRegistry.writeAll(w) +} + +// --- Counter --- + +type CounterVec struct { + name string + help string + labelNames []string + + mu sync.Mutex + series map[string]*counterSeries +} + +type counterSeries struct { + labelValues []string + valueBits uint64 // atomic float64 +} + +type Counter struct{ s *counterSeries } + +func NewCounterVec(opts CounterOpts, labelNames []string) *CounterVec { + cv := &CounterVec{ + name: opts.Name, + help: opts.Help, + labelNames: append([]string(nil), labelNames...), + series: make(map[string]*counterSeries), + } + defaultRegistry.register(cv) + return cv +} + +func (v *CounterVec) WithLabelValues(values ...string) *Counter { + if len(values) != len(v.labelNames) { + panic(fmt.Sprintf("%s: expected %d label values, got %d", v.name, len(v.labelNames), len(values))) + } + key := encodeLabelValues(values) + v.mu.Lock() + s := v.series[key] + if s == nil { + s = &counterSeries{labelValues: append([]string(nil), values...)} + v.series[key] = s + } + v.mu.Unlock() + return &Counter{s: s} +} + +func (c *Counter) Inc() { c.Add(1) } + +func (c *Counter) Add(v float64) { + if v < 0 { + // Prometheus counters are expected to be monotonic; ignore negative adds. + return + } + atomicAddFloat64(&c.s.valueBits, v) +} + +func (v *CounterVec) writeProm(w io.Writer) { + fmt.Fprintf(w, "# HELP %s %s\n", v.name, escapeHelp(v.help)) + fmt.Fprintf(w, "# TYPE %s counter\n", v.name) + keys, series := v.snapshotSeries() + for _, k := range keys { + s := series[k] + val := atomicLoadFloat64(&s.valueBits) + fmt.Fprintf(w, "%s%s %s\n", v.name, formatLabelSet(v.labelNames, s.labelValues), formatFloat(val)) + } +} + +// --- Histogram --- + +type HistogramVec struct { + name string + help string + labelNames []string + buckets []float64 + + mu sync.Mutex + series map[string]*histogramSeries +} + +type histogramSeries struct { + labelValues []string + bucketCnt []uint64 // per-bucket counts, last is +Inf + sumBits uint64 // atomic float64 + count uint64 // atomic uint64 +} + +type Histogram struct { + s *histogramSeries + buckets []float64 +} + +func NewHistogramVec(opts HistogramOpts, labelNames []string) *HistogramVec { + buckets := append([]float64(nil), opts.Buckets...) + if len(buckets) == 0 { + buckets = append([]float64(nil), DefBuckets...) + } + // Ensure buckets are sorted increasing. + sort.Float64s(buckets) + + hv := &HistogramVec{ + name: opts.Name, + help: opts.Help, + labelNames: append([]string(nil), labelNames...), + buckets: buckets, + series: make(map[string]*histogramSeries), + } + defaultRegistry.register(hv) + return hv +} + +func (v *HistogramVec) WithLabelValues(values ...string) *Histogram { + if len(values) != len(v.labelNames) { + panic(fmt.Sprintf("%s: expected %d label values, got %d", v.name, len(v.labelNames), len(values))) + } + key := encodeLabelValues(values) + v.mu.Lock() + s := v.series[key] + if s == nil { + s = &histogramSeries{ + labelValues: append([]string(nil), values...), + bucketCnt: make([]uint64, len(v.buckets)+1), + } + v.series[key] = s + } + v.mu.Unlock() + return &Histogram{s: s, buckets: v.buckets} +} + +func (h *Histogram) Observe(v float64) { + idx := len(h.buckets) // +Inf by default + for i, b := range h.buckets { + if v <= b { + idx = i + break + } + } + atomic.AddUint64(&h.s.bucketCnt[idx], 1) + atomic.AddUint64(&h.s.count, 1) + atomicAddFloat64(&h.s.sumBits, v) +} + +func (v *HistogramVec) writeProm(w io.Writer) { + fmt.Fprintf(w, "# HELP %s %s\n", v.name, escapeHelp(v.help)) + fmt.Fprintf(w, "# TYPE %s histogram\n", v.name) + + keys, series := v.snapshotSeries() + for _, k := range keys { + s := series[k] + + baseLabels := make([]string, 0, len(v.labelNames)+1) + baseLabels = append(baseLabels, s.labelValues...) + + // Buckets are cumulative in exposition format. + var cumulative uint64 + for i, b := range v.buckets { + cumulative += atomic.LoadUint64(&s.bucketCnt[i]) + fmt.Fprintf(w, "%s_bucket%s %d\n", v.name, formatLabelSet(append(v.labelNames, "le"), append(baseLabels, formatFloat(b))), cumulative) + } + cumulative += atomic.LoadUint64(&s.bucketCnt[len(v.buckets)]) + fmt.Fprintf(w, "%s_bucket%s %d\n", v.name, formatLabelSet(append(v.labelNames, "le"), append(baseLabels, "+Inf")), cumulative) + + sum := atomicLoadFloat64(&s.sumBits) + count := atomic.LoadUint64(&s.count) + fmt.Fprintf(w, "%s_sum%s %s\n", v.name, formatLabelSet(v.labelNames, s.labelValues), formatFloat(sum)) + fmt.Fprintf(w, "%s_count%s %d\n", v.name, formatLabelSet(v.labelNames, s.labelValues), count) + } +} + +// --- Helpers --- + +func (v *CounterVec) snapshotSeries() ([]string, map[string]*counterSeries) { + v.mu.Lock() + keys := make([]string, 0, len(v.series)) + for k := range v.series { + keys = append(keys, k) + } + series := make(map[string]*counterSeries, len(v.series)) + for k, s := range v.series { + series[k] = s + } + v.mu.Unlock() + sort.Strings(keys) + return keys, series +} + +func (v *HistogramVec) snapshotSeries() ([]string, map[string]*histogramSeries) { + v.mu.Lock() + keys := make([]string, 0, len(v.series)) + for k := range v.series { + keys = append(keys, k) + } + series := make(map[string]*histogramSeries, len(v.series)) + for k, s := range v.series { + series[k] = s + } + v.mu.Unlock() + sort.Strings(keys) + return keys, series +} + +func encodeLabelValues(values []string) string { + var b strings.Builder + for _, v := range values { + b.WriteString(strconv.Itoa(len(v))) + b.WriteByte(':') + b.WriteString(v) + b.WriteByte('|') + } + return b.String() +} + +func escapeHelp(s string) string { + // Prometheus help strings should not contain newlines. + s = strings.ReplaceAll(s, "\\", "\\\\") + s = strings.ReplaceAll(s, "\n", "\\n") + return s +} + +func escapeLabelValue(s string) string { + s = strings.ReplaceAll(s, "\\", "\\\\") + s = strings.ReplaceAll(s, "\n", "\\n") + s = strings.ReplaceAll(s, "\"", "\\\"") + return s +} + +func formatLabelSet(names, values []string) string { + if len(names) == 0 { + return "" + } + var b strings.Builder + b.WriteByte('{') + for i := range names { + if i > 0 { + b.WriteByte(',') + } + b.WriteString(names[i]) + b.WriteString("=\"") + b.WriteString(escapeLabelValue(values[i])) + b.WriteByte('"') + } + b.WriteByte('}') + return b.String() +} + +func formatFloat(v float64) string { + return strconv.FormatFloat(v, 'g', -1, 64) +} + +func atomicLoadFloat64(bits *uint64) float64 { + return math.Float64frombits(atomic.LoadUint64(bits)) +} + +func atomicAddFloat64(bits *uint64, delta float64) { + for { + oldBits := atomic.LoadUint64(bits) + old := math.Float64frombits(oldBits) + newVal := old + delta + newBits := math.Float64bits(newVal) + if atomic.CompareAndSwapUint64(bits, oldBits, newBits) { + return + } + } +} diff --git a/server/metrics_test.go b/server/metrics_test.go new file mode 100644 index 0000000..e2915b9 --- /dev/null +++ b/server/metrics_test.go @@ -0,0 +1,150 @@ +package main + +import ( + "net/http/httptest" + "strconv" + "strings" + "sync" + "testing" +) + +func resetMetricsRegistry() { + defaultRegistry = ®istry{} +} + +func TestMetricsHandler_ExportsCounterAndHistogram(t *testing.T) { + resetMetricsRegistry() + + reqTotal := NewCounterVec(CounterOpts{Name: "test_requests_total", Help: "Total requests."}, []string{"path"}) + duration := NewHistogramVec(HistogramOpts{Name: "test_duration_seconds", Help: "Duration.", Buckets: []float64{0.1, 0.5}}, []string{"path"}) + + reqTotal.WithLabelValues("/pdf").Inc() + reqTotal.WithLabelValues("/pdf").Add(2) + + duration.WithLabelValues("/pdf").Observe(0.05) + duration.WithLabelValues("/pdf").Observe(0.2) + duration.WithLabelValues("/pdf").Observe(1.0) + + rr := httptest.NewRecorder() + MetricsHandler(rr, httptest.NewRequest("GET", "/metrics", nil)) + body := rr.Body.String() + + if ct := rr.Header().Get("Content-Type"); !strings.HasPrefix(ct, "text/plain") { + t.Fatalf("unexpected Content-Type: %q", ct) + } + + // Counter + if !strings.Contains(body, "# TYPE test_requests_total counter\n") { + t.Fatalf("missing counter type line, got:\n%s", body) + } + if !strings.Contains(body, "test_requests_total{path=\"/pdf\"} 3\n") { + t.Fatalf("missing/incorrect counter sample, got:\n%s", body) + } + + // Histogram + if !strings.Contains(body, "# TYPE test_duration_seconds histogram\n") { + t.Fatalf("missing histogram type line, got:\n%s", body) + } + if !strings.Contains(body, "test_duration_seconds_bucket{path=\"/pdf\",le=\"0.1\"} 1\n") { + t.Fatalf("missing/incorrect histogram bucket le=0.1, got:\n%s", body) + } + if !strings.Contains(body, "test_duration_seconds_bucket{path=\"/pdf\",le=\"0.5\"} 2\n") { + t.Fatalf("missing/incorrect histogram bucket le=0.5, got:\n%s", body) + } + if !strings.Contains(body, "test_duration_seconds_bucket{path=\"/pdf\",le=\"+Inf\"} 3\n") { + t.Fatalf("missing/incorrect histogram bucket le=+Inf, got:\n%s", body) + } + if !strings.Contains(body, "test_duration_seconds_count{path=\"/pdf\"} 3\n") { + t.Fatalf("missing/incorrect histogram count, got:\n%s", body) + } + if !strings.Contains(body, "test_duration_seconds_sum{path=\"/pdf\"} 1.25\n") { + t.Fatalf("missing/incorrect histogram sum, got:\n%s", body) + } +} + +func TestCounter_NegativeAddIgnored(t *testing.T) { + resetMetricsRegistry() + + cv := NewCounterVec(CounterOpts{Name: "test_counter_total", Help: "help"}, []string{"k"}) + c := cv.WithLabelValues("v") + c.Add(-10) + + rr := httptest.NewRecorder() + MetricsHandler(rr, httptest.NewRequest("GET", "/metrics", nil)) + body := rr.Body.String() + + if !strings.Contains(body, "test_counter_total{k=\"v\"} 0\n") { + t.Fatalf("expected counter to stay at 0, got:\n%s", body) + } +} + +func TestEscaping_LabelValuesAndHelp(t *testing.T) { + resetMetricsRegistry() + + // Include characters that must be escaped in Prometheus text format. + help := "line1\\line2\nline3" + cv := NewCounterVec(CounterOpts{Name: "test_escape_total", Help: help}, []string{"k"}) + cv.WithLabelValues("a\"b\\c\nd").Inc() + + rr := httptest.NewRecorder() + MetricsHandler(rr, httptest.NewRequest("GET", "/metrics", nil)) + body := rr.Body.String() + + if !strings.Contains(body, "# HELP test_escape_total line1\\\\line2\\nline3\n") { + t.Fatalf("help escaping mismatch, got:\n%s", body) + } + if !strings.Contains(body, "test_escape_total{k=\"a\\\"b\\\\c\\nd\"} 1\n") { + t.Fatalf("label escaping mismatch, got:\n%s", body) + } +} + +func TestEncodeLabelValues_AvoidsAmbiguity(t *testing.T) { + // This is a regression test for common ambiguous concatenation bugs. + k1 := encodeLabelValues([]string{"a", "bc"}) + k2 := encodeLabelValues([]string{"ab", "c"}) + if k1 == k2 { + t.Fatalf("expected different encoded keys, got %q", k1) + } +} + +func TestMetrics_ConcurrentUpdates(t *testing.T) { + resetMetricsRegistry() + + workers := 16 + iters := 2000 + + cv := NewCounterVec(CounterOpts{Name: "test_concurrent_counter_total", Help: "help"}, []string{"worker"}) + hv := NewHistogramVec(HistogramOpts{Name: "test_concurrent_hist_seconds", Help: "help", Buckets: []float64{0.1, 0.5}}, []string{"worker"}) + + var wg sync.WaitGroup + wg.Add(workers) + for w := 0; w < workers; w++ { + workerLabel := strconv.Itoa(w) + go func() { + defer wg.Done() + c := cv.WithLabelValues(workerLabel) + h := hv.WithLabelValues(workerLabel) + for i := 0; i < iters; i++ { + c.Inc() + h.Observe(0.2) + } + }() + } + wg.Wait() + + // Export once to ensure formatting still works after concurrent writes. + rr := httptest.NewRecorder() + MetricsHandler(rr, httptest.NewRequest("GET", "/metrics", nil)) + body := rr.Body.String() + + // Spot-check a few workers for deterministic values. + for _, w := range []int{0, workers - 1} { + workerLabel := strconv.Itoa(w) + if !strings.Contains(body, "test_concurrent_counter_total{worker=\""+workerLabel+"\"} "+strconv.Itoa(iters)+"\n") { + t.Fatalf("missing/incorrect counter for worker=%s, got:\n%s", workerLabel, body) + } + if !strings.Contains(body, "test_concurrent_hist_seconds_count{worker=\""+workerLabel+"\"} "+strconv.Itoa(iters)+"\n") { + t.Fatalf("missing/incorrect histogram count for worker=%s, got:\n%s", workerLabel, body) + } + } +} diff --git a/tests/kwkhtmltopdf_test.py b/tests/kwkhtmltopdf_test.py index 9ef099a..a9ca20a 100755 --- a/tests/kwkhtmltopdf_test.py +++ b/tests/kwkhtmltopdf_test.py @@ -87,7 +87,9 @@ def client(request): [ "go", "run", - os.path.join(HERE, "..", "client", "go", "kwkhtmltopdf_client.go"), + os.path.join( + HERE, "..", "client", "go", "pdf", "kwkhtmltopdf_client.go" + ), ] ) @@ -147,3 +149,25 @@ def test_status(): requests.get( urljoin(os.getenv("KWKHTMLTOPDF_SERVER_URL"), "/status") ).raise_for_status() + + +def test_image_go_client_generates_image(tmp_path): + out_path = tmp_path / "o.png" + cmd = [ + "go", + "run", + os.path.join(HERE, "..", "client", "go", "image", "kwkhtmltoimage_client.go"), + "test1.html", + str(out_path), + ] + r = subprocess.call(cmd, cwd=os.path.join(HERE, "data")) + assert r == 0 + with open(out_path, "rb") as f: + header = f.read(8) + + # Accept common wkhtmltoimage outputs. + # PNG signature: 89 50 4E 47 0D 0A 1A 0A + # JPEG signature (prefix): FF D8 FF + is_png = header == b"\x89PNG\r\n\x1a\n" + is_jpeg = header.startswith(b"\xff\xd8\xff") + assert is_png or is_jpeg, "unexpected image signature: %r" % header diff --git a/tox.ini b/tox.ini index d9b9f94..40f4fe6 100644 --- a/tox.ini +++ b/tox.ini @@ -10,6 +10,7 @@ deps = passenv = KWKHTMLTOPDF_SERVER_URL KWKHTMLTOPDF_BIN + KWKHTMLTOIMAGE_BIN HOME commands = pytest -v tests {posargs}