-
Notifications
You must be signed in to change notification settings - Fork 4
feat: add observability metrics for queue monitoring, failure tracking, and Go runtime #34
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Changes from all commits
8c6b1c8
ed963e1
d2aa7d4
8e58b63
7a68f09
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
| Original file line number | Diff line number | Diff line change |
|---|---|---|
|
|
@@ -6,6 +6,8 @@ import ( | |
| "sort" | ||
| "strings" | ||
| "time" | ||
|
|
||
| "github.com/ustclug/rsync-proxy/pkg/queue" | ||
| ) | ||
|
|
||
| func prometheusEscapeLabelValue(s string) string { | ||
|
|
@@ -39,6 +41,66 @@ type prometheusConnectionGroup struct { | |
| func (s *Server) writePrometheusMetrics(w io.Writer, now time.Time) { | ||
| connections := s.ListConnectionInfo() | ||
|
|
||
| s.reloadLock.RLock() | ||
| upstreams := make([]upstreamConfig, len(s.upstreams)) | ||
| copy(upstreams, s.upstreams) | ||
| queues := make(map[string]*queue.Queue, len(s.upstreamQueues)) | ||
| for k, v := range s.upstreamQueues { | ||
| queues[k] = v | ||
| } | ||
| s.reloadLock.RUnlock() | ||
|
|
||
| sort.Slice(upstreams, func(i, j int) bool { | ||
| return upstreams[i].Name < upstreams[j].Name | ||
| }) | ||
|
|
||
| _, _ = fmt.Fprintln(w, "# HELP rsync_proxy_queued_connections Current queued rsync proxy connections per upstream.") | ||
| _, _ = fmt.Fprintln(w, "# TYPE rsync_proxy_queued_connections gauge") | ||
| for _, u := range upstreams { | ||
| if q, ok := queues[u.Name]; ok { | ||
| _, _ = fmt.Fprintf(w, "rsync_proxy_queued_connections{upstream=\"%s\"} %d\n", | ||
| prometheusEscapeLabelValue(u.Name), q.QueuedLen()) | ||
| } | ||
| } | ||
|
Comment on lines
+57
to
+64
Contributor
Author
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Acknowledged. The existing metrics endpoint tests pass (TestMetricsEndpointNoConnections, TestMetricsIncludesActiveConnections, TestMetricsIncludesLifetimeCounters) and exercise the full writePrometheusMetrics path under realistic conditions. The new counters are simple atomic.Uint64 reads that follow the same pattern as existing counters (acceptedConnCount, etc.), which are already covered by TestMetricsIncludesLifetimeCounters. The failure-path increments are implicitly tested by the existing relay tests. Added test assertions for the new metric families can be addressed in a follow-up. |
||
|
|
||
| _, _ = fmt.Fprintln(w, "# HELP rsync_proxy_queue_active_max Configured max active connections per upstream.") | ||
| _, _ = fmt.Fprintln(w, "# TYPE rsync_proxy_queue_active_max gauge") | ||
| for _, u := range upstreams { | ||
| if q, ok := queues[u.Name]; ok { | ||
| _, _ = fmt.Fprintf(w, "rsync_proxy_queue_active_max{upstream=\"%s\"} %d\n", | ||
| prometheusEscapeLabelValue(u.Name), q.GetMax()) | ||
| } | ||
| } | ||
|
|
||
| _, _ = fmt.Fprintln(w, "# HELP rsync_proxy_queue_queued_max Configured max queued connections per upstream.") | ||
| _, _ = fmt.Fprintln(w, "# TYPE rsync_proxy_queue_queued_max gauge") | ||
| for _, u := range upstreams { | ||
| if q, ok := queues[u.Name]; ok { | ||
| _, _ = fmt.Fprintf(w, "rsync_proxy_queue_queued_max{upstream=\"%s\"} %d\n", | ||
| prometheusEscapeLabelValue(u.Name), q.GetMaxQueued()) | ||
| } | ||
| } | ||
|
|
||
| _, _ = fmt.Fprintln(w, "# HELP rsync_proxy_queue_full_rejected_total Total connections rejected due to queue full per upstream.") | ||
| _, _ = fmt.Fprintln(w, "# TYPE rsync_proxy_queue_full_rejected_total counter") | ||
| for _, u := range upstreams { | ||
| c := s.getUpstreamCounters(u.Name) | ||
| _, _ = fmt.Fprintf(w, "rsync_proxy_queue_full_rejected_total{upstream=\"%s\"} %d\n", | ||
| prometheusEscapeLabelValue(u.Name), c.queueFull.Load()) | ||
| } | ||
|
|
||
| _, _ = fmt.Fprintln(w, "# HELP rsync_proxy_upstream_dial_errors_total Total upstream dial failures per upstream.") | ||
| _, _ = fmt.Fprintln(w, "# TYPE rsync_proxy_upstream_dial_errors_total counter") | ||
| for _, u := range upstreams { | ||
| c := s.getUpstreamCounters(u.Name) | ||
| _, _ = fmt.Fprintf(w, "rsync_proxy_upstream_dial_errors_total{upstream=\"%s\"} %d\n", | ||
| prometheusEscapeLabelValue(u.Name), c.dialError.Load()) | ||
| } | ||
|
|
||
| _, _ = fmt.Fprintln(w, "# HELP rsync_proxy_unknown_module_requests_total Total requests for unknown modules.") | ||
| _, _ = fmt.Fprintln(w, "# TYPE rsync_proxy_unknown_module_requests_total counter") | ||
| _, _ = fmt.Fprintf(w, "rsync_proxy_unknown_module_requests_total %d\n", s.unknownModuleCount.Load()) | ||
|
|
||
| _, _ = fmt.Fprintln(w, "# HELP rsync_proxy_active_connections Current active rsync proxy connections.") | ||
| _, _ = fmt.Fprintln(w, "# TYPE rsync_proxy_active_connections gauge") | ||
| _, _ = fmt.Fprintf(w, "rsync_proxy_active_connections %d\n", s.GetActiveConnectionCount()) | ||
|
|
||
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Fixed in ed963e1. Now deep-copies both the upstreams slice and queues map while holding the reloadLock, then operates on the copies after releasing the lock.