From b97aa7fd5a5f3cfffd38ebc6d9f40beee9166794 Mon Sep 17 00:00:00 2001 From: Lewis Buckley Date: Mon, 29 Jun 2026 12:27:39 +0100 Subject: [PATCH 1/4] Scope Prometheus dashboard queries by environment Production and staging share one Prometheus, so the uptime and probe-status dashboards, the live status page, and the rollup job all showed production data on staging. Filter every probe metric query by the environment label the OTel collector stamps (production/staging), leaving local/test queries unscoped. --- app/models/concerns/upright/services/live_status.rb | 9 +++++++-- app/models/upright/probes/status.rb | 2 +- app/models/upright/probes/uptime.rb | 2 +- app/models/upright/rollups/probe_rollup.rb | 7 ++++++- lib/upright.rb | 8 ++++++++ test/lib/helpers/rails_env_helper.rb | 8 ++++++++ test/models/upright/probes/status_test.rb | 12 ++++++++++++ test/test_helper.rb | 1 + test/upright_test.rb | 13 +++++++++++++ 9 files changed, 57 insertions(+), 5 deletions(-) create mode 100644 test/lib/helpers/rails_env_helper.rb diff --git a/app/models/concerns/upright/services/live_status.rb b/app/models/concerns/upright/services/live_status.rb index 567728f..c0a7882 100644 --- a/app/models/concerns/upright/services/live_status.rb +++ b/app/models/concerns/upright/services/live_status.rb @@ -26,18 +26,23 @@ def live_up_fraction def live_down_fraction response = Upright.prometheus_client.query( - query: "max(upright:probe_down_fraction{probe_service=\"#{code}\"}) or vector(0)" + query: live_down_query ).deep_symbolize_keys response.dig(:result, 0, :value, 1).to_f end def live_down_history(now:) response = Upright.prometheus_client.query_range( - query: "max(upright:probe_down_fraction{probe_service=\"#{code}\"}) or vector(0)", + query: live_down_query, start: (now - OUTAGE_LOOKBACK).iso8601, end: now.iso8601, step: "300s" ).deep_symbolize_keys response.dig(:result, 0, :values) || [] end + + def live_down_query + matchers = [ %(probe_service="#{code}"), Upright.environment_matcher ].compact + %(max(upright:probe_down_fraction{#{matchers.join(",")}}) or vector(0)) + end end diff --git a/app/models/upright/probes/status.rb b/app/models/upright/probes/status.rb index d15250c..88a6b7e 100644 --- a/app/models/upright/probes/status.rb +++ b/app/models/upright/probes/status.rb @@ -17,7 +17,7 @@ def query(probe_type) end def label_selector(probe_type) - matchers = [ "alert_severity!=\"\"" ] + matchers = [ "alert_severity!=\"\"", Upright.environment_matcher ].compact matchers << "type=\"#{probe_type}\"" if probe_type.present? "{#{matchers.join(",")}}" end diff --git a/app/models/upright/probes/uptime.rb b/app/models/upright/probes/uptime.rb index be688b1..575964b 100644 --- a/app/models/upright/probes/uptime.rb +++ b/app/models/upright/probes/uptime.rb @@ -21,7 +21,7 @@ def query(probe_type) end def label_selector(probe_type) - matchers = [ "alert_severity!=\"\"" ] + matchers = [ "alert_severity!=\"\"", Upright.environment_matcher ].compact matchers << "type=\"#{probe_type}\"" if probe_type.present? "{#{matchers.join(",")}}" end diff --git a/app/models/upright/rollups/probe_rollup.rb b/app/models/upright/rollups/probe_rollup.rb index 4d69588..aab47b5 100644 --- a/app/models/upright/rollups/probe_rollup.rb +++ b/app/models/upright/rollups/probe_rollup.rb @@ -23,7 +23,7 @@ def self.rollup_day(day) def self.fetch_uptime_for(day) query_time = [ day.end_of_day, Time.current ].min - response = Upright.prometheus_client.query(query: PROMETHEUS_METRIC, time: query_time.iso8601).deep_symbolize_keys + response = Upright.prometheus_client.query(query: uptime_query, time: query_time.iso8601).deep_symbolize_keys Array(response[:result]).map do |series| { @@ -34,6 +34,11 @@ def self.fetch_uptime_for(day) end end + def self.uptime_query + matcher = Upright.environment_matcher + matcher ? "#{PROMETHEUS_METRIC}{#{matcher}}" : PROMETHEUS_METRIC + end + def service Upright::Service.find_by(code: probe_service) if probe_service.present? end diff --git a/lib/upright.rb b/lib/upright.rb index 32afd42..61cc8a5 100644 --- a/lib/upright.rb +++ b/lib/upright.rb @@ -50,6 +50,14 @@ def prometheus_client ) end + def metrics_environment + Rails.env.to_s unless Rails.env.local? + end + + def environment_matcher + %(environment="#{metrics_environment}") if metrics_environment + end + def sites @sites ||= load_sites end diff --git a/test/lib/helpers/rails_env_helper.rb b/test/lib/helpers/rails_env_helper.rb new file mode 100644 index 0000000..419e4ba --- /dev/null +++ b/test/lib/helpers/rails_env_helper.rb @@ -0,0 +1,8 @@ +module RailsEnvHelper + def with_rails_env(name) + Rails.stubs(:env).returns(ActiveSupport::EnvironmentInquirer.new(name)) + yield + ensure + Rails.unstub(:env) + end +end diff --git a/test/models/upright/probes/status_test.rb b/test/models/upright/probes/status_test.rb index 3d5a0f2..79cc962 100644 --- a/test/models/upright/probes/status_test.rb +++ b/test/models/upright/probes/status_test.rb @@ -25,6 +25,18 @@ class Upright::Probes::StatusTest < ActiveSupport::TestCase assert_equal [], Upright::Probes::Status.for_type(:http) end + test ".for_type scopes the query to the deployment environment" do + with_rails_env("staging") do + stub_prometheus_query_range([]) + + Upright::Probes::Status.for_type(:http) + + assert_requested :get, /localhost:9090.*query_range/ do |request| + request.uri.query_values["query"].include?(%(environment="staging")) + end + end + end + test "probe exposes site statuses with up/down state" do stub_prometheus_query_range([ { "metric" => { "name" => "example.com", "type" => "http", "probe_target" => "https://example.com", "site_code" => "iad" }, diff --git a/test/test_helper.rb b/test/test_helper.rb index 96c8153..f3df713 100644 --- a/test/test_helper.rb +++ b/test/test_helper.rb @@ -30,6 +30,7 @@ class TestCase include IpApiHelper include MtrHelper + include RailsEnvHelper include SiteHelper include YabedaTestHelper diff --git a/test/upright_test.rb b/test/upright_test.rb index 187e6ad..9f18fcb 100644 --- a/test/upright_test.rb +++ b/test/upright_test.rb @@ -4,4 +4,17 @@ class UprightTest < ActiveSupport::TestCase test "version number" do assert Upright::VERSION end + + test "metrics_environment mirrors the deployment environment, nil locally" do + assert_equal "production", with_rails_env("production") { Upright.metrics_environment } + assert_equal "staging", with_rails_env("staging") { Upright.metrics_environment } + assert_nil with_rails_env("development") { Upright.metrics_environment } + assert_nil with_rails_env("test") { Upright.metrics_environment } + end + + test "environment_matcher scopes deployed queries, nil locally" do + assert_equal %(environment="production"), with_rails_env("production") { Upright.environment_matcher } + assert_equal %(environment="staging"), with_rails_env("staging") { Upright.environment_matcher } + assert_nil with_rails_env("development") { Upright.environment_matcher } + end end From 79db110dcfdf96a7315a5c76521f694cdd0c9a6e Mon Sep 17 00:00:00 2001 From: Lewis Buckley Date: Mon, 29 Jun 2026 12:33:50 +0100 Subject: [PATCH 2/4] Collapse to a single environment_matcher method --- lib/upright.rb | 6 +----- test/upright_test.rb | 8 +------- 2 files changed, 2 insertions(+), 12 deletions(-) diff --git a/lib/upright.rb b/lib/upright.rb index 61cc8a5..7cd7ee3 100644 --- a/lib/upright.rb +++ b/lib/upright.rb @@ -50,12 +50,8 @@ def prometheus_client ) end - def metrics_environment - Rails.env.to_s unless Rails.env.local? - end - def environment_matcher - %(environment="#{metrics_environment}") if metrics_environment + %(environment="#{Rails.env}") unless Rails.env.local? end def sites diff --git a/test/upright_test.rb b/test/upright_test.rb index 9f18fcb..9577320 100644 --- a/test/upright_test.rb +++ b/test/upright_test.rb @@ -5,16 +5,10 @@ class UprightTest < ActiveSupport::TestCase assert Upright::VERSION end - test "metrics_environment mirrors the deployment environment, nil locally" do - assert_equal "production", with_rails_env("production") { Upright.metrics_environment } - assert_equal "staging", with_rails_env("staging") { Upright.metrics_environment } - assert_nil with_rails_env("development") { Upright.metrics_environment } - assert_nil with_rails_env("test") { Upright.metrics_environment } - end - test "environment_matcher scopes deployed queries, nil locally" do assert_equal %(environment="production"), with_rails_env("production") { Upright.environment_matcher } assert_equal %(environment="staging"), with_rails_env("staging") { Upright.environment_matcher } assert_nil with_rails_env("development") { Upright.environment_matcher } + assert_nil with_rails_env("test") { Upright.environment_matcher } end end From fd6f1d67e5db20a6d9741943afad46e5a39549a1 Mon Sep 17 00:00:00 2001 From: Lewis Buckley Date: Mon, 29 Jun 2026 12:35:32 +0100 Subject: [PATCH 3/4] Inline environment matcher at each query site --- app/models/concerns/upright/services/live_status.rb | 3 ++- app/models/upright/probes/status.rb | 3 ++- app/models/upright/probes/uptime.rb | 3 ++- app/models/upright/rollups/probe_rollup.rb | 4 ++-- lib/upright.rb | 4 ---- test/upright_test.rb | 7 ------- 6 files changed, 8 insertions(+), 16 deletions(-) diff --git a/app/models/concerns/upright/services/live_status.rb b/app/models/concerns/upright/services/live_status.rb index c0a7882..62c0785 100644 --- a/app/models/concerns/upright/services/live_status.rb +++ b/app/models/concerns/upright/services/live_status.rb @@ -42,7 +42,8 @@ def live_down_history(now:) end def live_down_query - matchers = [ %(probe_service="#{code}"), Upright.environment_matcher ].compact + matchers = [ %(probe_service="#{code}") ] + matchers << %(environment="#{Rails.env}") unless Rails.env.local? %(max(upright:probe_down_fraction{#{matchers.join(",")}}) or vector(0)) end end diff --git a/app/models/upright/probes/status.rb b/app/models/upright/probes/status.rb index 88a6b7e..dc46a9f 100644 --- a/app/models/upright/probes/status.rb +++ b/app/models/upright/probes/status.rb @@ -17,7 +17,8 @@ def query(probe_type) end def label_selector(probe_type) - matchers = [ "alert_severity!=\"\"", Upright.environment_matcher ].compact + matchers = [ "alert_severity!=\"\"" ] + matchers << "environment=\"#{Rails.env}\"" unless Rails.env.local? matchers << "type=\"#{probe_type}\"" if probe_type.present? "{#{matchers.join(",")}}" end diff --git a/app/models/upright/probes/uptime.rb b/app/models/upright/probes/uptime.rb index 575964b..d5f676e 100644 --- a/app/models/upright/probes/uptime.rb +++ b/app/models/upright/probes/uptime.rb @@ -21,7 +21,8 @@ def query(probe_type) end def label_selector(probe_type) - matchers = [ "alert_severity!=\"\"", Upright.environment_matcher ].compact + matchers = [ "alert_severity!=\"\"" ] + matchers << "environment=\"#{Rails.env}\"" unless Rails.env.local? matchers << "type=\"#{probe_type}\"" if probe_type.present? "{#{matchers.join(",")}}" end diff --git a/app/models/upright/rollups/probe_rollup.rb b/app/models/upright/rollups/probe_rollup.rb index aab47b5..9ccc564 100644 --- a/app/models/upright/rollups/probe_rollup.rb +++ b/app/models/upright/rollups/probe_rollup.rb @@ -35,8 +35,8 @@ def self.fetch_uptime_for(day) end def self.uptime_query - matcher = Upright.environment_matcher - matcher ? "#{PROMETHEUS_METRIC}{#{matcher}}" : PROMETHEUS_METRIC + return PROMETHEUS_METRIC if Rails.env.local? + %(#{PROMETHEUS_METRIC}{environment="#{Rails.env}"}) end def service diff --git a/lib/upright.rb b/lib/upright.rb index 7cd7ee3..32afd42 100644 --- a/lib/upright.rb +++ b/lib/upright.rb @@ -50,10 +50,6 @@ def prometheus_client ) end - def environment_matcher - %(environment="#{Rails.env}") unless Rails.env.local? - end - def sites @sites ||= load_sites end diff --git a/test/upright_test.rb b/test/upright_test.rb index 9577320..187e6ad 100644 --- a/test/upright_test.rb +++ b/test/upright_test.rb @@ -4,11 +4,4 @@ class UprightTest < ActiveSupport::TestCase test "version number" do assert Upright::VERSION end - - test "environment_matcher scopes deployed queries, nil locally" do - assert_equal %(environment="production"), with_rails_env("production") { Upright.environment_matcher } - assert_equal %(environment="staging"), with_rails_env("staging") { Upright.environment_matcher } - assert_nil with_rails_env("development") { Upright.environment_matcher } - assert_nil with_rails_env("test") { Upright.environment_matcher } - end end From 935eade11f9fb6fa0ed271452d400eb7d7c8209f Mon Sep 17 00:00:00 2001 From: Lewis Buckley Date: Mon, 29 Jun 2026 12:39:48 +0100 Subject: [PATCH 4/4] Scope queries by environment unconditionally; label local seed data Local metrics now carry an environment label too (development), via the relabel_configs on the dev Prometheus scrape and the seeded series, so the queries no longer need to special-case local. --- app/models/concerns/upright/services/live_status.rb | 3 +-- app/models/upright/probes/status.rb | 3 +-- app/models/upright/probes/uptime.rb | 3 +-- app/models/upright/rollups/probe_rollup.rb | 1 - test/dummy/bin/seed-prometheus | 4 ++-- 5 files changed, 5 insertions(+), 9 deletions(-) diff --git a/app/models/concerns/upright/services/live_status.rb b/app/models/concerns/upright/services/live_status.rb index 62c0785..bfbc5df 100644 --- a/app/models/concerns/upright/services/live_status.rb +++ b/app/models/concerns/upright/services/live_status.rb @@ -42,8 +42,7 @@ def live_down_history(now:) end def live_down_query - matchers = [ %(probe_service="#{code}") ] - matchers << %(environment="#{Rails.env}") unless Rails.env.local? + matchers = [ %(probe_service="#{code}"), %(environment="#{Rails.env}") ] %(max(upright:probe_down_fraction{#{matchers.join(",")}}) or vector(0)) end end diff --git a/app/models/upright/probes/status.rb b/app/models/upright/probes/status.rb index dc46a9f..cda57e7 100644 --- a/app/models/upright/probes/status.rb +++ b/app/models/upright/probes/status.rb @@ -17,8 +17,7 @@ def query(probe_type) end def label_selector(probe_type) - matchers = [ "alert_severity!=\"\"" ] - matchers << "environment=\"#{Rails.env}\"" unless Rails.env.local? + matchers = [ "alert_severity!=\"\"", "environment=\"#{Rails.env}\"" ] matchers << "type=\"#{probe_type}\"" if probe_type.present? "{#{matchers.join(",")}}" end diff --git a/app/models/upright/probes/uptime.rb b/app/models/upright/probes/uptime.rb index d5f676e..6425232 100644 --- a/app/models/upright/probes/uptime.rb +++ b/app/models/upright/probes/uptime.rb @@ -21,8 +21,7 @@ def query(probe_type) end def label_selector(probe_type) - matchers = [ "alert_severity!=\"\"" ] - matchers << "environment=\"#{Rails.env}\"" unless Rails.env.local? + matchers = [ "alert_severity!=\"\"", "environment=\"#{Rails.env}\"" ] matchers << "type=\"#{probe_type}\"" if probe_type.present? "{#{matchers.join(",")}}" end diff --git a/app/models/upright/rollups/probe_rollup.rb b/app/models/upright/rollups/probe_rollup.rb index 9ccc564..f6eb186 100644 --- a/app/models/upright/rollups/probe_rollup.rb +++ b/app/models/upright/rollups/probe_rollup.rb @@ -35,7 +35,6 @@ def self.fetch_uptime_for(day) end def self.uptime_query - return PROMETHEUS_METRIC if Rails.env.local? %(#{PROMETHEUS_METRIC}{environment="#{Rails.env}"}) end diff --git a/test/dummy/bin/seed-prometheus b/test/dummy/bin/seed-prometheus index bd72ef5..923665e 100755 --- a/test/dummy/bin/seed-prometheus +++ b/test/dummy/bin/seed-prometheus @@ -92,7 +92,7 @@ echo "Generating seed data..." esac fi - echo "upright:probe_uptime_daily{name=\"${pname}\",type=\"${ptype}\",probe_target=\"${ptarget}\",probe_service=\"${pservice}\"} ${uptime} ${ts}.0" + echo "upright:probe_uptime_daily{name=\"${pname}\",type=\"${ptype}\",probe_target=\"${ptarget}\",probe_service=\"${pservice}\",environment=\"development\"} ${uptime} ${ts}.0" done done @@ -116,7 +116,7 @@ echo "Generating seed data..." val=0 fi - echo "upright_probe_up{name=\"${pname}\",type=\"${ptype}\",probe_target=\"${ptarget}\",probe_service=\"${pservice}\",alert_severity=\"high\",site_code=\"${scode}\",site_city=\"${scity}\",site_country=\"${scountry}\",site_geohash=\"${sgeohash}\",site_provider=\"${sprovider}\"} ${val}.0 ${ts}.0" + echo "upright_probe_up{name=\"${pname}\",type=\"${ptype}\",probe_target=\"${ptarget}\",probe_service=\"${pservice}\",alert_severity=\"high\",site_code=\"${scode}\",site_city=\"${scity}\",site_country=\"${scountry}\",site_geohash=\"${sgeohash}\",site_provider=\"${sprovider}\",environment=\"development\"} ${val}.0 ${ts}.0" done done done