From cb914ab6cc9c3444125b603819f9d77a4a738cb6 Mon Sep 17 00:00:00 2001 From: igorsatsyuk Date: Sun, 5 Apr 2026 11:19:24 +0300 Subject: [PATCH 1/6] Migrate tracing to Spring OTel starter and add observability tests --- pom.xml | 11 +-- src/main/resources/application.properties | 2 + .../observability/LoggingCorrelationTest.java | 62 ++++++++++++ .../service/KeycloakAuthServiceTest.java | 96 +++++++++++++++++++ 4 files changed, 163 insertions(+), 8 deletions(-) create mode 100644 src/test/java/lt/satsyuk/observability/LoggingCorrelationTest.java diff --git a/pom.xml b/pom.xml index b6d2165..53163af 100644 --- a/pom.xml +++ b/pom.xml @@ -143,15 +143,10 @@ micrometer-registry-prometheus - + - io.micrometer - micrometer-tracing-bridge-otel - - - - io.opentelemetry - opentelemetry-exporter-otlp + org.springframework.boot + spring-boot-starter-opentelemetry diff --git a/src/main/resources/application.properties b/src/main/resources/application.properties index cb85ef8..4bb282c 100644 --- a/src/main/resources/application.properties +++ b/src/main/resources/application.properties @@ -89,6 +89,8 @@ management.metrics.web.server.request.autotime.percentiles=0.5,0.95,0.99 management.tracing.sampling.probability=1.0 logging.pattern.level=%5p [${spring.application.name:},%X{traceId:-},%X{spanId:-}] management.otlp.tracing.endpoint=${MANAGEMENT_OTLP_TRACING_ENDPOINT:http://localhost:4318/v1/traces} +management.otlp.metrics.export.enabled=false +management.otlp.logging.export.enabled=false # ------------------------------------------------------------ # RATE LIMITING (custom Bucket4j filter) diff --git a/src/test/java/lt/satsyuk/observability/LoggingCorrelationTest.java b/src/test/java/lt/satsyuk/observability/LoggingCorrelationTest.java new file mode 100644 index 0000000..ccb57fb --- /dev/null +++ b/src/test/java/lt/satsyuk/observability/LoggingCorrelationTest.java @@ -0,0 +1,62 @@ +package lt.satsyuk.observability; + +import io.micrometer.tracing.Span; +import io.micrometer.tracing.Tracer; +import org.junit.jupiter.api.Test; +import org.junit.jupiter.api.extension.ExtendWith; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; +import org.springframework.beans.factory.annotation.Autowired; +import org.springframework.boot.SpringBootConfiguration; +import org.springframework.boot.autoconfigure.EnableAutoConfiguration; +import org.springframework.boot.test.context.SpringBootTest; +import org.springframework.boot.test.system.CapturedOutput; +import org.springframework.boot.test.system.OutputCaptureExtension; + +import static org.assertj.core.api.Assertions.assertThat; + +@SpringBootTest( + classes = LoggingCorrelationTest.TestApplication.class, + properties = { + "spring.main.web-application-type=none", + "spring.autoconfigure.exclude=" + + "org.springframework.boot.jdbc.autoconfigure.DataSourceAutoConfiguration," + + "org.springframework.boot.jpa.autoconfigure.HibernateJpaAutoConfiguration," + + "org.springframework.boot.data.jpa.autoconfigure.DataJpaRepositoriesAutoConfiguration," + + "org.springframework.boot.quartz.autoconfigure.QuartzAutoConfiguration," + + "org.springframework.boot.flyway.autoconfigure.FlywayAutoConfiguration" + } +) +@ExtendWith(OutputCaptureExtension.class) +class LoggingCorrelationTest { + + private static final Logger log = LoggerFactory.getLogger(LoggingCorrelationTest.class); + + @Autowired + private Tracer tracer; + + @Test + void logLineContainsTraceIdAndSpanIdWhenSpanIsActive(CapturedOutput output) { + Span span = tracer.nextSpan().name("logging-correlation-test").start(); + try (Tracer.SpanInScope scope = tracer.withSpan(span)) { + assertThat(scope).isNotNull(); + log.info("correlation-check-message"); + } finally { + span.end(); + } + + String logs = output.toString(); + assertThat(logs) + .contains("correlation-check-message") + .contains("traceId") + .contains("spanId"); + } + + @SpringBootConfiguration + @EnableAutoConfiguration + static class TestApplication { + } +} + + + diff --git a/src/test/java/lt/satsyuk/service/KeycloakAuthServiceTest.java b/src/test/java/lt/satsyuk/service/KeycloakAuthServiceTest.java index 87d736f..37c3bb3 100644 --- a/src/test/java/lt/satsyuk/service/KeycloakAuthServiceTest.java +++ b/src/test/java/lt/satsyuk/service/KeycloakAuthServiceTest.java @@ -2,6 +2,8 @@ import io.micrometer.core.instrument.simple.SimpleMeterRegistry; import lt.satsyuk.config.KeycloakProperties; +import lt.satsyuk.dto.KeycloakTokenResponse; +import lt.satsyuk.dto.LoginRequest; import lt.satsyuk.dto.LogoutRequest; import lt.satsyuk.dto.RefreshRequest; import lt.satsyuk.exception.KeycloakAuthException; @@ -10,9 +12,12 @@ import org.mockito.Mock; import org.mockito.junit.jupiter.MockitoExtension; import org.springframework.http.HttpEntity; +import org.springframework.http.HttpMethod; import org.springframework.http.HttpStatus; +import org.springframework.http.MediaType; import org.springframework.http.ResponseEntity; import org.springframework.test.util.ReflectionTestUtils; +import org.springframework.test.web.client.MockRestServiceServer; import org.springframework.web.client.RestTemplate; import static org.assertj.core.api.Assertions.assertThat; @@ -20,7 +25,12 @@ import static org.mockito.ArgumentMatchers.any; import static org.mockito.ArgumentMatchers.anyString; import static org.mockito.ArgumentMatchers.eq; +import static org.mockito.Mockito.doThrow; import static org.mockito.Mockito.when; +import static org.springframework.test.web.client.match.MockRestRequestMatchers.method; +import static org.springframework.test.web.client.match.MockRestRequestMatchers.requestTo; +import static org.springframework.test.web.client.response.MockRestResponseCreators.withStatus; +import static org.springframework.test.web.client.response.MockRestResponseCreators.withSuccess; @ExtendWith(MockitoExtension.class) class KeycloakAuthServiceTest { @@ -28,6 +38,8 @@ class KeycloakAuthServiceTest { private static final String CLIENT_ID = "client"; private static final String CLIENT_SECRET = "secret"; private static final String REFRESH_TOKEN = "refresh"; + private static final String USERNAME = "user"; + private static final String PASSWORD = "password"; @Mock private RestTemplate rest; @@ -96,5 +108,89 @@ void extractErrorMessageHandlesNotAllowed() { assertThat(result).isEqualTo("not_allowed"); } + + @Test + void loginCountersKeepSuccessAndFailureTags() { + SimpleMeterRegistry registry = new SimpleMeterRegistry(); + when(props.getTokenUrl()).thenReturn("http://token"); + + RestTemplate realRestTemplate = new RestTemplate(); + MockRestServiceServer mockServer = MockRestServiceServer.bindTo(realRestTemplate).build(); + mockServer.expect(requestTo("http://token")) + .andExpect(method(HttpMethod.POST)) + .andRespond(withSuccess( + "{\"access_token\":\"a\",\"refresh_token\":\"r\",\"token_type\":\"Bearer\"}", + MediaType.APPLICATION_JSON + )); + mockServer.expect(requestTo("http://token")) + .andExpect(method(HttpMethod.POST)) + .andRespond(withStatus(HttpStatus.UNAUTHORIZED) + .contentType(MediaType.APPLICATION_JSON) + .body("{\"error\":\"invalid_client\"}")); + + KeycloakAuthService service = new KeycloakAuthService(realRestTemplate, props, registry); + service.login(new LoginRequest(USERNAME, PASSWORD, CLIENT_ID, CLIENT_SECRET)); + + assertThatThrownBy(() -> service.login(new LoginRequest(USERNAME, PASSWORD, CLIENT_ID, CLIENT_SECRET))) + .isInstanceOf(KeycloakAuthException.class); + + mockServer.verify(); + + assertThat(counterCount(registry, "auth.login", "success")).isEqualTo(1.0); + assertThat(counterCount(registry, "auth.login", "failure")).isEqualTo(1.0); + } + + @Test + void refreshCountersKeepSuccessAndFailureTags() { + SimpleMeterRegistry registry = new SimpleMeterRegistry(); + when(props.getTokenUrl()).thenReturn("http://token"); + + KeycloakTokenResponse tokenResponse = new KeycloakTokenResponse().setAccessToken("a").setRefreshToken("r"); + when(rest.postForEntity(anyString(), any(HttpEntity.class), eq(KeycloakTokenResponse.class))) + .thenReturn(ResponseEntity.ok(tokenResponse)); + + KeycloakAuthService service = new KeycloakAuthService(rest, props, registry); + service.refresh(new RefreshRequest(REFRESH_TOKEN, CLIENT_ID, CLIENT_SECRET)); + + doThrow(new org.springframework.web.client.HttpClientErrorException( + HttpStatus.BAD_REQUEST, + "bad request", + "{\"error\":\"invalid_grant\"}".getBytes(), + java.nio.charset.StandardCharsets.UTF_8 + )).when(rest).postForEntity(anyString(), any(HttpEntity.class), eq(KeycloakTokenResponse.class)); + + assertThatThrownBy(() -> service.refresh(new RefreshRequest(REFRESH_TOKEN, CLIENT_ID, CLIENT_SECRET))) + .isInstanceOf(KeycloakAuthException.class); + + assertThat(counterCount(registry, "auth.refresh", "success")).isEqualTo(1.0); + assertThat(counterCount(registry, "auth.refresh", "failure")).isEqualTo(1.0); + } + + @Test + void logoutCountersKeepSuccessAndFailureTags() { + SimpleMeterRegistry registry = new SimpleMeterRegistry(); + when(props.getLogoutUrl()).thenReturn("http://logout"); + when(rest.postForEntity(anyString(), any(HttpEntity.class), eq(String.class))) + .thenReturn(ResponseEntity.ok("")); + + KeycloakAuthService service = new KeycloakAuthService(rest, props, registry); + service.logout(new LogoutRequest(REFRESH_TOKEN, CLIENT_ID, CLIENT_SECRET)); + + when(rest.postForEntity(anyString(), any(HttpEntity.class), eq(String.class))) + .thenReturn(ResponseEntity.ok("invalid_token")); + + assertThatThrownBy(() -> service.logout(new LogoutRequest(REFRESH_TOKEN, CLIENT_ID, CLIENT_SECRET))) + .isInstanceOf(KeycloakAuthException.class); + + assertThat(counterCount(registry, "auth.logout", "success")).isEqualTo(1.0); + assertThat(counterCount(registry, "auth.logout", "failure")).isEqualTo(1.0); + } + + private double counterCount(SimpleMeterRegistry registry, String name, String resultTag) { + return registry.get(name) + .tag("result", resultTag) + .counter() + .count(); + } } From cf92191a400367b26bf35b2c29681a400f2af926 Mon Sep 17 00:00:00 2001 From: igorsatsyuk Date: Sun, 5 Apr 2026 12:38:01 +0300 Subject: [PATCH 2/6] Adopt OTLP-first observability and remove Promtail path --- CHANGELOG.md | 2 +- KODA.md | 1 - README.md | 43 +++++++++++++-- docker-compose.yaml | 15 +----- .../dashboards/logs-dashboard.json | 4 +- grafana/provisioning/datasources/loki.yaml | 2 +- grafana/provisioning/datasources/tempo.yaml | 4 +- otel.yaml | 12 ++++- promtail-config.yaml | 53 ------------------- src/main/resources/application.properties | 4 +- 10 files changed, 61 insertions(+), 79 deletions(-) delete mode 100644 promtail-config.yaml diff --git a/CHANGELOG.md b/CHANGELOG.md index a447d58..fd27d47 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -28,7 +28,7 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0 - Structured JSON logging with Logstash encoder and trace/span IDs - Prometheus metrics via Spring Boot Actuator - Swagger/OpenAPI documentation (SpringDoc) -- Full observability stack via Docker Compose (Grafana, Prometheus, Tempo, Loki, Promtail) +- Full observability stack via Docker Compose (Grafana, Prometheus, Tempo, Loki) - Comprehensive integration tests with Testcontainers (Keycloak, PostgreSQL) - Negative testing with WireMock (network failures, timeouts, error responses) - Phone uniqueness validation with conflict handling diff --git a/KODA.md b/KODA.md index 579faa4..7158efa 100644 --- a/KODA.md +++ b/KODA.md @@ -51,7 +51,6 @@ The backend does not store client credentials for login/refresh/logout — the c ├── prometheus.yml — Prometheus configuration ├── tempo.yaml — Tempo configuration ├── loki-config.yaml — Loki configuration -├── promtail-config.yaml — Promtail configuration ├── otel.yaml — OpenTelemetry Collector configuration ├── src/main/java/lt/satsyuk/ — Source code │ ├── api/ — Controllers, DTOs diff --git a/README.md b/README.md index b55e755..f76c4cc 100644 --- a/README.md +++ b/README.md @@ -769,9 +769,46 @@ http://localhost:8081/actuator/prometheus http://localhost:8081/actuator/health ``` -**Tracing**: OpenTelemetry traces are exported to OTLP endpoint (configure in `application.properties`) - -**Logging**: Structured JSON logs with trace/span IDs via Logstash encoder +**OTLP-first Observability (recommended)** + +- traces: `Spring Boot -> OTLP -> OTel Collector -> Tempo` +- logs: `Spring Boot -> OTLP -> OTel Collector -> Loki` +- metrics: `Prometheus` pulls `/actuator/prometheus` + +```mermaid +flowchart LR + subgraph App[Spring Boot jwt-demo] + A1[HTTP metrics\nActuator /prometheus] + A2[Traces OTLP\nmanagement.otlp.tracing.endpoint] + A3[Logs OTLP\nmanagement.otlp.logging.endpoint] + end + + subgraph Infra[Observability Infra] + C[OTel Collector] + T[Tempo] + L[Loki] + P[Prometheus] + G[Grafana] + end + + A1 -->|pull /actuator/prometheus| P + A2 -->|OTLP traces| C + A3 -->|OTLP logs| C + C -->|traces| T + C -->|logs| L + + P --> G + T --> G + L --> G +``` + +**Recommended properties for Variant B** + +- `management.otlp.tracing.endpoint=${MANAGEMENT_OTLP_TRACING_ENDPOINT:http://localhost:4318/v1/traces}` +- `management.otlp.tracing.export.enabled=true` +- `management.otlp.logging.endpoint=${MANAGEMENT_OTLP_LOGGING_ENDPOINT:http://localhost:4318/v1/logs}` +- `management.otlp.logging.export.enabled=true` +- `management.otlp.metrics.export.enabled=false` (avoid duplicate metric ingestion with Prometheus scrape) --- diff --git a/docker-compose.yaml b/docker-compose.yaml index 36310a9..5e8d63e 100644 --- a/docker-compose.yaml +++ b/docker-compose.yaml @@ -76,6 +76,7 @@ services: KEYCLOAK_RESOURCE_CLIENT_SECRET: ${KEYCLOAK_RESOURCE_CLIENT_SECRET} MANAGEMENT_OTLP_TRACING_ENDPOINT: http://otel-collector:4318/v1/traces + MANAGEMENT_OTLP_LOGGING_ENDPOINT: http://otel-collector:4318/v1/logs depends_on: - keycloak - postgres-app @@ -122,20 +123,6 @@ services: - loki-data:/loki - loki-wal:/wal - # ------------------------------------------------------------ - # PROMTAIL (LOG SHIPPER) - # ------------------------------------------------------------ - promtail: - image: grafana/promtail:2.8.2 - container_name: promtail - volumes: - - /var/log:/var/log:ro - - /var/lib/docker/containers:/var/lib/docker/containers:ro - - /var/run/docker.sock:/var/run/docker.sock:ro - - ./promtail-config.yaml:/etc/promtail/promtail-config.yaml - command: -config.file=/etc/promtail/promtail-config.yaml - depends_on: - - loki # ------------------------------------------------------------ # PROMETHEUS (METRICS) diff --git a/grafana/provisioning/dashboards/logs-dashboard.json b/grafana/provisioning/dashboards/logs-dashboard.json index ee82fc9..38d14d7 100644 --- a/grafana/provisioning/dashboards/logs-dashboard.json +++ b/grafana/provisioning/dashboards/logs-dashboard.json @@ -30,11 +30,11 @@ }, "targets": [ { - "expr": "{job=\"jwt-demo\"}", + "expr": "{service_name=\"jwt-demo\"}", "refId": "A" } ], - "title": "Recent logs (jwt-demo)", + "title": "Recent logs (jwt-demo via OTLP)", "type": "logs" } ], diff --git a/grafana/provisioning/datasources/loki.yaml b/grafana/provisioning/datasources/loki.yaml index 41937e2..9d89553 100644 --- a/grafana/provisioning/datasources/loki.yaml +++ b/grafana/provisioning/datasources/loki.yaml @@ -13,4 +13,4 @@ datasources: derivedFields: - name: trace datasourceUid: tempo - matcher: "trace" + matcherRegex: '"traceId":"([a-f0-9]{32})"' diff --git a/grafana/provisioning/datasources/tempo.yaml b/grafana/provisioning/datasources/tempo.yaml index 3a50b03..300e48c 100644 --- a/grafana/provisioning/datasources/tempo.yaml +++ b/grafana/provisioning/datasources/tempo.yaml @@ -12,8 +12,8 @@ datasources: httpMethod: GET tracesToLogs: datasourceUid: 'loki' - tags: ['job', 'instance', 'pod', 'namespace'] - mappedTags: [{ key: 'service.name', value: 'service' }] + tags: ['service.name'] + mappedTags: [{ key: 'service.name', value: 'service_name' }] serviceMap: datasourceUid: 'Tempo' nodeGraph: diff --git a/otel.yaml b/otel.yaml index df7ef56..886f324 100644 --- a/otel.yaml +++ b/otel.yaml @@ -11,9 +11,19 @@ exporters: endpoint: tempo:4317 tls: insecure: true + loki: + endpoint: http://loki:3100/loki/api/v1/push + +processors: + batch: service: pipelines: traces: receivers: [otlp] - exporters: [otlp] \ No newline at end of file + processors: [batch] + exporters: [otlp] + logs: + receivers: [otlp] + processors: [batch] + exporters: [loki] diff --git a/promtail-config.yaml b/promtail-config.yaml deleted file mode 100644 index fb8ad2a..0000000 --- a/promtail-config.yaml +++ /dev/null @@ -1,53 +0,0 @@ -server: - http_listen_port: 9080 - grpc_listen_port: 0 - -positions: - filename: /tmp/positions.yaml - -clients: - - url: http://loki:3100/loki/api/v1/push - -scrape_configs: - - job_name: system - static_configs: - - targets: ['localhost'] - labels: - job: varlogs - __path__: /var/log/*log - - - job_name: docker - docker_sd_configs: - - host: unix:///var/run/docker.sock - refresh_interval: 5s - relabel_configs: - - source_labels: ['__meta_docker_container_label_com_docker_swarm_service_name'] - target_label: service - - source_labels: ['__meta_docker_container_label_com_docker_compose_service'] - target_label: compose_service - - source_labels: ['__meta_docker_container_label_com_docker_compose_project'] - target_label: compose_project - - source_labels: ['__meta_docker_container_name'] - target_label: container - - source_labels: ['__meta_docker_container_label_io_kubernetes_pod_namespace'] - target_label: namespace - - source_labels: ['__meta_docker_container_label_io_kubernetes_pod_name'] - target_label: pod - - source_labels: ['__meta_docker_container_label_io_kubernetes_pod_container_name'] - target_label: container_name - - source_labels: ['__meta_docker_container_label_com_docker_compose_service'] - target_label: job - - source_labels: ['__meta_docker_container_id'] - target_label: container_id - - source_labels: ['__path__'] - target_label: __path__ - - # WSL-aware job: if you run Docker Desktop with WSL2 or run Promtail inside WSL, - # контейнерные логи обычно доступны по пути /var/lib/docker/containers/*/*.log - # Этот job полезен, если docker_sd_configs не доступен (например, Promtail вне docker.sock). - - job_name: docker-wsl - static_configs: - - targets: ['localhost'] - labels: - job: docker-wsl - __path__: /var/lib/docker/containers/*/*.log diff --git a/src/main/resources/application.properties b/src/main/resources/application.properties index 4bb282c..2affec6 100644 --- a/src/main/resources/application.properties +++ b/src/main/resources/application.properties @@ -89,8 +89,10 @@ management.metrics.web.server.request.autotime.percentiles=0.5,0.95,0.99 management.tracing.sampling.probability=1.0 logging.pattern.level=%5p [${spring.application.name:},%X{traceId:-},%X{spanId:-}] management.otlp.tracing.endpoint=${MANAGEMENT_OTLP_TRACING_ENDPOINT:http://localhost:4318/v1/traces} +management.otlp.tracing.export.enabled=true management.otlp.metrics.export.enabled=false -management.otlp.logging.export.enabled=false +management.otlp.logging.endpoint=${MANAGEMENT_OTLP_LOGGING_ENDPOINT:http://localhost:4318/v1/logs} +management.otlp.logging.export.enabled=true # ------------------------------------------------------------ # RATE LIMITING (custom Bucket4j filter) From 86b9d21588d8d86ebeeacd981edfcfe574c9917a Mon Sep 17 00:00:00 2001 From: igorsatsyuk Date: Sun, 5 Apr 2026 15:27:59 +0300 Subject: [PATCH 3/6] Fix Sonar lambdas in KeycloakAuthServiceTest --- .../java/lt/satsyuk/service/KeycloakAuthServiceTest.java | 9 ++++++--- 1 file changed, 6 insertions(+), 3 deletions(-) diff --git a/src/test/java/lt/satsyuk/service/KeycloakAuthServiceTest.java b/src/test/java/lt/satsyuk/service/KeycloakAuthServiceTest.java index 37c3bb3..d16fe56 100644 --- a/src/test/java/lt/satsyuk/service/KeycloakAuthServiceTest.java +++ b/src/test/java/lt/satsyuk/service/KeycloakAuthServiceTest.java @@ -131,7 +131,8 @@ void loginCountersKeepSuccessAndFailureTags() { KeycloakAuthService service = new KeycloakAuthService(realRestTemplate, props, registry); service.login(new LoginRequest(USERNAME, PASSWORD, CLIENT_ID, CLIENT_SECRET)); - assertThatThrownBy(() -> service.login(new LoginRequest(USERNAME, PASSWORD, CLIENT_ID, CLIENT_SECRET))) + LoginRequest failedLoginRequest = new LoginRequest(USERNAME, PASSWORD, CLIENT_ID, CLIENT_SECRET); + assertThatThrownBy(() -> service.login(failedLoginRequest)) .isInstanceOf(KeycloakAuthException.class); mockServer.verify(); @@ -159,7 +160,8 @@ void refreshCountersKeepSuccessAndFailureTags() { java.nio.charset.StandardCharsets.UTF_8 )).when(rest).postForEntity(anyString(), any(HttpEntity.class), eq(KeycloakTokenResponse.class)); - assertThatThrownBy(() -> service.refresh(new RefreshRequest(REFRESH_TOKEN, CLIENT_ID, CLIENT_SECRET))) + RefreshRequest failedRefreshRequest = new RefreshRequest(REFRESH_TOKEN, CLIENT_ID, CLIENT_SECRET); + assertThatThrownBy(() -> service.refresh(failedRefreshRequest)) .isInstanceOf(KeycloakAuthException.class); assertThat(counterCount(registry, "auth.refresh", "success")).isEqualTo(1.0); @@ -179,7 +181,8 @@ void logoutCountersKeepSuccessAndFailureTags() { when(rest.postForEntity(anyString(), any(HttpEntity.class), eq(String.class))) .thenReturn(ResponseEntity.ok("invalid_token")); - assertThatThrownBy(() -> service.logout(new LogoutRequest(REFRESH_TOKEN, CLIENT_ID, CLIENT_SECRET))) + LogoutRequest failedLogoutRequest = new LogoutRequest(REFRESH_TOKEN, CLIENT_ID, CLIENT_SECRET); + assertThatThrownBy(() -> service.logout(failedLogoutRequest)) .isInstanceOf(KeycloakAuthException.class); assertThat(counterCount(registry, "auth.logout", "success")).isEqualTo(1.0); From 8625e52ed257cf75700db2bcc31785708fc7c46b Mon Sep 17 00:00:00 2001 From: igorsatsyuk Date: Sun, 5 Apr 2026 18:30:27 +0300 Subject: [PATCH 4/6] Finalize OTLP-only observability and fix Grafana dashboards --- README.md | 12 ++++----- docker-compose.yaml | 1 + .../provisioning/dashboards/app-metrics.json | 19 +++++++++++--- .../application-metrics-dashboard.json | 26 +++++++++---------- .../dashboards/logs-dashboard.json | 4 +++ .../dashboards/traces-dashboard.json | 12 ++++----- otel.yaml | 21 +++++++++++++-- pom.xml | 7 +++++ .../lt/satsyuk/config/OtelLogbackConfig.java | 21 +++++++++++++++ src/main/resources/application.properties | 8 +++--- src/main/resources/logback-spring.xml | 3 +++ 11 files changed, 99 insertions(+), 35 deletions(-) create mode 100644 src/main/java/lt/satsyuk/config/OtelLogbackConfig.java diff --git a/README.md b/README.md index f76c4cc..694fe9c 100644 --- a/README.md +++ b/README.md @@ -779,8 +779,8 @@ http://localhost:8081/actuator/health flowchart LR subgraph App[Spring Boot jwt-demo] A1[HTTP metrics\nActuator /prometheus] - A2[Traces OTLP\nmanagement.otlp.tracing.endpoint] - A3[Logs OTLP\nmanagement.otlp.logging.endpoint] + A2[Traces OTLP\nmanagement.opentelemetry.tracing.export.otlp.endpoint] + A3[Logs OTLP\nmanagement.opentelemetry.logging.export.otlp.endpoint] end subgraph Infra[Observability Infra] @@ -804,10 +804,10 @@ flowchart LR **Recommended properties for Variant B** -- `management.otlp.tracing.endpoint=${MANAGEMENT_OTLP_TRACING_ENDPOINT:http://localhost:4318/v1/traces}` -- `management.otlp.tracing.export.enabled=true` -- `management.otlp.logging.endpoint=${MANAGEMENT_OTLP_LOGGING_ENDPOINT:http://localhost:4318/v1/logs}` -- `management.otlp.logging.export.enabled=true` +- `management.opentelemetry.tracing.export.otlp.endpoint=${MANAGEMENT_OTLP_TRACING_ENDPOINT:http://localhost:4318/v1/traces}` +- `management.tracing.export.otlp.enabled=true` +- `management.opentelemetry.logging.export.otlp.endpoint=${MANAGEMENT_OTLP_LOGGING_ENDPOINT:http://localhost:4318/v1/logs}` +- `management.logging.export.otlp.enabled=true` - `management.otlp.metrics.export.enabled=false` (avoid duplicate metric ingestion with Prometheus scrape) --- diff --git a/docker-compose.yaml b/docker-compose.yaml index 5e8d63e..4453b79 100644 --- a/docker-compose.yaml +++ b/docker-compose.yaml @@ -77,6 +77,7 @@ services: MANAGEMENT_OTLP_TRACING_ENDPOINT: http://otel-collector:4318/v1/traces MANAGEMENT_OTLP_LOGGING_ENDPOINT: http://otel-collector:4318/v1/logs + OTEL_RESOURCE_ATTRIBUTES: service.name=jwt-demo,service.namespace=jwt-demo,deployment.environment=local depends_on: - keycloak - postgres-app diff --git a/grafana/provisioning/dashboards/app-metrics.json b/grafana/provisioning/dashboards/app-metrics.json index 9b4cc5d..0377f5d 100644 --- a/grafana/provisioning/dashboards/app-metrics.json +++ b/grafana/provisioning/dashboards/app-metrics.json @@ -659,7 +659,7 @@ }, "editorMode": "code", "exemplar": true, - "expr": "histogram_quantile(.99,sum(rate(http_server_requests_seconds_bucket{job=\"$job\", uri!=\"/actuator/prometheus\"}[1m])) by(uri, le))", + "expr": "histogram_quantile(.99,sum(rate(http_server_requests_seconds_bucket{job=\"$job\", uri!=\"/actuator/prometheus\"}[1m])) by(uri, le)) or avg by(uri) (http_server_requests_seconds{job=\"$job\", uri!=\"/actuator/prometheus\", quantile=\"0.99\"})", "interval": "", "legendFormat": "{{uri}}", "range": true, @@ -755,7 +755,7 @@ }, "editorMode": "code", "exemplar": true, - "expr": "histogram_quantile(.95,sum(rate(http_server_requests_seconds_bucket{job=\"$job\", uri!=\"/actuator/prometheus\"}[1m])) by(uri, le))", + "expr": "histogram_quantile(.95,sum(rate(http_server_requests_seconds_bucket{job=\"$job\", uri!=\"/actuator/prometheus\"}[1m])) by(uri, le)) or avg by(uri) (http_server_requests_seconds{job=\"$job\", uri!=\"/actuator/prometheus\", quantile=\"0.95\"})", "interval": "", "legendFormat": "{{uri}}", "range": true, @@ -992,7 +992,7 @@ "uid": "loki" }, "editorMode": "code", - "expr": "sum by(level) (rate({job=\"jwt-demo\"} |= \"$log_keyword\" | json | level != \"\" [1m]))", + "expr": "sum by(level) (rate({service_name=\"jwt-demo\",level=~\".+\"} |= \"$log_keyword\" != \"/actuator/prometheus\" [1m])) + sum by(level) (rate({job=~\".*jwt-demo.*\",level=~\".+\"} |= \"$log_keyword\" != \"/actuator/prometheus\" [1m]))", "legendFormat": "{{level}}", "queryType": "range", "refId": "A" @@ -1031,10 +1031,21 @@ "uid": "loki" }, "editorMode": "code", - "expr": "{job=\"jwt-demo\"} |= \"$log_keyword\" | json | line_format \"{{ index . \\\"@timestamp\\\" }}\\t{{.logger_name}}\\t{{.level}}\\ttrace_id={{.traceId}}\\tspan_id={{.spanId}}\\t{{.message}}\"", + "expr": "{service_name=\"jwt-demo\"} |= \"$log_keyword\" != \"/actuator/prometheus\" | json | line_format \"{{.severity}}\\t{{.body}}\"", "hide": false, "queryType": "range", "refId": "A" + }, + { + "datasource": { + "type": "loki", + "uid": "loki" + }, + "editorMode": "code", + "expr": "{job=~\".*jwt-demo.*\",service_name!~\".+\"} |= \"$log_keyword\" != \"/actuator/prometheus\" | json | line_format \"{{.severity}}\\t{{.body}}\"", + "hide": false, + "queryType": "range", + "refId": "B" } ], "title": "Log of All Spring Boot Apps", diff --git a/grafana/provisioning/dashboards/application-metrics-dashboard.json b/grafana/provisioning/dashboards/application-metrics-dashboard.json index 84912c5..ac619d3 100644 --- a/grafana/provisioning/dashboards/application-metrics-dashboard.json +++ b/grafana/provisioning/dashboards/application-metrics-dashboard.json @@ -437,7 +437,7 @@ "type": "prometheus", "uid": "prometheus" }, - "expr": "histogram_quantile(0.95, sum by(uri, le) (rate(http_server_requests_seconds_bucket{job=\"jwt-demo\",uri!=\"/actuator/prometheus\"}[5m]))) * 1000", + "expr": "(histogram_quantile(0.95, sum by(uri, le) (rate(http_server_requests_seconds_bucket{job=\"jwt-demo\",uri!=\"/actuator/prometheus\"}[5m]))) * 1000) or (avg by(uri) (http_server_requests_seconds{job=\"jwt-demo\",uri!=\"/actuator/prometheus\",quantile=\"0.95\"}) * 1000)", "legendFormat": "{{uri}}", "refId": "A" } @@ -2011,7 +2011,7 @@ "type": "prometheus", "uid": "prometheus" }, - "expr": "sum by(cache) (rate(cache_gets_total{job=\"jwt-demo\",result=\"hit\"}[5m])) / sum by(cache) (rate(cache_gets_total{job=\"jwt-demo\"}[5m]))", + "expr": "(sum by(cache) (rate(cache_gets_total{job=\"jwt-demo\",result=\"hit\"}[5m])) / clamp_min(sum by(cache) (rate(cache_gets_total{job=\"jwt-demo\"}[5m])), 1e-9)) or on() vector(0)", "legendFormat": "{{cache}}", "refId": "A" } @@ -2098,7 +2098,7 @@ "type": "prometheus", "uid": "prometheus" }, - "expr": "rate(cache_evictions_total{job=\"jwt-demo\"}[5m])", + "expr": "rate(cache_evictions_total{job=\"jwt-demo\"}[5m]) or on() vector(0)", "legendFormat": "{{cache}}", "refId": "A" } @@ -2184,7 +2184,7 @@ "type": "prometheus", "uid": "prometheus" }, - "expr": "bucket4j_summary_consumed_total{job=\"jwt-demo\"}", + "expr": "bucket4j_summary_available_tokens{job=\"jwt-demo\"} or bucket4j_summary_consumed_total{job=\"jwt-demo\"}", "legendFormat": "{{id}}", "refId": "A" } @@ -2271,7 +2271,7 @@ "type": "prometheus", "uid": "prometheus" }, - "expr": "rate(bucket4j_summary_rejected_total{job=\"jwt-demo\"}[5m])", + "expr": "rate(bucket4j_summary_rejected_total{job=\"jwt-demo\"}[5m]) or on() vector(0)", "legendFormat": "{{id}}", "refId": "A" } @@ -2358,7 +2358,7 @@ "type": "prometheus", "uid": "prometheus" }, - "expr": "sum by(uri) (rate(http_client_requests_seconds_count{job=\"jwt-demo\"}[5m]))", + "expr": "sum by(uri) (rate(http_client_requests_seconds_count{job=\"jwt-demo\"}[5m])) or on() vector(0)", "legendFormat": "{{uri}}", "refId": "A" } @@ -2445,7 +2445,7 @@ "type": "prometheus", "uid": "prometheus" }, - "expr": "histogram_quantile(0.95, sum by(uri, le) (rate(http_client_requests_seconds_bucket{job=\"jwt-demo\"}[5m]))) * 1000", + "expr": "(histogram_quantile(0.95, sum by(uri, le) (rate(http_client_requests_seconds_bucket{job=\"jwt-demo\"}[5m]))) * 1000) or (avg by(uri) (http_client_requests_seconds{job=\"jwt-demo\",quantile=\"0.95\"}) * 1000) or on() vector(0)", "legendFormat": "{{uri}}", "refId": "A" } @@ -2532,12 +2532,12 @@ "type": "prometheus", "uid": "prometheus" }, - "expr": "rate(container_cpu_usage_seconds_total{container=~\"jwt-demo.*\"}[5m])", - "legendFormat": "{{container}}", + "expr": "process_cpu_usage{job=\"jwt-demo\"}", + "legendFormat": "process_cpu_usage", "refId": "A" } ], - "title": "Container CPU Usage", + "title": "Process CPU Usage", "type": "timeseries" }, { @@ -2619,12 +2619,12 @@ "type": "prometheus", "uid": "prometheus" }, - "expr": "container_memory_working_set_bytes{container=~\"jwt-demo.*\"}", - "legendFormat": "{{container}}", + "expr": "sum(jvm_memory_used_bytes{job=\"jwt-demo\",area=\"heap\"})", + "legendFormat": "jvm_heap_used", "refId": "A" } ], - "title": "Container Memory Working Set", + "title": "JVM Heap Used", "type": "timeseries" } ], diff --git a/grafana/provisioning/dashboards/logs-dashboard.json b/grafana/provisioning/dashboards/logs-dashboard.json index 38d14d7..1cd3708 100644 --- a/grafana/provisioning/dashboards/logs-dashboard.json +++ b/grafana/provisioning/dashboards/logs-dashboard.json @@ -32,6 +32,10 @@ { "expr": "{service_name=\"jwt-demo\"}", "refId": "A" + }, + { + "expr": "{job=~\".*jwt-demo.*\",service_name!~\".+\"}", + "refId": "B" } ], "title": "Recent logs (jwt-demo via OTLP)", diff --git a/grafana/provisioning/dashboards/traces-dashboard.json b/grafana/provisioning/dashboards/traces-dashboard.json index da0ee6e..16a0a51 100644 --- a/grafana/provisioning/dashboards/traces-dashboard.json +++ b/grafana/provisioning/dashboards/traces-dashboard.json @@ -78,7 +78,7 @@ "limit": 20, "queryType": "traceql", "refId": "A", - "query": "{}" + "query": "{ span.http.route != \"/actuator/prometheus\" && name !~ \".*actuator/prometheus.*\" }" } ], "title": "Traces by Service", @@ -138,7 +138,7 @@ "limit": 20, "queryType": "traceql", "refId": "A", - "query": "{}" + "query": "{ name != \"http get /actuator/prometheus\" }" } ], "title": "Total Traces", @@ -226,7 +226,7 @@ "limit": 20, "queryType": "traceql", "refId": "A", - "query": "{}" + "query": "{ name != \"http get /actuator/prometheus\" }" } ], "title": "Request Duration (P95) by Service", @@ -313,7 +313,7 @@ "limit": 20, "queryType": "traceql", "refId": "A", - "query": "{}" + "query": "{ name != \"http get /actuator/prometheus\" }" } ], "title": "Request Rate by Service", @@ -391,7 +391,7 @@ "limit": 20, "queryType": "traceql", "refId": "A", - "query": "{ status = error }" + "query": "{ status = error && name != \"http get /actuator/prometheus\" }" } ], "title": "Error Traces", @@ -503,7 +503,7 @@ "limit": 50, "queryType": "traceql", "refId": "A", - "query": "{}" + "query": "{ name != \"http get /actuator/prometheus\" }" } ], "title": "Recent Traces", diff --git a/otel.yaml b/otel.yaml index 886f324..563c2df 100644 --- a/otel.yaml +++ b/otel.yaml @@ -15,15 +15,32 @@ exporters: endpoint: http://loki:3100/loki/api/v1/push processors: + memory_limiter: + check_interval: 1s + limit_mib: 256 + spike_limit_mib: 64 batch: + timeout: 2s + send_batch_size: 1024 + resource/logs: + attributes: + - action: upsert + key: service.name + value: jwt-demo + - action: upsert + key: loki.resource.labels + value: service.name,service.namespace,service.instance.id + - action: upsert + key: loki.attribute.labels + value: level,logger_name,traceId,spanId service: pipelines: traces: receivers: [otlp] - processors: [batch] + processors: [memory_limiter, batch] exporters: [otlp] logs: receivers: [otlp] - processors: [batch] + processors: [memory_limiter, resource/logs, batch] exporters: [loki] diff --git a/pom.xml b/pom.xml index 53163af..d246c22 100644 --- a/pom.xml +++ b/pom.xml @@ -156,6 +156,13 @@ 7.4 + + + io.opentelemetry.instrumentation + opentelemetry-logback-appender-1.0 + 2.26.1-alpha + + org.springframework.boot diff --git a/src/main/java/lt/satsyuk/config/OtelLogbackConfig.java b/src/main/java/lt/satsyuk/config/OtelLogbackConfig.java new file mode 100644 index 0000000..44ec925 --- /dev/null +++ b/src/main/java/lt/satsyuk/config/OtelLogbackConfig.java @@ -0,0 +1,21 @@ +package lt.satsyuk.config; + +import io.opentelemetry.api.OpenTelemetry; +import io.opentelemetry.instrumentation.logback.appender.v1_0.OpenTelemetryAppender; +import jakarta.annotation.PostConstruct; +import lombok.RequiredArgsConstructor; +import org.springframework.context.annotation.Configuration; + +@Configuration +@RequiredArgsConstructor +public class OtelLogbackConfig { + + private final OpenTelemetry openTelemetry; + + @PostConstruct + void installLogbackAppender() { + // Binds Logback appender to the OpenTelemetry instance configured by Spring Boot. + OpenTelemetryAppender.install(openTelemetry); + } +} + diff --git a/src/main/resources/application.properties b/src/main/resources/application.properties index 2affec6..d1aa833 100644 --- a/src/main/resources/application.properties +++ b/src/main/resources/application.properties @@ -88,11 +88,11 @@ management.metrics.web.server.request.autotime.percentiles=0.5,0.95,0.99 # ------------------------------------------------------------ management.tracing.sampling.probability=1.0 logging.pattern.level=%5p [${spring.application.name:},%X{traceId:-},%X{spanId:-}] -management.otlp.tracing.endpoint=${MANAGEMENT_OTLP_TRACING_ENDPOINT:http://localhost:4318/v1/traces} -management.otlp.tracing.export.enabled=true +management.opentelemetry.tracing.export.otlp.endpoint=${MANAGEMENT_OTLP_TRACING_ENDPOINT:http://localhost:4318/v1/traces} +management.tracing.export.otlp.enabled=true management.otlp.metrics.export.enabled=false -management.otlp.logging.endpoint=${MANAGEMENT_OTLP_LOGGING_ENDPOINT:http://localhost:4318/v1/logs} -management.otlp.logging.export.enabled=true +management.opentelemetry.logging.export.otlp.endpoint=${MANAGEMENT_OTLP_LOGGING_ENDPOINT:http://localhost:4318/v1/logs} +management.logging.export.otlp.enabled=true # ------------------------------------------------------------ # RATE LIMITING (custom Bucket4j filter) diff --git a/src/main/resources/logback-spring.xml b/src/main/resources/logback-spring.xml index 5df126d..0534719 100644 --- a/src/main/resources/logback-spring.xml +++ b/src/main/resources/logback-spring.xml @@ -19,7 +19,10 @@ + + + From 30c5f514c37ef295bf482620b51b383b9859df32 Mon Sep 17 00:00:00 2001 From: igorsatsyuk Date: Sun, 5 Apr 2026 18:49:39 +0300 Subject: [PATCH 5/6] Address PR review: LogQL union and README wording --- README.md | 2 +- grafana/provisioning/dashboards/app-metrics.json | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/README.md b/README.md index 694fe9c..bd6d592 100644 --- a/README.md +++ b/README.md @@ -802,7 +802,7 @@ flowchart LR L --> G ``` -**Recommended properties for Variant B** +**Recommended properties** - `management.opentelemetry.tracing.export.otlp.endpoint=${MANAGEMENT_OTLP_TRACING_ENDPOINT:http://localhost:4318/v1/traces}` - `management.tracing.export.otlp.enabled=true` diff --git a/grafana/provisioning/dashboards/app-metrics.json b/grafana/provisioning/dashboards/app-metrics.json index 0377f5d..afba424 100644 --- a/grafana/provisioning/dashboards/app-metrics.json +++ b/grafana/provisioning/dashboards/app-metrics.json @@ -992,7 +992,7 @@ "uid": "loki" }, "editorMode": "code", - "expr": "sum by(level) (rate({service_name=\"jwt-demo\",level=~\".+\"} |= \"$log_keyword\" != \"/actuator/prometheus\" [1m])) + sum by(level) (rate({job=~\".*jwt-demo.*\",level=~\".+\"} |= \"$log_keyword\" != \"/actuator/prometheus\" [1m]))", + "expr": "sum by(level) (rate({service_name=\"jwt-demo\",level=~\".+\"} |= \"$log_keyword\" != \"/actuator/prometheus\" [1m]) or rate({job=~\".*jwt-demo.*\",level=~\".+\"} |= \"$log_keyword\" != \"/actuator/prometheus\" [1m]))", "legendFormat": "{{level}}", "queryType": "range", "refId": "A" From 75a3367ed81ef1f90dc77723a56c104cb11295bb Mon Sep 17 00:00:00 2001 From: igorsatsyuk Date: Sun, 5 Apr 2026 22:04:56 +0300 Subject: [PATCH 6/6] fix(pom): downgrade otel logback appender to 2.21.0-alpha --- pom.xml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pom.xml b/pom.xml index d246c22..692b9ed 100644 --- a/pom.xml +++ b/pom.xml @@ -160,7 +160,7 @@ io.opentelemetry.instrumentation opentelemetry-logback-appender-1.0 - 2.26.1-alpha + 2.21.0-alpha