From c1987b8632128aa46433f201f1b05772728e202b Mon Sep 17 00:00:00 2001 From: KirillKurdyukov Date: Wed, 6 May 2026 11:56:07 +0300 Subject: [PATCH] feat: add otel examples table & queryservice --- trace/README.md | 96 +++++ trace/application/query-service/Dockerfile | 13 + trace/application/query-service/README.md | 8 + trace/application/query-service/pom.xml | 97 +++++ .../ydb/trace/query/QueryServiceTraceApp.java | 188 ++++++++++ .../application/table-client-impl/Dockerfile | 13 + trace/application/table-client-impl/README.md | 8 + trace/application/table-client-impl/pom.xml | 97 +++++ .../trace/table/TableClientImplTraceApp.java | 198 ++++++++++ trace/compose-e2e.yaml | 137 +++++++ trace/grafana/dashboards/README.md | 3 + .../provisioning/dashboards/dashboards.yaml | 11 + .../provisioning/datasources/datasources.yaml | 20 + trace/otel/otel-collector-config.yaml | 44 +++ trace/prometheus/prometheus.yaml | 7 + trace/tempo/tempo.yaml | 15 + trace/ydb/README.md | 28 ++ trace/ydb/otel-tracing-snippet.yaml | 26 ++ trace/ydb/ydb-config.yaml | 354 ++++++++++++++++++ 19 files changed, 1363 insertions(+) create mode 100644 trace/README.md create mode 100644 trace/application/query-service/Dockerfile create mode 100644 trace/application/query-service/README.md create mode 100644 trace/application/query-service/pom.xml create mode 100644 trace/application/query-service/src/main/java/tech/ydb/trace/query/QueryServiceTraceApp.java create mode 100644 trace/application/table-client-impl/Dockerfile create mode 100644 trace/application/table-client-impl/README.md create mode 100644 trace/application/table-client-impl/pom.xml create mode 100644 trace/application/table-client-impl/src/main/java/tech/ydb/trace/table/TableClientImplTraceApp.java create mode 100644 trace/compose-e2e.yaml create mode 100644 trace/grafana/dashboards/README.md create mode 100644 trace/grafana/provisioning/dashboards/dashboards.yaml create mode 100644 trace/grafana/provisioning/datasources/datasources.yaml 
create mode 100644 trace/otel/otel-collector-config.yaml create mode 100644 trace/prometheus/prometheus.yaml create mode 100644 trace/tempo/tempo.yaml create mode 100644 trace/ydb/README.md create mode 100644 trace/ydb/otel-tracing-snippet.yaml create mode 100644 trace/ydb/ydb-config.yaml diff --git a/trace/README.md b/trace/README.md new file mode 100644 index 0000000..e31dc02 --- /dev/null +++ b/trace/README.md @@ -0,0 +1,96 @@ +# Tracing Infrastructure + +This folder contains local tracing infrastructure for examples: + +- YDB +- OpenTelemetry Collector +- Tempo +- Prometheus +- Grafana +- QueryService demo (`trace/application/query-service`, programmatic OTel) +- TableClientImpl demo (`trace/application/table-client-impl`, programmatic OTel) +- JDBC demo (`trace/application/jdbc-trace`, programmatic OTel + `enableOpenTelemetryTracer` on the URL) +- Spring Data JPA / Hibernate 6 (`trace/application/spring-data-jpa-v6-trace`, Spring Boot 3 + same OTel wiring) +- Spring Data JDBC (`trace/application/spring-data-jdbc-trace`, Spring Boot 3 + Flyway + same OTel wiring) + +Application runners are located in: + +- `trace/application/query-service/` +- `trace/application/table-client-impl/` +- `trace/application/jdbc-trace/` +- `trace/application/spring-data-jpa-v6-trace/` +- `trace/application/spring-data-jdbc-trace/` + +Prerequisite for SDK snapshots: + +```bash +mvn install -DskipTests +``` + +Trace applications are pinned to `ydb-sdk-*` version `2.4.1-SNAPSHOT`. Demos that use JDBC (`jdbc-trace`, Spring Data +modules) expect a matching `ydb-jdbc-driver` snapshot (see each module `pom.xml`); install it locally if it is not in +your cache. + +Spring Kotlin examples use Kotlin **2.2.x** (compatible with JDK 25 build hosts). 
+ +Build trace applications locally (uses your local Maven cache with snapshots): + +```bash +mvn -B -f trace/application/query-service/pom.xml clean package -DskipTests +mvn -B -f trace/application/table-client-impl/pom.xml clean package -DskipTests +mvn -B -f trace/application/jdbc-trace/pom.xml clean package -DskipTests +mvn -B -f trace/application/spring-data-jpa-v6-trace/pom.xml clean package -DskipTests +mvn -B -f trace/application/spring-data-jdbc-trace/pom.xml clean package -DskipTests +``` + +## Run + +```bash +docker compose -f trace/compose-e2e.yaml up -d +``` + +## Run QueryService demo + +```bash +docker compose -f trace/compose-e2e.yaml run --rm query-service-trace +``` + +## Run TableClientImpl demo + +```bash +docker compose -f trace/compose-e2e.yaml run --rm table-client-impl-trace +``` + +## Run JDBC demo (uncomment `jdbc-trace` in `compose-e2e.yaml` first) + +```bash +docker compose -f trace/compose-e2e.yaml run --rm jdbc-trace +``` + +## Run Spring Data JPA (v6) demo (uncomment `spring-data-jpa-v6-trace` in `compose-e2e.yaml` first) + +```bash +docker compose -f trace/compose-e2e.yaml run --rm spring-data-jpa-v6-trace +``` + +## Run Spring Data JDBC demo (uncomment `spring-data-jdbc-trace` in `compose-e2e.yaml` first) + +```bash +docker compose -f trace/compose-e2e.yaml run --rm spring-data-jdbc-trace +``` + +## Run all demos + +```bash +docker compose -f trace/compose-e2e.yaml run --rm query-service-trace +docker compose -f trace/compose-e2e.yaml run --rm table-client-impl-trace +docker compose -f trace/compose-e2e.yaml run --rm jdbc-trace +docker compose -f trace/compose-e2e.yaml run --rm spring-data-jpa-v6-trace +docker compose -f trace/compose-e2e.yaml run --rm spring-data-jdbc-trace +``` + +## Stop + +```bash +docker compose -f trace/compose-e2e.yaml down +``` diff --git a/trace/application/query-service/Dockerfile b/trace/application/query-service/Dockerfile new file mode 100644 index 0000000..e1c7344 --- /dev/null +++ b/trace/application/query-service/Dockerfile @@ -0,0 +1,13 @@ +FROM maven:3.9.11-eclipse-temurin-21 AS build +WORKDIR /workspace + +COPY . .
+RUN mvn -B -f trace/application/query-service/pom.xml clean package -DskipTests + +FROM eclipse-temurin:21-jre +WORKDIR /app + +COPY --from=build /workspace/trace/application/query-service/target/trace-query-service.jar /app/trace-query-service.jar +COPY --from=build /workspace/trace/application/query-service/target/libs /app/libs + +ENTRYPOINT ["java", "-cp", "/app/trace-query-service.jar:/app/libs/*", "tech.ydb.trace.query.QueryServiceTraceApp"] diff --git a/trace/application/query-service/README.md b/trace/application/query-service/README.md new file mode 100644 index 0000000..b5f5d69 --- /dev/null +++ b/trace/application/query-service/README.md @@ -0,0 +1,8 @@ +# QueryService trace demo + +This demo is a standalone Java application with programmatic OpenTelemetry setup. +It configures the OTLP exporter in code and attaches `OpenTelemetryTracer.fromOpenTelemetry(...)` to the YDB `GrpcTransport` builder via `withTracer`. + +- Source style: `query-example` scenario (implemented locally in `trace/`) +- Main class: `tech.ydb.trace.query.QueryServiceTraceApp` +- Service name in traces: `ydb-sdk-sample-query` diff --git a/trace/application/query-service/pom.xml b/trace/application/query-service/pom.xml new file mode 100644 index 0000000..fdaa088 --- /dev/null +++ b/trace/application/query-service/pom.xml @@ -0,0 +1,97 @@ + + + 4.0.0 + + + tech.ydb.examples + ydb-sdk-examples + 1.1.0-SNAPSHOT + ../../../pom.xml + + + trace-query-service + Trace QueryService Demo + + + 2.4.1-SNAPSHOT + 1.58.0 + 2.2.20 + + + + + tech.ydb + ydb-sdk-query + + + tech.ydb.auth + yc-auth-provider + + + org.apache.logging.log4j + log4j-slf4j-impl + + + + io.opentelemetry + opentelemetry-api + + + io.opentelemetry + opentelemetry-sdk + + + io.opentelemetry + opentelemetry-exporter-otlp + + + org.jetbrains.kotlin + kotlin-stdlib + ${kotlin.version} + + + + + + + tech.ydb + ydb-sdk-bom + ${ydb.sdk.version} + pom + import + + + io.opentelemetry + opentelemetry-bom + ${opentelemetry.version} + pom + import + + + + + +
trace-query-service + + + org.apache.maven.plugins + maven-dependency-plugin + + + org.apache.maven.plugins + maven-jar-plugin + + + + true + libs/ + tech.ydb.trace.query.QueryServiceTraceApp + + + + + + + diff --git a/trace/application/query-service/src/main/java/tech/ydb/trace/query/QueryServiceTraceApp.java b/trace/application/query-service/src/main/java/tech/ydb/trace/query/QueryServiceTraceApp.java new file mode 100644 index 0000000..75cee81 --- /dev/null +++ b/trace/application/query-service/src/main/java/tech/ydb/trace/query/QueryServiceTraceApp.java @@ -0,0 +1,188 @@ +package tech.ydb.trace.query; + +import io.opentelemetry.api.common.AttributeKey; +import io.opentelemetry.api.trace.Span; +import io.opentelemetry.api.trace.Tracer; +import io.opentelemetry.api.trace.propagation.W3CTraceContextPropagator; +import io.opentelemetry.context.Scope; +import io.opentelemetry.context.propagation.ContextPropagators; +import io.opentelemetry.exporter.otlp.trace.OtlpGrpcSpanExporter; +import io.opentelemetry.sdk.OpenTelemetrySdk; +import io.opentelemetry.sdk.resources.Resource; +import io.opentelemetry.sdk.trace.SdkTracerProvider; +import io.opentelemetry.sdk.trace.export.BatchSpanProcessor; +import io.opentelemetry.sdk.trace.export.SpanExporter; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; +import tech.ydb.auth.iam.CloudAuthHelper; +import tech.ydb.common.transaction.TxMode; +import tech.ydb.core.grpc.GrpcTransport; +import tech.ydb.core.tracing.OpenTelemetryTracer; +import tech.ydb.query.QueryClient; +import tech.ydb.query.QueryTransaction; +import tech.ydb.query.tools.QueryReader; +import tech.ydb.query.tools.SessionRetryContext; +import tech.ydb.table.query.Params; +import tech.ydb.table.result.ResultSetReader; +import tech.ydb.table.values.PrimitiveValue; + +import java.util.ArrayList; +import java.util.List; +import java.util.concurrent.CompletableFuture; + /** Demo app: drives a small read-modify-write scenario through the YDB QueryClient and exports OpenTelemetry spans over OTLP/gRPC. */ +public final class QueryServiceTraceApp { + private static final Logger logger =
LoggerFactory.getLogger(QueryServiceTraceApp.class); + private static final String TABLE = "bank_query_service"; + private static final String ACTIVITY_SOURCE = "Ydb.Sdk.Java.QueryService.OpenTelemetry.Sample"; + /** Static entry point only; never instantiated. */ + private QueryServiceTraceApp() { + } + /** Builds the OpenTelemetry SDK, opens the transport with the OpenTelemetry tracer attached, runs the scenario, and closes the SDK in finally whether or not the scenario succeeded. Connection string, service name and collector endpoint are hard-coded to the compose service host names. */ + public static void main(String[] args) { + String connectionString = "grpc://ydb:2136/local"; + String serviceName = "ydb-sdk-sample-query"; + String otelEndpoint = "http://otel-collector:4317"; + String serviceVersion = resolveServiceVersion(); + + OpenTelemetrySdk openTelemetry = createOpenTelemetry(serviceName, serviceVersion, otelEndpoint); + Tracer appTracer = openTelemetry.getTracer(ACTIVITY_SOURCE); + + try (GrpcTransport transport = GrpcTransport.forConnectionString(connectionString) + .withAuthProvider(CloudAuthHelper.getAuthProviderFromEnviron()) + .withTracer(OpenTelemetryTracer.fromOpenTelemetry(openTelemetry)) + .build(); + QueryClient queryClient = QueryClient.newClient(transport).build()) { + + SessionRetryContext retryCtx = SessionRetryContext.create(queryClient).build(); + runScenario(retryCtx, appTracer, serviceName); + System.out.println("App finished."); + } finally { + openTelemetry.close(); + } + } + /** Recreates the table under an "app.startup" span, inserts row id=1 with amount 0, increments it once, then runs 10 concurrent increments (one "example_tli" span each) and logs the resulting amount. */ + private static void runScenario(SessionRetryContext retryCtx, Tracer appTracer, String serviceName) { + System.out.println("[" + java.time.OffsetDateTime.now() + "] started, service.name=" + serviceName); + System.out.println("Initializing..."); + + Span startupSpan = appTracer.spanBuilder("app.startup").startSpan(); + try (Scope ignored = startupSpan.makeCurrent()) { + startupSpan.setAttribute("app.message", "hello"); + safeDropTable(retryCtx); + createTable(retryCtx); + } finally { + startupSpan.end(); + } + + System.out.println("Insert row..."); + retryCtx.supplyResult(session -> session.createQuery( + "INSERT INTO " + TABLE + "(id, amount) VALUES (1, 0);", + TxMode.SERIALIZABLE_RW + ).execute()).join().getStatus().expectSuccess("insert failed"); + + System.out.println("Preparing
queries..."); + incrementInTransaction(retryCtx); + + System.out.println("Emulation TLI..."); + List<CompletableFuture<Void>> tasks = new ArrayList<>(); + for (int i = 0; i < 10; i++) { + final int taskNum = i; + tasks.add(CompletableFuture.runAsync(() -> { + Span concurrent = appTracer.spanBuilder("example_tli").startSpan(); + try (Scope ignored = concurrent.makeCurrent()) { + concurrent.setAttribute("app.message", "concurrent task " + taskNum); + incrementInTransaction(retryCtx); + } finally { + concurrent.end(); + } + })); + } + CompletableFuture.allOf(tasks.toArray(new CompletableFuture[0])).join(); + + System.out.println("Retry connection example..."); + int amount = readAmount(retryCtx); + logger.info("Current amount={}", amount); + } + /** Creates TABLE (id Int32 primary key, amount Int32) with a query executed in TxMode.NONE. */ + private static void createTable(SessionRetryContext retryCtx) { + retryCtx.supplyResult(session -> session.createQuery( + "CREATE TABLE " + TABLE + "(id Int32, amount Int32, PRIMARY KEY (id));", + TxMode.NONE + ).execute()).join().getStatus().expectSuccess("create table failed"); + } + /** Best-effort DROP TABLE: a RuntimeException from the attempt is logged at debug level and otherwise ignored. */ + private static void safeDropTable(SessionRetryContext retryCtx) { + try { + retryCtx.supplyResult(session -> session.createQuery( + "DROP TABLE " + TABLE + ";", + TxMode.NONE + ).execute()).join().getStatus().expectSuccess("drop table failed"); + } catch (RuntimeException ex) { + logger.debug("Drop table skipped: {}", ex.getMessage()); + } + } + /** Under the retry context: begins a SERIALIZABLE_RW transaction, reads amount, writes the read value + 1 and commits. */ + private static void incrementInTransaction(SessionRetryContext retryCtx) { + retryCtx.supplyResult(session -> { + QueryTransaction tx = session.beginTransaction(TxMode.SERIALIZABLE_RW).join().getValue(); + int count = readAmount(tx); + tx.createQuery( + "DECLARE $amount AS Int32; UPDATE " + TABLE + " SET amount = $amount + 1 WHERE id = 1;", + Params.of("$amount", PrimitiveValue.newInt32(count)) + ).execute().join().getStatus().expectSuccess("update failed"); + return tx.commit(); + }).join().getStatus().expectSuccess("transaction failed"); + } + /** Reads amount of row id=1 with a SNAPSHOT_RO query; throws IllegalStateException when no row comes back. */ + private static int readAmount(SessionRetryContext retryCtx) { + QueryReader
reader = retryCtx.supplyResult(session -> QueryReader.readFrom( + session.createQuery( + "SELECT amount FROM " + TABLE + " WHERE id = 1;", + TxMode.SNAPSHOT_RO + ) + )).join().getValue(); + ResultSetReader rs = reader.getResultSet(0); + if (!rs.next()) { + throw new IllegalStateException("row not found"); + } + return rs.getColumn("amount").getInt32(); + } + /** Reads amount of row id=1 inside the given transaction; throws IllegalStateException when no row comes back. */ + private static int readAmount(QueryTransaction tx) { + QueryReader reader = QueryReader.readFrom( + tx.createQuery("SELECT amount FROM " + TABLE + " WHERE id = 1;") + ).join().getValue(); + ResultSetReader rs = reader.getResultSet(0); + if (!rs.next()) { + throw new IllegalStateException("row not found"); + } + return rs.getColumn("amount").getInt32(); + } + /** Assembles the SDK: default resource merged with service.name/service.version, a batch span processor feeding an OTLP gRPC exporter at otelEndpoint, and W3C trace-context propagation. */ + private static OpenTelemetrySdk createOpenTelemetry(String serviceName, String serviceVersion, String otelEndpoint) { + Resource resource = Resource.getDefault().merge(Resource.builder() + .put(AttributeKey.stringKey("service.name"), serviceName) + .put(AttributeKey.stringKey("service.version"), serviceVersion) + .build()); + + SpanExporter exporter = OtlpGrpcSpanExporter.builder() + .setEndpoint(otelEndpoint) + .build(); + + SdkTracerProvider tracerProvider = SdkTracerProvider.builder() + .setResource(resource) + .addSpanProcessor(BatchSpanProcessor.builder(exporter).build()) + .build(); + + return OpenTelemetrySdk.builder() + .setTracerProvider(tracerProvider) + .setPropagators(ContextPropagators.create(W3CTraceContextPropagator.getInstance())) + .build(); + } + /** Returns the package's implementation version, or "unknown" when it is null or empty. */ + private static String resolveServiceVersion() { + Package pkg = QueryServiceTraceApp.class.getPackage(); + String version = pkg == null ? null : pkg.getImplementationVersion(); + return version == null || version.isEmpty() ?
"unknown" : version; + } +} diff --git a/trace/application/table-client-impl/Dockerfile b/trace/application/table-client-impl/Dockerfile new file mode 100644 index 0000000..5afc64f --- /dev/null +++ b/trace/application/table-client-impl/Dockerfile @@ -0,0 +1,13 @@ +FROM maven:3.9.11-eclipse-temurin-21 AS build +WORKDIR /workspace + +COPY . . +RUN mvn -B -f trace/application/table-client-impl/pom.xml clean package -DskipTests + +FROM eclipse-temurin:21-jre +WORKDIR /app + +COPY --from=build /workspace/trace/application/table-client-impl/target/trace-table-client-impl.jar /app/trace-table-client-impl.jar +COPY --from=build /workspace/trace/application/table-client-impl/target/libs /app/libs + +ENTRYPOINT ["java", "-cp", "/app/trace-table-client-impl.jar:/app/libs/*", "tech.ydb.trace.table.TableClientImplTraceApp"] diff --git a/trace/application/table-client-impl/README.md b/trace/application/table-client-impl/README.md new file mode 100644 index 0000000..86f43f9 --- /dev/null +++ b/trace/application/table-client-impl/README.md @@ -0,0 +1,8 @@ +# TableClientImpl trace demo + +This demo is a standalone Java application with programmatic OpenTelemetry setup. +It configures the OTLP exporter in code and attaches `OpenTelemetryTracer.fromOpenTelemetry(...)` to the YDB `GrpcTransport` builder via `withTracer`.
+ +- Source style: `basic_example` scenario (implemented locally in `trace/`) +- Main class: `tech.ydb.trace.table.TableClientImplTraceApp` +- Service name in traces: `ydb-sdk-sample-table` diff --git a/trace/application/table-client-impl/pom.xml b/trace/application/table-client-impl/pom.xml new file mode 100644 index 0000000..392c77f --- /dev/null +++ b/trace/application/table-client-impl/pom.xml @@ -0,0 +1,97 @@ + + + 4.0.0 + + + tech.ydb.examples + ydb-sdk-examples + 1.1.0-SNAPSHOT + ../../../pom.xml + + + trace-table-client-impl + Trace TableClientImpl Demo + + + 2.4.1-SNAPSHOT + 1.58.0 + 2.2.20 + + + + + tech.ydb + ydb-sdk-query + + + tech.ydb.auth + yc-auth-provider + + + org.apache.logging.log4j + log4j-slf4j-impl + + + + io.opentelemetry + opentelemetry-api + + + io.opentelemetry + opentelemetry-sdk + + + io.opentelemetry + opentelemetry-exporter-otlp + + + org.jetbrains.kotlin + kotlin-stdlib + ${kotlin.version} + + + + + + + tech.ydb + ydb-sdk-bom + ${ydb.sdk.version} + pom + import + + + io.opentelemetry + opentelemetry-bom + ${opentelemetry.version} + pom + import + + + + + + trace-table-client-impl + + + org.apache.maven.plugins + maven-dependency-plugin + + + org.apache.maven.plugins + maven-jar-plugin + + + + true + libs/ + tech.ydb.trace.table.TableClientImplTraceApp + + + + + + + diff --git a/trace/application/table-client-impl/src/main/java/tech/ydb/trace/table/TableClientImplTraceApp.java b/trace/application/table-client-impl/src/main/java/tech/ydb/trace/table/TableClientImplTraceApp.java new file mode 100644 index 0000000..e301ba5 --- /dev/null +++ b/trace/application/table-client-impl/src/main/java/tech/ydb/trace/table/TableClientImplTraceApp.java @@ -0,0 +1,198 @@ +package tech.ydb.trace.table; + +import io.opentelemetry.api.common.AttributeKey; +import io.opentelemetry.api.trace.Span; +import io.opentelemetry.api.trace.Tracer; +import io.opentelemetry.api.trace.propagation.W3CTraceContextPropagator; +import
io.opentelemetry.context.Scope; +import io.opentelemetry.context.propagation.ContextPropagators; +import io.opentelemetry.exporter.otlp.trace.OtlpGrpcSpanExporter; +import io.opentelemetry.sdk.OpenTelemetrySdk; +import io.opentelemetry.sdk.resources.Resource; +import io.opentelemetry.sdk.trace.SdkTracerProvider; +import io.opentelemetry.sdk.trace.export.BatchSpanProcessor; +import io.opentelemetry.sdk.trace.export.SpanExporter; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; +import tech.ydb.auth.iam.CloudAuthHelper; +import tech.ydb.core.grpc.GrpcTransport; +import tech.ydb.core.tracing.OpenTelemetryTracer; +import tech.ydb.query.QueryClient; +import tech.ydb.table.SessionRetryContext; +import tech.ydb.table.TableClient; +import tech.ydb.table.description.TableDescription; +import tech.ydb.table.query.DataQueryResult; +import tech.ydb.table.query.Params; +import tech.ydb.table.result.ResultSetReader; +import tech.ydb.table.transaction.TableTransaction; +import tech.ydb.table.transaction.TxControl; +import tech.ydb.table.values.PrimitiveType; +import tech.ydb.table.values.PrimitiveValue; + +import java.util.ArrayList; +import java.util.List; +import java.util.concurrent.CompletableFuture; + /** Demo app: drives a small read-modify-write scenario through the YDB TableClient and exports OpenTelemetry spans over OTLP/gRPC. */ +public final class TableClientImplTraceApp { + private static final Logger logger = LoggerFactory.getLogger(TableClientImplTraceApp.class); + private static final String TABLE = "bank_table_service"; + private static final String ACTIVITY_SOURCE = "Ydb.Sdk.Java.TableClientImpl.OpenTelemetry.Sample"; + /** Static entry point only; never instantiated. */ + private TableClientImplTraceApp() { } + /** Builds the OpenTelemetry SDK, opens the transport with the OpenTelemetry tracer attached, runs the scenario, and closes the SDK in finally whether or not the scenario succeeded. Connection string, service name and collector endpoint are hard-coded to the compose service host names. */ + public static void main(String[] args) { + String connectionString = "grpc://ydb:2136/local"; + String serviceName = "ydb-sdk-sample-table"; + String otelEndpoint = "http://otel-collector:4317"; + String serviceVersion = resolveServiceVersion(); + + OpenTelemetrySdk openTelemetry = createOpenTelemetry(serviceName, serviceVersion, otelEndpoint); + Tracer appTracer = openTelemetry.getTracer(ACTIVITY_SOURCE); + + try
(GrpcTransport transport = GrpcTransport.forConnectionString(connectionString) + .withAuthProvider(CloudAuthHelper.getAuthProviderFromEnviron()) + .withTracer(OpenTelemetryTracer.fromOpenTelemetry(openTelemetry)) + .build(); + TableClient tableClient = QueryClient.newTableClient(transport).build()) { + + SessionRetryContext retryCtx = SessionRetryContext.create(tableClient).build(); + runScenario(transport.getDatabase(), retryCtx, appTracer, serviceName); + System.out.println("App finished."); + } finally { + openTelemetry.close(); + } + } + /** Recreates the table at database + "/" + TABLE under an "app.startup" span, inserts row id=1 with amount 0, increments it once, then runs 10 concurrent increments (one "example_tli" span each) and logs the resulting amount. */ + private static void runScenario(String database, SessionRetryContext retryCtx, Tracer appTracer, String serviceName) { + String tablePath = database + "/" + TABLE; + System.out.println("[" + java.time.OffsetDateTime.now() + "] started, service.name=" + serviceName); + System.out.println("Initializing..."); + + Span startupSpan = appTracer.spanBuilder("app.startup").startSpan(); + try (Scope ignored = startupSpan.makeCurrent()) { + startupSpan.setAttribute("app.message", "hello"); + safeDropTable(tablePath, retryCtx); + createTable(tablePath, retryCtx); + } finally { + startupSpan.end(); + } + + System.out.println("Insert row..."); + insertRow(retryCtx); + + System.out.println("Preparing queries..."); + incrementInTransaction(retryCtx); + + System.out.println("Emulation TLI..."); + List<CompletableFuture<Void>> tasks = new ArrayList<>(); + for (int i = 0; i < 10; i++) { + final int taskNum = i; + tasks.add(CompletableFuture.runAsync(() -> { + Span concurrent = appTracer.spanBuilder("example_tli").startSpan(); + try (Scope ignored = concurrent.makeCurrent()) { + concurrent.setAttribute("app.message", "concurrent task " + taskNum); + incrementInTransaction(retryCtx); + } finally { + concurrent.end(); + } + })); + } + CompletableFuture.allOf(tasks.toArray(new CompletableFuture[0])).join(); + + System.out.println("Retry connection example..."); + int amount = readAmount(retryCtx); + logger.info("Current amount={}", amount); + } + /** Creates the table at tablePath with a non-null Int32 id primary key and a nullable Int32 amount column. */ + private static void
createTable(String tablePath, SessionRetryContext retryCtx) { + TableDescription tableDescription = TableDescription.newBuilder() + .addNonnullColumn("id", PrimitiveType.Int32) + .addNullableColumn("amount", PrimitiveType.Int32) + .setPrimaryKey("id") + .build(); + retryCtx.supplyStatus(session -> session.createTable(tablePath, tableDescription)) + .join().expectSuccess("create table failed"); + } + /** Best-effort drop of the table at tablePath: a RuntimeException from the attempt is logged at debug level and otherwise ignored. */ + private static void safeDropTable(String tablePath, SessionRetryContext retryCtx) { + try { + retryCtx.supplyStatus(session -> session.dropTable(tablePath)) + .join().expectSuccess("drop table failed"); + } catch (RuntimeException ex) { + logger.debug("Drop table skipped: {}", ex.getMessage()); + } + } + /** Inserts row id=1, amount=0 through a parameterized data query run with a serializable read-write TxControl that has commit enabled. */ + private static void insertRow(SessionRetryContext retryCtx) { + TxControl txControl = TxControl.serializableRw().setCommitTx(true); + retryCtx.supplyResult(session -> session.executeDataQuery( + "DECLARE $id AS Int32; DECLARE $amount AS Int32; " + + "INSERT INTO " + TABLE + "(id, amount) VALUES ($id, $amount);", + txControl, + Params.of("$id", PrimitiveValue.newInt32(1), "$amount", PrimitiveValue.newInt32(0)) + )).join().getStatus().expectSuccess("insert failed"); + } + /** Under the retry context: opens a SERIALIZABLE_RW transaction, reads amount, writes the read value + 1 and commits. */ + private static void incrementInTransaction(SessionRetryContext retryCtx) { + retryCtx.supplyStatus(session -> { + TableTransaction tx = session.createNewTransaction(tech.ydb.common.transaction.TxMode.SERIALIZABLE_RW); + int count = readAmount(tx); + tx.executeDataQuery( + "DECLARE $amount AS Int32; UPDATE " + TABLE + " SET amount = $amount + 1 WHERE id = 1", + Params.of("$amount", PrimitiveValue.newInt32(count)) + ).join().getStatus().expectSuccess("update failed"); + return tx.commit(); + }).join().expectSuccess("transaction failed"); + } + /** Reads amount of row id=1 with a snapshot read-only TxControl that has commit enabled; throws IllegalStateException when no row comes back. */ + private static int readAmount(SessionRetryContext retryCtx) { + DataQueryResult result = retryCtx.supplyResult(session -> session.executeDataQuery( + "SELECT amount FROM " + TABLE + " WHERE id = 1", + TxControl.snapshotRo().setCommitTx(true) +
)).join().getValue(); + + ResultSetReader rs = result.getResultSet(0); + if (!rs.next()) { + throw new IllegalStateException("row not found"); + } + return rs.getColumn("amount").getInt32(); + } + /** Reads amount of row id=1 inside the given transaction; throws IllegalStateException when no row comes back. */ + private static int readAmount(TableTransaction tx) { + DataQueryResult result = tx.executeDataQuery( + "SELECT amount FROM " + TABLE + " WHERE id = 1" + ).join().getValue(); + ResultSetReader rs = result.getResultSet(0); + if (!rs.next()) { + throw new IllegalStateException("row not found"); + } + return rs.getColumn("amount").getInt32(); + } + /** Assembles the SDK: default resource merged with service.name/service.version, a batch span processor feeding an OTLP gRPC exporter at otelEndpoint, and W3C trace-context propagation. */ + private static OpenTelemetrySdk createOpenTelemetry(String serviceName, String serviceVersion, String otelEndpoint) { + Resource resource = Resource.getDefault().merge(Resource.builder() + .put(AttributeKey.stringKey("service.name"), serviceName) + .put(AttributeKey.stringKey("service.version"), serviceVersion) + .build()); + + SpanExporter exporter = OtlpGrpcSpanExporter.builder() + .setEndpoint(otelEndpoint) + .build(); + + SdkTracerProvider tracerProvider = SdkTracerProvider.builder() + .setResource(resource) + .addSpanProcessor(BatchSpanProcessor.builder(exporter).build()) + .build(); + + return OpenTelemetrySdk.builder() + .setTracerProvider(tracerProvider) + .setPropagators(ContextPropagators.create(W3CTraceContextPropagator.getInstance())) + .build(); + } + /** Returns the package's implementation version, or "unknown" when it is null or empty. */ + private static String resolveServiceVersion() { + Package pkg = TableClientImplTraceApp.class.getPackage(); + String version = pkg == null ? null : pkg.getImplementationVersion(); + return version == null || version.isEmpty() ?
"unknown" : version; + } +} diff --git a/trace/compose-e2e.yaml b/trace/compose-e2e.yaml new file mode 100644 index 0000000..867e6d7 --- /dev/null +++ b/trace/compose-e2e.yaml @@ -0,0 +1,137 @@ +services: + ydb: + image: ydbplatform/local-ydb:trunk + platform: linux/amd64 + command: [ "--config-path", "/ydb_config/ydb-config.yaml" ] + environment: + YDB_DEFAULT_LOG_LEVEL: NOTICE + GRPC_TLS_PORT: "2135" + GRPC_PORT: "2136" + MON_PORT: "8765" + YDB_USE_IN_MEMORY_PDISKS: "true" + ports: + - "2135:2135" + - "2136:2136" + - "8765:8765" + volumes: + - ./ydb:/ydb_config:ro + healthcheck: + test: [ "CMD-SHELL", "bash -lc 'true >/dev/tcp/127.0.0.1/2136'" ] + interval: 5s + timeout: 5s + retries: 30 + start_period: 10s + + otel-collector: + image: otel/opentelemetry-collector-contrib:latest + command: [ "--config=/etc/otelcol/config.yaml" ] + depends_on: [ tempo ] + volumes: + - ./otel/otel-collector-config.yaml:/etc/otelcol/config.yaml:ro + ports: + - "4317:4317" + - "4318:4318" + - "9464:9464" + - "13133:13133" + - "55679:55679" + + prometheus: + image: prom/prometheus:latest + volumes: + - ./prometheus/prometheus.yaml:/etc/prometheus/prometheus.yml:ro + ports: + - "9090:9090" + depends_on: [ otel-collector ] + + tempo: + image: grafana/tempo:2.4.1 + command: [ "-config.file=/etc/tempo.yaml" ] + volumes: + - ./tempo/tempo.yaml:/etc/tempo.yaml:ro + ports: + - "3200:3200" + + grafana: + image: grafana/grafana:10.4.2 + environment: + GF_AUTH_ANONYMOUS_ENABLED: "true" + GF_AUTH_ANONYMOUS_ORG_ROLE: "Admin" + volumes: + - ./grafana/provisioning:/etc/grafana/provisioning:ro + - ./grafana/dashboards:/var/lib/grafana/dashboards:ro + ports: + - "3000:3000" + depends_on: [ prometheus, tempo ] + + query-service-trace: + image: eclipse-temurin:21-jre + working_dir: /app + depends_on: + ydb: + condition: service_healthy + otel-collector: + condition: service_started + volumes: + - ./application/query-service/target/trace-query-service.jar:/app/trace-query-service.jar:ro + -
./application/query-service/target/libs:/app/libs:ro + command: [ "java", "-cp", "/app/trace-query-service.jar:/app/libs/*", "tech.ydb.trace.query.QueryServiceTraceApp" ] + environment: + YDB_ANONYMOUS_CREDENTIALS: "1" + + table-client-impl-trace: + image: eclipse-temurin:21-jre + working_dir: /app + depends_on: + ydb: + condition: service_healthy + otel-collector: + condition: service_started + volumes: + - ./application/table-client-impl/target/trace-table-client-impl.jar:/app/trace-table-client-impl.jar:ro + - ./application/table-client-impl/target/libs:/app/libs:ro + command: [ "java", "-cp", "/app/trace-table-client-impl.jar:/app/libs/*", "tech.ydb.trace.table.TableClientImplTraceApp" ] + environment: + YDB_ANONYMOUS_CREDENTIALS: "1" + +# jdbc-trace: +# image: eclipse-temurin:21-jre +# working_dir: /app +# depends_on: +# ydb: +# condition: service_healthy +# otel-collector: +# condition: service_started +# volumes: +# - ./application/jdbc-trace/target/trace-jdbc.jar:/app/trace-jdbc.jar:ro +# - ./application/jdbc-trace/target/libs:/app/libs:ro +# command: [ "java", "-cp", "/app/trace-jdbc.jar:/app/libs/*", "tech.ydb.trace.jdbc.JdbcTraceApp" ] +# environment: +# YDB_ANONYMOUS_CREDENTIALS: "1" + +# spring-data-jpa-v6-trace: +# image: eclipse-temurin:21-jre +# working_dir: /app +# depends_on: +# ydb: +# condition: service_healthy +# otel-collector: +# condition: service_started +# volumes: +# - ./application/spring-data-jpa-v6-trace/target/trace-spring-data-jpa-v6.jar:/app/app.jar:ro +# command: [ "java", "-jar", "/app/app.jar" ] +# environment: +# YDB_ANONYMOUS_CREDENTIALS: "1" +# +# spring-data-jdbc-trace: +# image: eclipse-temurin:21-jre +# working_dir: /app +# depends_on: +# ydb: +# condition: service_healthy +# otel-collector: +# condition: service_started +# volumes: +# - ./application/spring-data-jdbc-trace/target/trace-spring-data-jdbc.jar:/app/app.jar:ro +# command: [ "java", "-jar", "/app/app.jar" ] +# environment: +# YDB_ANONYMOUS_CREDENTIALS: "1" diff 
--git a/trace/grafana/dashboards/README.md b/trace/grafana/dashboards/README.md new file mode 100644 index 0000000..9941fc8 --- /dev/null +++ b/trace/grafana/dashboards/README.md @@ -0,0 +1,3 @@ +This folder is intentionally left empty. + +Grafana is provisioned with Tempo + Prometheus datasources; use **Explore** to search traces. diff --git a/trace/grafana/provisioning/dashboards/dashboards.yaml b/trace/grafana/provisioning/dashboards/dashboards.yaml new file mode 100644 index 0000000..48b8582 --- /dev/null +++ b/trace/grafana/provisioning/dashboards/dashboards.yaml @@ -0,0 +1,11 @@ +apiVersion: 1 + +providers: + - name: 'default' + orgId: 1 + folder: '' + type: file + disableDeletion: true + editable: false + options: + path: /var/lib/grafana/dashboards diff --git a/trace/grafana/provisioning/datasources/datasources.yaml b/trace/grafana/provisioning/datasources/datasources.yaml new file mode 100644 index 0000000..1b0d125 --- /dev/null +++ b/trace/grafana/provisioning/datasources/datasources.yaml @@ -0,0 +1,20 @@ +apiVersion: 1 + +datasources: + - name: Prometheus + type: prometheus + access: proxy + url: http://prometheus:9090 + isDefault: true + editable: false + + - name: Tempo + type: tempo + access: proxy + url: http://tempo:3200 + editable: false + jsonData: + tracesToMetrics: + datasourceUid: Prometheus + serviceMap: + datasourceUid: Prometheus diff --git a/trace/otel/otel-collector-config.yaml b/trace/otel/otel-collector-config.yaml new file mode 100644 index 0000000..7f78444 --- /dev/null +++ b/trace/otel/otel-collector-config.yaml @@ -0,0 +1,44 @@ +receivers: + otlp: + protocols: + grpc: + endpoint: 0.0.0.0:4317 + http: + endpoint: 0.0.0.0:4318 + +processors: + batch: { } + +exporters: + prometheus: + endpoint: 0.0.0.0:9464 + resource_to_telemetry_conversion: + enabled: true + + otlp/tempo: + endpoint: tempo:4317 + tls: + insecure: true + + debug: + verbosity: detailed + +extensions: + health_check: + endpoint: 0.0.0.0:13133 + + zpages: + endpoint: 
0.0.0.0:55679 + +service: + extensions: [ health_check, zpages ] + pipelines: + metrics: + receivers: [ otlp ] + processors: [ batch ] + exporters: [ prometheus ] + + traces: + receivers: [ otlp ] + processors: [ batch ] + exporters: [ otlp/tempo, debug ] diff --git a/trace/prometheus/prometheus.yaml b/trace/prometheus/prometheus.yaml new file mode 100644 index 0000000..64b3182 --- /dev/null +++ b/trace/prometheus/prometheus.yaml @@ -0,0 +1,7 @@ +global: + scrape_interval: 5s + +scrape_configs: + - job_name: otel-collector + static_configs: + - targets: ["otel-collector:9464"] diff --git a/trace/tempo/tempo.yaml b/trace/tempo/tempo.yaml new file mode 100644 index 0000000..43dbb19 --- /dev/null +++ b/trace/tempo/tempo.yaml @@ -0,0 +1,15 @@ +server: + http_listen_port: 3200 + +distributor: + receivers: + otlp: + protocols: + grpc: + endpoint: 0.0.0.0:4317 + +storage: + trace: + backend: local + local: + path: /tmp/tempo diff --git a/trace/ydb/README.md b/trace/ydb/README.md new file mode 100644 index 0000000..f065126 --- /dev/null +++ b/trace/ydb/README.md @@ -0,0 +1,28 @@ +# YDB server-side tracing (OpenTelemetry) + +This folder is used to keep a **custom YDB config** that enables server-side OpenTelemetry tracing. + +## 1) Export the default config from a running container + +If YDB is running as `ydb-local` (run from the repository root): + +```bash +docker cp ydb-local:/ydb_data/cluster/kikimr_configs/config.yaml ./trace/ydb/ydb-config.yaml +``` + +## 2) Enable the OpenTelemetry exporter in the config + +Edit `ydb-config.yaml` and add the contents of `otel-tracing-snippet.yaml` (as the top-level `tracing_config` section). + +- Default OTLP endpoint (inside docker-compose network): `grpc://otel-collector:4317` +- Default service name (so you can find it in Tempo/Grafana): `ydb` + +## 3) Run with the overridden config + +Restart YDB: + +```bash +docker compose -f trace/compose-e2e.yaml up -d --force-recreate ydb +``` + +Now you should see additional server-side traces in Tempo/Grafana.
diff --git a/trace/ydb/otel-tracing-snippet.yaml b/trace/ydb/otel-tracing-snippet.yaml new file mode 100644 index 0000000..bd5978d --- /dev/null +++ b/trace/ydb/otel-tracing-snippet.yaml @@ -0,0 +1,26 @@ +tracing_config: + backend: + opentelemetry: + collector_url: grpc://otel-collector:4317 + service_name: ydb + external_throttling: + - scope: + database: /local + max_traces_per_minute: 60 + max_traces_burst: 3 + # Highest tracing detail for *sampled* traces (YDB-generated trace-id). + # Note: requests with an external `traceparent` are traced at level 13 (Detailed) per YDB docs. + sampling: + - scope: + database: /local + fraction: 1 + level: 15 + max_traces_per_minute: 1000 + max_traces_burst: 100 + uploader: + max_exported_spans_per_second: 30 + max_spans_in_batch: 100 + max_bytes_in_batch: 10485760 # 10 MiB + max_export_requests_inflight: 3 + max_batch_accumulation_milliseconds: 5000 + span_export_timeout_seconds: 120 diff --git a/trace/ydb/ydb-config.yaml b/trace/ydb/ydb-config.yaml new file mode 100644 index 0000000..a6ae5ab --- /dev/null +++ b/trace/ydb/ydb-config.yaml @@ -0,0 +1,354 @@ +actor_system_config: + batch_executor: 2 + executor: + - name: System + spin_threshold: 0 + threads: 2 + type: BASIC + - name: User + spin_threshold: 0 + threads: 3 + type: BASIC + - name: Batch + spin_threshold: 0 + threads: 2 + type: BASIC + - name: IO + threads: 1 + time_per_mailbox_micro_secs: 100 + type: IO + - name: IC + spin_threshold: 10 + threads: 1 + time_per_mailbox_micro_secs: 100 + type: BASIC + io_executor: 3 + scheduler: + progress_threshold: 10000 + resolution: 1024 + spin_threshold: 0 + service_executor: + - executor_id: 4 + service_name: Interconnect + sys_executor: 0 + user_executor: 1 +blob_storage_config: + service_set: + availability_domains: 1 + groups: + - erasure_species: 0 + group_generation: 1 + group_id: 0 + rings: + - fail_domains: + - vdisk_locations: + - node_id: 1 + pdisk_guid: 1 + pdisk_id: 1 + vdisk_slot_id: 0 + pdisks: + - node_id: 1 + 
path: SectorMap:1:64 + pdisk_category: 0 + pdisk_guid: 1 + pdisk_id: 1 + vdisks: + - vdisk_id: + domain: 0 + group_generation: 1 + group_id: 0 + ring: 0 + vdisk: 0 + vdisk_location: + node_id: 1 + pdisk_guid: 1 + pdisk_id: 1 + vdisk_slot_id: 0 +channel_profile_config: + profile: + - channel: + - erasure_species: none + pdisk_category: 0 + storage_pool_kind: hdd + - erasure_species: none + pdisk_category: 0 + storage_pool_kind: hdd + - erasure_species: none + pdisk_category: 0 + storage_pool_kind: hdd + profile_id: 0 + - channel: + - erasure_species: none + pdisk_category: 0 + storage_pool_kind: hdd + - erasure_species: none + pdisk_category: 0 + storage_pool_kind: hdd + - erasure_species: none + pdisk_category: 0 + storage_pool_kind: hdd + - erasure_species: none + pdisk_category: 0 + storage_pool_kind: hdd + - erasure_species: none + pdisk_category: 0 + storage_pool_kind: hdd + - erasure_species: none + pdisk_category: 0 + storage_pool_kind: hdd + - erasure_species: none + pdisk_category: 0 + storage_pool_kind: hdd + profile_id: 1 +domains_config: + domain: + - domain_id: 1 + name: local + storage_pool_types: + - kind: hdd + pool_config: + box_id: 1 + erasure_species: none + kind: hdd + pdisk_filter: + - property: + - type: ROT + vdisk_kind: Default + - kind: hdd1 + pool_config: + box_id: 1 + erasure_species: none + kind: hdd + pdisk_filter: + - property: + - type: ROT + vdisk_kind: Default + - kind: hdd2 + pool_config: + box_id: 1 + erasure_species: none + kind: hdd + pdisk_filter: + - property: + - type: ROT + vdisk_kind: Default + - kind: hdde + pool_config: + box_id: 1 + encryption_mode: 1 + erasure_species: none + kind: hdd + pdisk_filter: + - property: + - type: ROT + vdisk_kind: Default + security_config: + default_users: + - name: root + password: '1234' + state_storage: + - ring: + nto_select: 1 + ring: + - node: + - 1 + use_ring_specific_node_selection: true + ssid: 1 +feature_flags: + enable_drain_on_shutdown: false + enable_mvcc_snapshot_reads: true + 
enable_persistent_query_stats: true + enable_public_api_external_blobs: false + enable_scheme_transactions_at_scheme_shard: true +federated_query_config: + audit: + enabled: false + uaconfig: + uri: '' + checkpoint_coordinator: + checkpointing_period_millis: 1000 + enabled: true + max_inflight: 1 + storage: + endpoint: '' + common: + ids_prefix: pt + use_bearer_for_ydb: true + control_plane_proxy: + enabled: true + request_timeout: 30s + control_plane_storage: + available_binding: + - DATA_STREAMS + - OBJECT_STORAGE + available_connection: + - YDB_DATABASE + - CLICKHOUSE_CLUSTER + - DATA_STREAMS + - OBJECT_STORAGE + - MONITORING + enabled: true + storage: + endpoint: '' + db_pool: + enabled: true + storage: + endpoint: '' + enabled: false + gateways: + dq: + default_settings: [] + enabled: true + pq: + cluster_mapping: [] + solomon: + cluster_mapping: [] + nodes_manager: + enabled: true + pending_fetcher: + enabled: true + pinger: + ping_period: 30s + private_api: + enabled: true + private_proxy: + enabled: true + resource_manager: + enabled: true + token_accessor: + enabled: true +grpc_config: + ca: /ydb_certs/ca.pem + cert: /ydb_certs/cert.pem + host: '[::]' + key: /ydb_certs/key.pem + services: + - legacy + - tablet_service + - yql + - discovery + - cms + - locking + - kesus + - pq + - pqcd + - pqv1 + - topic + - datastreams + - scripting + - clickhouse_internal + - rate_limiter + - analytics + - export + - import + - yq + - keyvalue + - monitoring + - auth + - query_service + - view +interconnect_config: + start_tcp: true +kafka_proxy_config: + enable_kafka_proxy: true + listening_port: 9092 +kqpconfig: + settings: + - name: _ResultRowsLimit + value: '1000' + - name: _KqpYqlSyntaxVersion + value: '1' + - name: _KqpAllowNewEngine + value: 'true' +log_config: + default_level: 5 + entry: [] + sys_log: false +nameservice_config: + node: + - address: ::1 + host: localhost + node_id: 1 + port: 19001 + walle_location: + body: 1 + data_center: '1' + rack: '1' 
+net_classifier_config: + cms_config_timeout_seconds: 30 + net_data_file_path: /ydb_data/netData.tsv + updater_config: + net_data_update_interval_seconds: 60 + retry_interval_seconds: 30 +pqcluster_discovery_config: + enabled: false +pqconfig: + check_acl: false + cluster_table_path: '' + clusters_update_timeout_sec: 1 + enable_proto_source_id_info: true + enabled: true + max_storage_node_port: 65535 + meta_cache_timeout_sec: 1 + quoting_config: + enable_quoting: false + require_credentials_in_new_protocol: false + root: '' + topics_are_first_class_citizen: true + version_table_path: '' +sqs_config: + enable_dead_letter_queues: true + enable_sqs: false + force_queue_creation_v2: true + force_queue_deletion_v2: true + scheme_cache_hard_refresh_time_seconds: 0 + scheme_cache_soft_refresh_time_seconds: 0 +static_erasure: none +system_tablets: + default_node: + - 1 + flat_schemeshard: + - info: + tablet_id: 72057594046678944 + flat_tx_coordinator: + - node: + - 1 + tx_allocator: + - node: + - 1 + tx_mediator: + - node: + - 1 +table_service_config: + filter_pushdown_over_join_optional_side: false + resource_manager: + channel_buffer_size: 262144 + mkql_heavy_program_memory_limit: 1048576 + mkql_light_program_memory_limit: 65536 + verbose_memory_limit_exception: true +tracing_config: + backend: + opentelemetry: + collector_url: grpc://otel-collector:4317 + service_name: ydb + external_throttling: + - scope: + database: /local + max_traces_per_minute: 60 + max_traces_burst: 3 + # Highest tracing detail for *sampled* traces (YDB-generated trace-id). + # Note: requests with an external `traceparent` are traced at level 13 (Detailed) per YDB docs. 
+ sampling: + - scope: + database: /local + fraction: 1 + level: 15 + max_traces_per_minute: 1000 + max_traces_burst: 100 + uploader: + max_exported_spans_per_second: 30 + max_spans_in_batch: 100 + max_bytes_in_batch: 10485760 # 10 MiB + max_export_requests_inflight: 3 + max_batch_accumulation_milliseconds: 5000 + span_export_timeout_seconds: 120