From 79baed31c3c8360fdf39d9fac71248c2f648d2cb Mon Sep 17 00:00:00 2001 From: Katrina Liu Date: Wed, 30 Jul 2025 18:01:09 -0700 Subject: [PATCH 1/7] tracing.py --- pyproject.toml | 2 + src/semgrep_mcp/server.py | 8 ++-- src/utilities/tracing.py | 91 +++++++++++++++++++++++++++++++++++++++ uv.lock | 4 ++ 4 files changed, 102 insertions(+), 3 deletions(-) create mode 100644 src/utilities/tracing.py diff --git a/pyproject.toml b/pyproject.toml index 4c2a171..39fb5c2 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -38,6 +38,8 @@ classifiers = [ dependencies = [ "mcp>=1.12.0", "semgrep==1.131.0", + "opentelemetry-api>=1.25.0", + "opentelemetry-sdk>=1.25.0", ] [project.license] diff --git a/src/semgrep_mcp/server.py b/src/semgrep_mcp/server.py index a8878ad..9a79cb8 100755 --- a/src/semgrep_mcp/server.py +++ b/src/semgrep_mcp/server.py @@ -31,6 +31,7 @@ set_semgrep_executable, ) from semgrep_mcp.semgrep_interfaces.semgrep_output_v1 import CliOutput +from utilities.tracing import initialize_tracing # --------------------------------------------------------------------------------- # Constants @@ -289,9 +290,10 @@ def remove_temp_dir_from_results(results: SemgrepScanResult, temp_dir: str) -> N @asynccontextmanager async def server_lifespan(_server: FastMCP) -> AsyncIterator[SemgrepContext | None]: """Manage server startup and shutdown lifecycle.""" - # Initialize resources on startup - # MCP requires Pro Engine - context = await run_semgrep_daemon() + # Initialize resources on startup with tracing + # MCP requires Pro Engine + with initialize_tracing("mcp-python-server") as span: + context = await run_semgrep_daemon() try: yield context diff --git a/src/utilities/tracing.py b/src/utilities/tracing.py new file mode 100644 index 0000000..a6b07c8 --- /dev/null +++ b/src/utilities/tracing.py @@ -0,0 +1,91 @@ +#!/usr/bin/env python3 + +import os +from contextlib import contextmanager +from typing import Any, Dict, Generator, Optional + +from opentelemetry import trace +from opentelemetry.exporter.otlp.proto.http.trace_exporter import OTLPSpanExporter +from opentelemetry.sdk.resources import SERVICE_NAME, DEPLOYMENT_ENVIRONMENT, Resource +from opentelemetry.sdk.trace import TracerProvider +from opentelemetry.sdk.trace.export import BatchSpanProcessor + +# coupling: these need to be kept in sync with semgrep-proprietary/tracing.py +DEFAULT_TRACE_ENDPOINT = "https://telemetry.semgrep.dev/v1/traces" +DEFAULT_DEV_ENDPOINT = "https://telemetry.dev2.semgrep.dev/v1/traces" +DEFAULT_LOCAL_ENDPOINT = "http://localhost:4318/v1/traces" + +MCP_SERVICE_NAME = "mcp" + +top_level_span : trace.Span | None = None + + +def get_trace_endpoint() -> str: + """Get the appropriate trace endpoint based on environment.""" + env = os.environ.get("ENVIRONMENT", "dev").lower() + + if env == "prod": + return DEFAULT_TRACE_ENDPOINT + elif env == "local": + return DEFAULT_LOCAL_ENDPOINT + else: + return DEFAULT_DEV_ENDPOINT + +@contextmanager +def initialize_tracing(name: str) -> None: + """Initialize OpenTelemetry tracing with basic configuration.""" + + # Create resource with basic attributes + resource = Resource.create({ + SERVICE_NAME: MCP_SERVICE_NAME, + DEPLOYMENT_ENVIRONMENT: os.environ.get("ENVIRONMENT", "dev"), + }) + + # Create tracer provider + provider = TracerProvider(resource=resource) + + # Create OTLP exporter + endpoint = get_trace_endpoint() + exporter = OTLPSpanExporter(endpoint=endpoint) + + # Create span processor + processor = BatchSpanProcessor(exporter) + provider.add_span_processor(processor) + + # Set the global tracer provider + trace.set_tracer_provider(provider) + + # Get tracer instance + tracer = trace.get_tracer(MCP_SERVICE_NAME) + + with tracer.start_as_current_span(name) as span: + top_level_span = span + print("Tracing initialized") + print(f"Tracing initialized with span ID: {top_level_span.get_span_context().span_id} and trace ID: {top_level_span.get_span_context().trace_id}") + yield span + + + +@contextmanager +def trace_span( + name: str, +) -> Generator[trace.Span, None, None]: + """ + Context manager for creating and managing OpenTelemetry spans. + + Args: + name: The name of the span + attributes: Optional attributes to set on the span + + Yields: + The created span instance + """ + tracer = trace.get_tracer(MCP_SERVICE_NAME) + + with tracer.start_as_current_span(name) as span: + + try: + yield span + except Exception as e: + print(f"Error in span {name}: {e}") + raise \ No newline at end of file diff --git a/uv.lock b/uv.lock index 2f31bc5..59d5d3d 100644 --- a/uv.lock +++ b/uv.lock @@ -1157,6 +1157,8 @@ version = "0.4.1" source = { editable = "." } dependencies = [ { name = "mcp" }, + { name = "opentelemetry-api" }, + { name = "opentelemetry-sdk" }, { name = "semgrep" }, ] @@ -1176,6 +1178,8 @@ dev = [ requires-dist = [ { name = "mcp", specifier = ">=1.12.0" }, { name = "semgrep", specifier = "==1.131.0" }, + { name = "opentelemetry-api", specifier = ">=1.25.0" }, + { name = "opentelemetry-sdk", specifier = ">=1.25.0" }, ] [package.metadata.requires-dev] From b492ad6806a0437ffec247f513bd37f7619e2ade Mon Sep 17 00:00:00 2001 From: Katrina Liu Date: Wed, 30 Jul 2025 18:09:20 -0700 Subject: [PATCH 2/7] clean up --- src/utilities/tracing.py | 39 ++++++++++++++------------------------- 1 file changed, 14 insertions(+), 25 deletions(-) diff --git a/src/utilities/tracing.py b/src/utilities/tracing.py index a6b07c8..7039222 100644 --- a/src/utilities/tracing.py +++ b/src/utilities/tracing.py @@ -2,7 +2,7 @@ import os from contextlib import contextmanager -from typing import Any, Dict, Generator, Optional +from typing import Generator from opentelemetry import trace from opentelemetry.exporter.otlp.proto.http.trace_exporter import OTLPSpanExporter @@ -20,32 +20,33 @@ top_level_span : trace.Span | None = None -def get_trace_endpoint() -> str: +def get_trace_endpoint() -> (str, str): """Get the appropriate trace endpoint based on environment.""" env = os.environ.get("ENVIRONMENT", "dev").lower() if env == "prod": - return DEFAULT_TRACE_ENDPOINT + return (DEFAULT_TRACE_ENDPOINT, "prod") elif env == "local": - return DEFAULT_LOCAL_ENDPOINT + return (DEFAULT_LOCAL_ENDPOINT, "local") else: - return DEFAULT_DEV_ENDPOINT + return (DEFAULT_DEV_ENDPOINT, "dev") @contextmanager -def initialize_tracing(name: str) -> None: - """Initialize OpenTelemetry tracing with basic configuration.""" +def initialize_tracing(name: str) -> Generator[trace.Span, None, None]: + """Initialize OpenTelemetry tracing.""" + + (endpoint, env) = get_trace_endpoint() # Create resource with basic attributes resource = Resource.create({ SERVICE_NAME: MCP_SERVICE_NAME, - DEPLOYMENT_ENVIRONMENT: os.environ.get("ENVIRONMENT", "dev"), + DEPLOYMENT_ENVIRONMENT: env, }) # Create tracer provider provider = TracerProvider(resource=resource) # Create OTLP exporter - endpoint = get_trace_endpoint() exporter = OTLPSpanExporter(endpoint=endpoint) # Create span processor @@ -60,8 +61,11 @@ def initialize_tracing(name: str) -> None: with tracer.start_as_current_span(name) as span: top_level_span = span + # TODO: fix different trace id from datadog + # TODO: use logging print("Tracing initialized") print(f"Tracing initialized with span ID: {top_level_span.get_span_context().span_id} and trace ID: {top_level_span.get_span_context().trace_id}") + yield span @@ -70,22 +74,7 @@ def initialize_tracing(name: str) -> None: def trace_span( name: str, ) -> Generator[trace.Span, None, None]: - """ - Context manager for creating and managing OpenTelemetry spans. - - Args: - name: The name of the span - attributes: Optional attributes to set on the span - - Yields: - The created span instance - """ tracer = trace.get_tracer(MCP_SERVICE_NAME) with tracer.start_as_current_span(name) as span: - - try: - yield span - except Exception as e: - print(f"Error in span {name}: {e}") - raise \ No newline at end of file + yield span \ No newline at end of file From 9627b082efdd4bdc60d7c837f27df2cce458f093 Mon Sep 17 00:00:00 2001 From: Katrina Liu Date: Thu, 31 Jul 2025 13:37:52 -0700 Subject: [PATCH 3/7] trace id --- src/utilities/tracing.py | 6 ++---- 1 file changed, 2 insertions(+), 4 deletions(-) diff --git a/src/utilities/tracing.py b/src/utilities/tracing.py index 7039222..7a838c0 100644 --- a/src/utilities/tracing.py +++ b/src/utilities/tracing.py @@ -42,7 +42,6 @@ def initialize_tracing(name: str) -> Generator[trace.Span, None, None]: SERVICE_NAME: MCP_SERVICE_NAME, DEPLOYMENT_ENVIRONMENT: env, }) - # Create tracer provider provider = TracerProvider(resource=resource) @@ -61,15 +60,14 @@ def initialize_tracing(name: str) -> Generator[trace.Span, None, None]: with tracer.start_as_current_span(name) as span: top_level_span = span - # TODO: fix different trace id from datadog + trace_id = trace.format_trace_id(top_level_span.get_span_context().trace_id) # TODO: use logging print("Tracing initialized") - print(f"Tracing initialized with span ID: {top_level_span.get_span_context().span_id} and trace ID: {top_level_span.get_span_context().trace_id}") + print(f"Tracing initialized with trace ID: {trace_id}") yield span - @contextmanager def trace_span( name: str, From e4876063708d3a6775b6301be747f923120e8217 Mon Sep 17 00:00:00 2001 From: Katrina Liu Date: Thu, 31 Jul 2025 16:19:48 -0700 Subject: [PATCH 4/7] link parent span --- src/semgrep_mcp/semgrep.py | 5 +++- src/semgrep_mcp/server.py | 7 +++--- src/utilities/tracing.py | 51 +++++++++++++++++++------------------- 3 files changed, 34 insertions(+), 29 deletions(-) diff --git a/src/semgrep_mcp/semgrep.py b/src/semgrep_mcp/semgrep.py index 79bddc7..cb0f966 100644 --- a/src/semgrep_mcp/semgrep.py +++ b/src/semgrep_mcp/semgrep.py @@ -7,6 +7,7 @@ from mcp.shared.exceptions import McpError from mcp.types import INTERNAL_ERROR, ErrorData +from opentelemetry import trace from semgrep_mcp.models import CodeFile from semgrep_mcp.semgrep_interfaces.semgrep_output_v1 import CliOutput @@ -140,9 +141,11 @@ class SemgrepContext: process: asyncio.subprocess.Process stdin: asyncio.StreamWriter stdout: asyncio.StreamReader + top_level_span: trace.Span - def __init__(self, process: asyncio.subprocess.Process) -> None: + def __init__(self, process: asyncio.subprocess.Process, top_level_span: trace.Span) -> None: self.process = process + self.top_level_span = top_level_span if process.stdin is not None and process.stdout is not None: self.stdin = process.stdin diff --git a/src/semgrep_mcp/server.py b/src/semgrep_mcp/server.py index 9a79cb8..b3b63ce 100755 --- a/src/semgrep_mcp/server.py +++ b/src/semgrep_mcp/server.py @@ -31,7 +31,7 @@ set_semgrep_executable, ) from semgrep_mcp.semgrep_interfaces.semgrep_output_v1 import CliOutput -from utilities.tracing import initialize_tracing +from utilities.tracing import start_tracing, with_span # --------------------------------------------------------------------------------- # Constants @@ -292,7 +292,7 @@ async def server_lifespan(_server: FastMCP) -> AsyncIterator[SemgrepContext | No """Manage server startup and shutdown lifecycle.""" # Initialize resources on startup with tracing # MCP requires Pro Engine - with initialize_tracing("mcp-python-server") as span: + with start_tracing("mcp-python-server") as span: context = await run_semgrep_daemon() try: @@ -690,7 +690,8 @@ async def semgrep_scan_rpc( temp_dir = None try: # TODO: perhaps should return more interpretable results? - cli_output = await run_semgrep_via_rpc(context, code_files) + with with_span(context.top_level_span, "semgrep_scan_rpc"): + cli_output = await run_semgrep_via_rpc(context, code_files) return cli_output except McpError as e: raise e diff --git a/src/utilities/tracing.py b/src/utilities/tracing.py index 7a838c0..91216e0 100644 --- a/src/utilities/tracing.py +++ b/src/utilities/tracing.py @@ -1,12 +1,12 @@ #!/usr/bin/env python3 import os +from collections.abc import Generator from contextlib import contextmanager -from typing import Generator from opentelemetry import trace from opentelemetry.exporter.otlp.proto.http.trace_exporter import OTLPSpanExporter -from opentelemetry.sdk.resources import SERVICE_NAME, DEPLOYMENT_ENVIRONMENT, Resource +from opentelemetry.sdk.resources import DEPLOYMENT_ENVIRONMENT, SERVICE_NAME, Resource from opentelemetry.sdk.trace import TracerProvider from opentelemetry.sdk.trace.export import BatchSpanProcessor @@ -17,13 +17,11 @@ MCP_SERVICE_NAME = "mcp" -top_level_span : trace.Span | None = None - -def get_trace_endpoint() -> (str, str): +def get_trace_endpoint() -> tuple[str, str]: """Get the appropriate trace endpoint based on environment.""" env = os.environ.get("ENVIRONMENT", "dev").lower() - + if env == "prod": return (DEFAULT_TRACE_ENDPOINT, "prod") elif env == "local": @@ -31,48 +29,51 @@ def get_trace_endpoint() -> (str, str): else: return (DEFAULT_DEV_ENDPOINT, "dev") + @contextmanager -def initialize_tracing(name: str) -> Generator[trace.Span, None, None]: +def start_tracing(name: str) -> Generator[trace.Span, None, None]: """Initialize OpenTelemetry tracing.""" - (endpoint, env) = get_trace_endpoint() - + # Create resource with basic attributes - resource = Resource.create({ - SERVICE_NAME: MCP_SERVICE_NAME, - DEPLOYMENT_ENVIRONMENT: env, - }) + resource = Resource.create( + { + SERVICE_NAME: MCP_SERVICE_NAME, + DEPLOYMENT_ENVIRONMENT: env, + } + ) # Create tracer provider provider = TracerProvider(resource=resource) - + # Create OTLP exporter exporter = OTLPSpanExporter(endpoint=endpoint) - + # Create span processor processor = BatchSpanProcessor(exporter) provider.add_span_processor(processor) - + # Set the global tracer provider trace.set_tracer_provider(provider) - + # Get tracer instance tracer = trace.get_tracer(MCP_SERVICE_NAME) with tracer.start_as_current_span(name) as span: - top_level_span = span - trace_id = trace.format_trace_id(top_level_span.get_span_context().trace_id) + trace_id = trace.format_trace_id(span.get_span_context().trace_id) # TODO: use logging print("Tracing initialized") print(f"Tracing initialized with trace ID: {trace_id}") - + yield span @contextmanager -def trace_span( - name: str, +def with_span( + parent_span: trace.Span, + name: str, ) -> Generator[trace.Span, None, None]: tracer = trace.get_tracer(MCP_SERVICE_NAME) - - with tracer.start_as_current_span(name) as span: - yield span \ No newline at end of file + + context = trace.set_span_in_context(parent_span) + with tracer.start_span(name, context=context) as span: + yield span From 6bea137e764a407be8d20be0dfc5e340225edd55 Mon Sep 17 00:00:00 2001 From: Katrina Liu Date: Tue, 12 Aug 2025 12:33:19 -0700 Subject: [PATCH 5/7] rebase --- src/semgrep_mcp/semgrep.py | 4 ++-- src/semgrep_mcp/server.py | 4 ++-- 2 files changed, 4 insertions(+), 4 deletions(-) diff --git a/src/semgrep_mcp/semgrep.py b/src/semgrep_mcp/semgrep.py index cb0f966..0f89390 100644 --- a/src/semgrep_mcp/semgrep.py +++ b/src/semgrep_mcp/semgrep.py @@ -206,7 +206,7 @@ async def run_semgrep(args: list[str]) -> asyncio.subprocess.Process: return process -async def run_semgrep_daemon() -> SemgrepContext | None: +async def run_semgrep_daemon(top_level_span: trace.Span) -> SemgrepContext | None: """ Runs the semgrep daemon (`semgrep mcp`) if the user has the Pro Engine installed. @@ -225,7 +225,7 @@ async def run_semgrep_daemon() -> SemgrepContext | None: return None else: process = await run_semgrep(["mcp", "--pro"]) - return SemgrepContext(process=process) + return SemgrepContext(process=process, top_level_span=top_level_span) async def run_semgrep_output(args: list[str]) -> str: diff --git a/src/semgrep_mcp/server.py b/src/semgrep_mcp/server.py index b3b63ce..6a3a2bb 100755 --- a/src/semgrep_mcp/server.py +++ b/src/semgrep_mcp/server.py @@ -291,9 +291,9 @@ def remove_temp_dir_from_results(results: SemgrepScanResult, temp_dir: str) -> N async def server_lifespan(_server: FastMCP) -> AsyncIterator[SemgrepContext | None]: """Manage server startup and shutdown lifecycle.""" # Initialize resources on startup with tracing - # MCP requires Pro Engine + # MCP requires Pro Engine with start_tracing("mcp-python-server") as span: - context = await run_semgrep_daemon() + context = await run_semgrep_daemon(top_level_span=span) try: yield context From 01076ea2669b801f6f25cc1ea0561162bdf2bb91 Mon Sep 17 00:00:00 2001 From: Katrina Liu Date: Tue, 12 Aug 2025 13:23:44 -0700 Subject: [PATCH 6/7] wait for process to finish --- src/semgrep_mcp/semgrep.py | 3 +++ src/utilities/tracing.py | 7 ++++--- 2 files changed, 7 insertions(+), 3 deletions(-) diff --git a/src/semgrep_mcp/semgrep.py b/src/semgrep_mcp/semgrep.py index 0f89390..f762947 100644 --- a/src/semgrep_mcp/semgrep.py +++ b/src/semgrep_mcp/semgrep.py @@ -214,6 +214,9 @@ async def run_semgrep_daemon(top_level_span: trace.Span) -> SemgrepContext | Non """ resp = await run_semgrep(["--pro", "--version"]) + # wait for the command to exit so the exit code is set + await resp.communicate() + # The user doesn't seem to have the Pro Engine installed. # That's fine, let's just run the free engine, without the # `semgrep mcp` backend. diff --git a/src/utilities/tracing.py b/src/utilities/tracing.py index 91216e0..6a38c47 100644 --- a/src/utilities/tracing.py +++ b/src/utilities/tracing.py @@ -1,5 +1,6 @@ #!/usr/bin/env python3 +import logging import os from collections.abc import Generator from contextlib import contextmanager @@ -60,9 +61,9 @@ def start_tracing(name: str) -> Generator[trace.Span, None, None]: with tracer.start_as_current_span(name) as span: trace_id = trace.format_trace_id(span.get_span_context().trace_id) - # TODO: use logging - print("Tracing initialized") - print(f"Tracing initialized with trace ID: {trace_id}") + + logging.info("Tracing initialized") + logging.info(f"Tracing initialized with trace ID: {trace_id}") yield span From de12f0efa3f52214b5ce73601a6d6fe4296f7432 Mon Sep 17 00:00:00 2001 From: Katrina Liu Date: Tue, 12 Aug 2025 13:49:35 -0700 Subject: [PATCH 7/7] fix top-level span not spanning the whole time --- src/semgrep_mcp/server.py | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/src/semgrep_mcp/server.py b/src/semgrep_mcp/server.py index 6a3a2bb..4050f4c 100755 --- a/src/semgrep_mcp/server.py +++ b/src/semgrep_mcp/server.py @@ -295,11 +295,11 @@ async def server_lifespan(_server: FastMCP) -> AsyncIterator[SemgrepContext | No with start_tracing("mcp-python-server") as span: context = await run_semgrep_daemon(top_level_span=span) - try: - yield context - finally: - if context is not None: - context.shutdown() + try: + yield context + finally: + if context is not None: + context.shutdown() # Create a fast MCP server