diff --git a/pyproject.toml b/pyproject.toml
index 4c2a171..39fb5c2 100644
--- a/pyproject.toml
+++ b/pyproject.toml
@@ -38,6 +38,9 @@ classifiers = [
 dependencies = [
     "mcp>=1.12.0",
     "semgrep==1.131.0",
+    "opentelemetry-api>=1.25.0",
+    "opentelemetry-exporter-otlp-proto-http>=1.25.0",
+    "opentelemetry-sdk>=1.25.0",
 ]
 
 [project.license]
diff --git a/src/semgrep_mcp/semgrep.py b/src/semgrep_mcp/semgrep.py
index 79bddc7..f762947 100644
--- a/src/semgrep_mcp/semgrep.py
+++ b/src/semgrep_mcp/semgrep.py
@@ -7,6 +7,7 @@
 
 from mcp.shared.exceptions import McpError
 from mcp.types import INTERNAL_ERROR, ErrorData
+from opentelemetry import trace
 
 from semgrep_mcp.models import CodeFile
 from semgrep_mcp.semgrep_interfaces.semgrep_output_v1 import CliOutput
@@ -140,9 +141,11 @@ class SemgrepContext:
     process: asyncio.subprocess.Process
     stdin: asyncio.StreamWriter
     stdout: asyncio.StreamReader
+    top_level_span: trace.Span
 
-    def __init__(self, process: asyncio.subprocess.Process) -> None:
+    def __init__(self, process: asyncio.subprocess.Process, top_level_span: trace.Span) -> None:
         self.process = process
+        self.top_level_span = top_level_span
 
         if process.stdin is not None and process.stdout is not None:
             self.stdin = process.stdin
@@ -203,7 +206,7 @@ async def run_semgrep(args: list[str]) -> asyncio.subprocess.Process:
     return process
 
 
-async def run_semgrep_daemon() -> SemgrepContext | None:
+async def run_semgrep_daemon(top_level_span: trace.Span) -> SemgrepContext | None:
     """
     Runs the semgrep daemon (`semgrep mcp`) if the user has the Pro Engine installed.
 
@@ -211,6 +214,9 @@ async def run_semgrep_daemon() -> SemgrepContext | None:
     """
     resp = await run_semgrep(["--pro", "--version"])
 
+    # wait for the command to exit so the exit code is set
+    await resp.communicate()
+
     # The user doesn't seem to have the Pro Engine installed.
     # That's fine, let's just run the free engine, without the
     # `semgrep mcp` backend.
@@ -222,7 +228,7 @@ async def run_semgrep_daemon() -> SemgrepContext | None:
         return None
     else:
         process = await run_semgrep(["mcp", "--pro"])
-        return SemgrepContext(process=process)
+        return SemgrepContext(process=process, top_level_span=top_level_span)
 
 
 async def run_semgrep_output(args: list[str]) -> str:
diff --git a/src/semgrep_mcp/server.py b/src/semgrep_mcp/server.py
index a8878ad..4050f4c 100755
--- a/src/semgrep_mcp/server.py
+++ b/src/semgrep_mcp/server.py
@@ -31,6 +31,7 @@
     set_semgrep_executable,
 )
 from semgrep_mcp.semgrep_interfaces.semgrep_output_v1 import CliOutput
+from utilities.tracing import start_tracing, with_span
 
 # ---------------------------------------------------------------------------------
 # Constants
@@ -289,15 +290,16 @@ def remove_temp_dir_from_results(results: SemgrepScanResult, temp_dir: str) -> N
 @asynccontextmanager
 async def server_lifespan(_server: FastMCP) -> AsyncIterator[SemgrepContext | None]:
     """Manage server startup and shutdown lifecycle."""
-    # Initialize resources on startup
+    # Initialize resources on startup with tracing
     # MCP requires Pro Engine
-    context = await run_semgrep_daemon()
+    with start_tracing("mcp-python-server") as span:
+        context = await run_semgrep_daemon(top_level_span=span)
 
-    try:
-        yield context
-    finally:
-        if context is not None:
-            context.shutdown()
+        try:
+            yield context
+        finally:
+            if context is not None:
+                context.shutdown()
 
 
 # Create a fast MCP server
@@ -688,7 +690,8 @@ async def semgrep_scan_rpc(
     temp_dir = None
     try:
         # TODO: perhaps should return more interpretable results?
-        cli_output = await run_semgrep_via_rpc(context, code_files)
+        with with_span(context.top_level_span, "semgrep_scan_rpc"):
+            cli_output = await run_semgrep_via_rpc(context, code_files)
         return cli_output
     except McpError as e:
         raise e
diff --git a/src/utilities/tracing.py b/src/utilities/tracing.py
new file mode 100644
index 0000000..6a38c47
--- /dev/null
+++ b/src/utilities/tracing.py
@@ -0,0 +1,94 @@
+#!/usr/bin/env python3
+
+import logging
+import os
+from collections.abc import Generator
+from contextlib import contextmanager
+
+from opentelemetry import trace
+from opentelemetry.exporter.otlp.proto.http.trace_exporter import OTLPSpanExporter
+from opentelemetry.sdk.resources import DEPLOYMENT_ENVIRONMENT, SERVICE_NAME, Resource
+from opentelemetry.sdk.trace import TracerProvider
+from opentelemetry.sdk.trace.export import BatchSpanProcessor
+
+# coupling: these need to be kept in sync with semgrep-proprietary/tracing.py
+DEFAULT_TRACE_ENDPOINT = "https://telemetry.semgrep.dev/v1/traces"
+DEFAULT_DEV_ENDPOINT = "https://telemetry.dev2.semgrep.dev/v1/traces"
+DEFAULT_LOCAL_ENDPOINT = "http://localhost:4318/v1/traces"
+
+MCP_SERVICE_NAME = "mcp"
+
+
+def get_trace_endpoint() -> tuple[str, str]:
+    """Return ``(endpoint_url, environment_name)`` for the current environment.
+
+    The environment is read from the ``ENVIRONMENT`` variable
+    (case-insensitive). ``prod`` and ``local`` select their own endpoints;
+    an unset variable or any other value falls back to the dev endpoint.
+    """
+    env = os.environ.get("ENVIRONMENT", "dev").lower()
+
+    if env == "prod":
+        return (DEFAULT_TRACE_ENDPOINT, "prod")
+    elif env == "local":
+        return (DEFAULT_LOCAL_ENDPOINT, "local")
+    else:
+        return (DEFAULT_DEV_ENDPOINT, "dev")
+
+
+@contextmanager
+def start_tracing(name: str) -> Generator[trace.Span, None, None]:
+    """Initialize OpenTelemetry tracing and open the top-level span.
+
+    Installs a global ``TracerProvider`` whose spans are exported to the
+    endpoint chosen by ``get_trace_endpoint()``, then starts ``name`` as the
+    current span and yields it for the duration of the ``with`` block.
+    """
+    (endpoint, env) = get_trace_endpoint()
+
+    # Create resource with basic attributes
+    resource = Resource.create(
+        {
+            SERVICE_NAME: MCP_SERVICE_NAME,
+            DEPLOYMENT_ENVIRONMENT: env,
+        }
+    )
+    # Create tracer provider
+    provider = TracerProvider(resource=resource)
+
+    # Create OTLP exporter
+    exporter = OTLPSpanExporter(endpoint=endpoint)
+
+    # Create span processor
+    processor = BatchSpanProcessor(exporter)
+    provider.add_span_processor(processor)
+
+    # Set the global tracer provider
+    trace.set_tracer_provider(provider)
+
+    # Get tracer instance
+    tracer = trace.get_tracer(MCP_SERVICE_NAME)
+
+    with tracer.start_as_current_span(name) as span:
+        trace_id = trace.format_trace_id(span.get_span_context().trace_id)
+
+        logging.info("Tracing initialized with trace ID: %s", trace_id)
+
+        yield span
+
+
+@contextmanager
+def with_span(
+    parent_span: trace.Span,
+    name: str,
+) -> Generator[trace.Span, None, None]:
+    """Start a span called ``name`` as a child of ``parent_span`` and yield it.
+
+    The parent is passed explicitly through a context built from
+    ``parent_span`` rather than taken from the ambient current span.
+    """
+    tracer = trace.get_tracer(MCP_SERVICE_NAME)
+
+    context = trace.set_span_in_context(parent_span)
+    with tracer.start_span(name, context=context) as span:
+        yield span
diff --git a/uv.lock b/uv.lock
index 2f31bc5..59d5d3d 100644
--- a/uv.lock
+++ b/uv.lock
@@ -1157,6 +1157,9 @@ version = "0.4.1"
 source = { editable = "." }
 dependencies = [
     { name = "mcp" },
+    { name = "opentelemetry-api" },
+    { name = "opentelemetry-exporter-otlp-proto-http" },
+    { name = "opentelemetry-sdk" },
     { name = "semgrep" },
 ]
 
@@ -1176,6 +1179,9 @@ dev = [
 requires-dist = [
     { name = "mcp", specifier = ">=1.12.0" },
     { name = "semgrep", specifier = "==1.131.0" },
+    { name = "opentelemetry-api", specifier = ">=1.25.0" },
+    { name = "opentelemetry-exporter-otlp-proto-http", specifier = ">=1.25.0" },
+    { name = "opentelemetry-sdk", specifier = ">=1.25.0" },
 ]
 
 [package.metadata.requires-dev]