Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 2 additions & 0 deletions .gitignore
Original file line number Diff line number Diff line change
Expand Up @@ -85,11 +85,13 @@ pip-wheel-metadata/
# Logs
# ---------------------------------------------------------------------------
*.log
logs/

# ---------------------------------------------------------------------------
# Local runtime state (Google ADK / agent sessions, etc.)
# ---------------------------------------------------------------------------
**/.adk/
.adk_state/

# Local runtime data (Qdrant vector store, etc.)
data/
Expand Down
6 changes: 2 additions & 4 deletions compose.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -18,9 +18,7 @@ services:
extra_hosts:
- "host.docker.internal:host-gateway"
volumes:
- agent_artifacts:/app/src/.adk
- ./.adk_state:/app/src/.adk
- ./logs:/app/logs
restart: always
command: python -m blacki.server

volumes:
agent_artifacts:
2 changes: 2 additions & 0 deletions src/blacki/server.py
Original file line number Diff line number Diff line change
Expand Up @@ -25,6 +25,7 @@
configure_otel_resource,
initialize_environment,
setup_logging,
setup_tracing,
validation,
)

Expand All @@ -39,6 +40,7 @@
GoogleADKInstrumentor().instrument()

setup_logging(log_level=env.log_level)
setup_tracing()

_telegram_bot = None
_container: AppContainer | None = None
Expand Down
3 changes: 2 additions & 1 deletion src/blacki/utils/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,13 +3,14 @@
from . import validation
from .config import ServerEnv, initialize_environment
from .exceptions import ConfigurationError
from .observability import configure_otel_resource, setup_logging
from .observability import configure_otel_resource, setup_logging, setup_tracing

__all__ = [
"ConfigurationError",
"ServerEnv",
"configure_otel_resource",
"initialize_environment",
"setup_logging",
"setup_tracing",
"validation",
]
189 changes: 144 additions & 45 deletions src/blacki/utils/observability.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,22 +4,29 @@
OpenTelemetry environment variables for vendor-neutral operation.
"""

import json
import logging
import os
import sys
import uuid
from collections.abc import Sequence
from datetime import UTC, datetime
from pathlib import Path

from opentelemetry._logs import set_logger_provider
from opentelemetry.exporter.otlp.proto.http._log_exporter import OTLPLogExporter
from opentelemetry.sdk._logs import LoggerProvider, LoggingHandler
from opentelemetry.sdk._logs.export import BatchLogRecordProcessor
from opentelemetry import trace
from opentelemetry.sdk.resources import (
SERVICE_INSTANCE_ID,
SERVICE_NAME,
SERVICE_NAMESPACE,
SERVICE_VERSION,
Resource,
)
from opentelemetry.sdk.trace import ReadableSpan, TracerProvider
from opentelemetry.sdk.trace.export import (
BatchSpanProcessor,
SpanExporter,
SpanExportResult,
)


def configure_otel_resource(agent_name: str) -> None:
Expand All @@ -41,59 +48,151 @@ def configure_otel_resource(agent_name: str) -> None:
)


def get_log_dir() -> Path:
"""Get the appropriate log directory based on environment."""
return Path("/app/logs") if Path("/.dockerenv").exists() else Path("./logs")


class JSONFormatter(logging.Formatter):
def format(self, record: logging.LogRecord) -> str:
log_record = {
"timestamp": datetime.fromtimestamp(record.created, tz=UTC).isoformat(),
"level": record.levelname,
"name": record.name,
"message": record.getMessage(),
"process_id": record.process,
"thread_id": record.thread,
}
Comment on lines +58 to +65
Copy link
Copy Markdown
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

medium

Consider adding process and thread IDs to the JSON log record. This is especially useful for debugging concurrency issues in a FastAPI application.

Suggested change
log_record = {
"timestamp": self.formatTime(record, self.datefmt),
"level": record.levelname,
"name": record.name,
"message": record.getMessage(),
}
log_record = {
"timestamp": self.formatTime(record, self.datefmt),
"level": record.levelname,
"name": record.name,
"message": record.getMessage(),
"process": record.process,
"thread": record.threadName,
}

if record.exc_info:
log_record["exception"] = self.formatException(record.exc_info)
return json.dumps(log_record, default=str)


def setup_logging(log_level: str) -> None:
"""Set up basic logging with optional OTLP log export.
"""Set up basic logging with local JSON file export.

Configures Python logging to output to stdout. If OTEL_EXPORTER_OTLP_LOGS_ENDPOINT
is set, also exports logs via OTLP for correlation with traces.
Configures Python logging to output to stdout and append to a local JSON file.

Args:
log_level: Logging verbosity level as string
"""
level = getattr(logging, log_level.upper(), logging.INFO)

# Configure root logger with stdout handler
logging.basicConfig(
level=level,
format="%(asctime)s [%(levelname)s] %(name)s: %(message)s",
handlers=[logging.StreamHandler(sys.stdout)],
# Configure stdout handler
console_handler = logging.StreamHandler(sys.stdout)
console_handler.setFormatter(
logging.Formatter("%(asctime)s [%(levelname)s] %(name)s: %(message)s")
)

handlers: list[logging.Handler] = [console_handler]

# Configure local JSON file handler
log_dir = get_log_dir()
try:
log_dir.mkdir(parents=True, exist_ok=True)
file_handler = logging.FileHandler(log_dir / "blacki-telemetry.log")
file_handler.setFormatter(JSONFormatter())
handlers.append(file_handler)
except OSError as e:
print(f"⚠️ Failed to create log directory or file handler: {e}")
print(" Continuing with stdout logging only...")

# Configure root logger
logging.basicConfig(level=level, handlers=handlers, force=True)

# Set levels for some noisy libraries if needed
logging.getLogger("urllib3").setLevel(logging.WARNING)

# Set up OTLP log export if endpoint is configured
logs_endpoint = os.getenv("OTEL_EXPORTER_OTLP_LOGS_ENDPOINT")
if logs_endpoint:

class JSONFileSpanExporter(SpanExporter):
"""Exports OpenTelemetry Spans to a local JSON Lines file."""

def __init__(self, log_path: str):
self.log_path = log_path

def export(self, spans: Sequence[ReadableSpan]) -> SpanExportResult:
try:
print(f"📊 Configuring OTLP log export to: {logs_endpoint}")

# Create resource from OTEL_RESOURCE_ATTRIBUTES
resource_attrs = {}
if "OTEL_RESOURCE_ATTRIBUTES" in os.environ:
for pair in os.environ["OTEL_RESOURCE_ATTRIBUTES"].split(","):
if "=" in pair:
key, value = pair.split("=", 1)
resource_attrs[key] = value

resource = Resource.create(resource_attrs)

# Set up logger provider with OTLP exporter
provider = LoggerProvider(resource=resource)
provider.add_log_record_processor(
BatchLogRecordProcessor(OTLPLogExporter())
)
set_logger_provider(provider)

# Add OTLP handler to root logger
handler = LoggingHandler(level=level)
logging.getLogger().addHandler(handler)

print("✅ OTLP log export configured")
with Path(self.log_path).open("a") as f:
for span in spans:
# Convert span attributes and events to serializable dicts
span_data = {
"name": span.name,
"context": {
"trace_id": format(span.context.trace_id, "032x"),
"span_id": format(span.context.span_id, "016x"),
},
Comment on lines +118 to +123
Copy link
Copy Markdown
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

high

The exported span data is missing the parent_span_id. Without this field, it is impossible to reconstruct the trace hierarchy (parent-child relationships) in DuckDB or other analysis tools.

Suggested change
span_data = {
"name": span.name,
"context": {
"trace_id": format(span.context.trace_id, "032x"),
"span_id": format(span.context.span_id, "016x"),
},
span_data = {
"name": span.name,
"context": {
"trace_id": format(span.context.trace_id, "032x"),
"span_id": format(span.context.span_id, "016x"),
},
"parent_id": format(span.parent.span_id, "016x") if span.parent else None,

"parent_id": format(span.parent.span_id, "016x")
if span.parent
else None,
"kind": span.kind.name if span.kind else None,
"start_time": datetime.fromtimestamp(
span.start_time / 1e9, tz=UTC
).isoformat()
if span.start_time
else None,
"end_time": datetime.fromtimestamp(
span.end_time / 1e9, tz=UTC
).isoformat()
if span.end_time
else None,
"status": {
"status_code": span.status.status_code.name
if span.status
else None,
"description": span.status.description
if span.status
else None,
},
"attributes": dict(span.attributes) if span.attributes else {},
"events": [
{
"name": event.name,
"timestamp": datetime.fromtimestamp(
event.timestamp / 1e9, tz=UTC
).isoformat()
if event.timestamp
else None,
"attributes": dict(event.attributes)
if event.attributes
else {},
}
for event in span.events
]
if span.events
else [],
}
f.write(json.dumps(span_data, default=str) + "\n")
return SpanExportResult.SUCCESS
except Exception as e:
print(f"⚠️ Failed to configure OTLP log export: {e}")
print(" Continuing with stdout logging only...")
logging.getLogger(__name__).warning(
"OTLP log export setup failed, falling back to stdout-only logging: %s",
e,
)
print(f"⚠️ Failed to write trace to {self.log_path}: {e}")
return SpanExportResult.FAILURE

def shutdown(self) -> None:
pass


def setup_tracing() -> None:
"""Set up OpenTelemetry tracing with local JSON file export."""
log_dir = get_log_dir()
log_path = log_dir / "blacki-traces.log"

try:
log_dir.mkdir(parents=True, exist_ok=True)
except OSError as e:
print(f"⚠️ Failed to create trace directory: {e}")
return

# Resource automatically reads OTEL_RESOURCE_ATTRIBUTES from environment variables
resource = Resource.create()

# Set up tracer provider
provider = TracerProvider(resource=resource)

# Add our custom JSON exporter
exporter = JSONFileSpanExporter(str(log_path))
processor = BatchSpanProcessor(exporter)
provider.add_span_processor(processor)

# Register global tracer provider
trace.set_tracer_provider(provider)
print(f"✅ Local trace export configured to: {log_path}")
Loading