DataDog · gh-worker-dd-mergequeue-cf854d · Mar 31, 2026 · Mar 31, 2026 · Mar 31, 2026 · Mar 31, 2026
@@ -247,21 +247,16 @@ def run(self) -> str:
     def _load_system_prompt(self) -> str:
         """Load and prepare the optimization system prompt.
 
-        Loads the template from _prompt_optimization.md and replaces placeholders.
+        Loads the template from _prompt_optimization_prompt.py and replaces placeholders.
         Adds evaluation model information and random tip at the end.
 
         :return: System prompt string with output format injected.
         """
-        import os
         import random
 
-        # Get the directory of this file
-        current_dir = os.path.dirname(os.path.abspath(__file__))
-        template_path = os.path.join(current_dir, "_prompt_optimization.md")
+        from ddtrace.llmobs._prompt_optimization_prompt import OPTIMIZATION_SYSTEM_PROMPT_TEMPLATE
 
-        # Load template
-        with open(template_path, "r", encoding="utf-8") as f:
-            template = f.read()
+        template = OPTIMIZATION_SYSTEM_PROMPT_TEMPLATE
 
         output_format = self._config.get("evaluation_output_format")
         structure_placeholder = ""

@@ -1,4 +1,15 @@
-You are a systematic prompt engineering expert. Your task is to identify the root cause of false positives and create targeted fixes that preserve correct behavior. As a prompt engineering expert you know how to format a prompt with clear structure, examples, and guidance. You optimize prompts for any domain or evaluation task.
+"""System prompt template for the prompt optimization framework.
+
+This module contains the system prompt used by OptimizationIteration to guide
+the LLM that performs prompt optimization. It was extracted from a markdown file
+to ensure it is included in release packages.
+"""
+
+OPTIMIZATION_SYSTEM_PROMPT_TEMPLATE = """\
+You are a systematic prompt engineering expert. Your task is to identify the root cause of \
+false positives and create targeted fixes that preserve correct behavior. As a prompt engineering \
+expert you know how to format a prompt with clear structure, examples, and guidance. You optimize \
+prompts for any domain or evaluation task.
 
 **GIVE SYNTHETIC EXAMPLES OF INPUT AND EXPECTED OUTPUT**
 
@@ -34,11 +45,14 @@
 
 ### 0.5. Model-Aware Optimization
 **Consider the specific evaluation model characteristics:**
-- **Model type and capabilities**: Is it a reasoning model (o3-mini), instruction-following model (GPT-4), or domain-specific model?
+- **Model type and capabilities**: Is it a reasoning model (o3-mini), instruction-following model \
+(GPT-4), or domain-specific model?
 - **Model strengths**: What types of tasks does this model excel at?
 - **Model weaknesses**: What are known limitations or common failure modes?
-- **Prompt format preferences**: Does this model respond better to specific prompt structures or instruction styles?
-- **Context handling**: How well does this model handle long contexts, examples, or complex instructions?
+- **Prompt format preferences**: Does this model respond better to specific prompt structures or \
+instruction styles?
+- **Context handling**: How well does this model handle long contexts, examples, or complex \
+instructions?
 
 ### 1. Root Cause Analysis
 Compare the bad vs good examples and identify:
@@ -61,7 +75,8 @@
 - **Tighten decision criteria** for weak boundaries
 - **Enforce schema compliance** for format issues
 - **Add uncertainty expressions** for overconfidence
-- **Model-specific optimization** - Consider the specific evaluation model's capabilities, training, and typical behavior patterns
+- **Model-specific optimization** - Consider the specific evaluation model's capabilities, \
+training, and typical behavior patterns
 
 ### 4. Preservation Check
 **MANDATORY CHECKS:**
@@ -70,4 +85,6 @@
 - Confirm the domain and fundamental goal are identical to the original
 - Check that true positive behavior is maintained while reducing false positives
 
-Make your fix as minimal and targeted as possible while maximizing false positive reduction WITHOUT altering the prompt's original purpose.
+Make your fix as minimal and targeted as possible while maximizing false positive reduction \
+WITHOUT altering the prompt's original purpose.
+"""
@@ -0,0 +1,6 @@
+---
+fixes:
+  - |
+    LLM Observability: Fixes a ``FileNotFoundError`` raised during prompt optimization because the system
+    prompt template was stored as a ``.md`` file that was excluded from release wheels. The template is
+    now embedded in a Python module so that it is always available at runtime.