from azure.ai.inference.prompts import PromptTemplate

from .shared import globals
-from azure.core.settings import settings

from opentelemetry.baggage import get_baggage
-from azure.ai.inference.aio import ChatCompletionsClient
+from azure.ai.evaluation import CoherenceEvaluator, FluencyEvaluator, RelevanceEvaluator, ViolenceEvaluator, SexualEvaluator, HateUnfairnessEvaluator, ProtectedMaterialEvaluator, ContentSafetyEvaluator
+import asyncio
+from opentelemetry.baggage import set_baggage, get_baggage
+from opentelemetry.context import attach
+from featuremanagement import TargetingContext

-settings.tracing_implementation = "opentelemetry"
router = fastapi.APIRouter()
templates = Jinja2Templates(directory="api/templates")

@@ -31,6 +33,7 @@ class Message(pydantic.BaseModel):
class ChatRequest(pydantic.BaseModel):
    messages: list[Message]
    prompt_override: str = None
+    sessionState: dict = {}

@router.get("/test/hello")
async def test():
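With sessionState on ChatRequest, the client can pin its session to an experiment cohort. A hypothetical request body, shown as a Python dict (field values are illustrative, not from this commit):

    # Hypothetical /chat payload (values illustrative):
    payload = {
        "messages": [{"role": "user", "content": "hello"}],
        "sessionState": {"sessionId": "a-stable-session-id"},  # reused below as the targeting ID
    }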
@@ -58,7 +61,7 @@ async def response_stream():
        if chat_request.prompt_override:
            prompt = PromptTemplate.from_prompty(pathlib.Path(__file__).parent.resolve() / chat_request.prompt_override)
        else:
-            prompt_variant = feature_manager.get_variant("prompty_file", targeting_id)  # replace this with prompt_asset
+            prompt_variant = feature_manager.get_variant("prompty_file")  # replace this with prompt_asset
            if prompt_variant and prompt_variant.configuration:
                prompt = PromptTemplate.from_prompty(pathlib.Path(__file__).parent.resolve() / prompt_variant.configuration)
            else:
@@ -88,9 +91,13 @@ async def response_stream():
    return fastapi.responses.StreamingResponse(response_stream())


+def get_targeting_context():
+    return TargetingContext(user_id=get_baggage("Microsoft.TargetingId"))
+
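get_targeting_context only pays off if it is registered with the feature manager; that wiring is not part of this diff, but with the featuremanagement package it would presumably look like the sketch below (the constructor arguments other than the accessor are assumptions):

    # Sketch of startup wiring (assumed, not in this commit): register the accessor
    # so get_variant() can resolve the user from OpenTelemetry baggage on its own.
    feature_manager = FeatureManager(
        feature_flags_configuration,
        targeting_context_accessor=get_targeting_context,
    )

This would explain why the explicit targeting_id argument is dropped from the get_variant("prompty_file") calls in the hunks below.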
@router.post("/chat")
async def chat_nostream_handler(
-    chat_request: ChatRequest
+    chat_request: ChatRequest,
+    request: Request
):
    chat_client = globals["chat"]
    if chat_client is None:
@@ -99,15 +106,20 @@ async def chat_nostream_handler(
    messages = [{"role": message.role, "content": message.content} for message in chat_request.messages]
    model_deployment_name = globals["chat_model"]
    feature_manager = globals["feature_manager"]
-    targeting_id = get_baggage("Microsoft.TargetingId") or str(uuid.uuid4())
+
+    targeting_id = chat_request.sessionState.get("sessionId") or str(uuid.uuid4())  # .get() avoids a KeyError on the empty default
+    attach(set_baggage("Microsoft.TargetingId", targeting_id))

    # figure out which prompty template to use (replace file to API)
+    variant = "none"
    if chat_request.prompt_override:
        prompt = PromptTemplate.from_prompty(pathlib.Path(__file__).parent.resolve() / chat_request.prompt_override)
+        variant = chat_request.prompt_override
    else:
-        prompt_variant = feature_manager.get_variant("prompty_file", targeting_id)  # replace this with prompt_asset
+        prompt_variant = feature_manager.get_variant("prompty_file")  # replace this with prompt_asset
        if prompt_variant and prompt_variant.configuration:
            prompt = PromptTemplate.from_prompty(pathlib.Path(__file__).parent.resolve() / prompt_variant.configuration)
+            variant = prompt_variant.name
        else:
            prompt = globals["prompt"]

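For context, prompt_variant.configuration and prompt_variant.name come from a variant feature flag. A hypothetical "prompty_file" definition in the Microsoft feature-flag variant schema, written as a Python dict (flag names and file paths are illustrative, not from this commit):

    # Hypothetical variant flag (illustrative): 50/50 split between two prompty files.
    prompty_file_flag = {
        "id": "prompty_file",
        "enabled": True,
        "variants": [
            {"name": "baseline", "configuration_value": "chat.prompty"},
            {"name": "friendly", "configuration_value": "chat_friendly.prompty"},
        ],
        "allocation": {
            "percentile": [
                {"variant": "baseline", "from": 0, "to": 50},
                {"variant": "friendly", "from": 50, "to": 100},
            ],
            "default_when_enabled": "baseline",
        },
    }

The variant's name is what the handler now returns alongside the answer, so the client and telemetry can tell which prompt produced a given response.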
@@ -117,9 +129,40 @@ async def chat_nostream_handler(
        response = await chat_client.complete(
            model=model_deployment_name, messages=prompt_messages + messages, stream=False
        )
+        track_event("RequestMade", targeting_id)
    except Exception as e:
        error = {"Error": str(e)}
        track_event("ErrorLLM", targeting_id, error)

    answer = response.choices[0].message.content
-    return answer
+
+    # eval_sampling = feature_manager.get_variant("eval_sampling", targeting_id)
+    # if eval_sampling and eval_sampling.configuration == True:
+    #     eval_input = { "conversation": { "messages": messages } }
+    #     project = globals["project"]
+    #     asyncio.create_task(run_evals(eval_input, targeting_id, project.scope, DefaultAzureCredential()))
+
+    return { "answer": answer, "variant": variant }
+
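One caveat for when the commented-out sampling block is re-enabled: run_evals (below) never awaits anything, so asyncio.create_task(run_evals(...)) would still run every evaluator synchronously on the event loop. A minimal sketch of keeping the call fire-and-forget without blocking, assuming the evaluators stay synchronous:

    # Sketch (assumption, not in this commit): push the blocking evaluator calls
    # onto a worker thread so the background task does not stall other requests.
    async def run_evals_in_thread(eval_input, targeting_id, ai_project_scope, credential):
        await asyncio.to_thread(run_evals_sync, eval_input, targeting_id, ai_project_scope, credential)

where run_evals_sync would be a plain-def version of the run_evals added below.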
+async def run_evals(eval_input, targeting_id, ai_project_scope, credential):
+    # Quality evaluators score the exchange using the configured model.
+    run_eval(FluencyEvaluator, eval_input, targeting_id)
+    run_eval(RelevanceEvaluator, eval_input, targeting_id)
+    run_eval(CoherenceEvaluator, eval_input, targeting_id)
+
+    # Safety evaluators are service-backed and need the AI project scope plus a credential.
+    run_safety_eval(ViolenceEvaluator, eval_input, targeting_id, ai_project_scope, credential)
+    run_safety_eval(SexualEvaluator, eval_input, targeting_id, ai_project_scope, credential)
+    run_safety_eval(HateUnfairnessEvaluator, eval_input, targeting_id, ai_project_scope, credential)
+    run_safety_eval(ProtectedMaterialEvaluator, eval_input, targeting_id, ai_project_scope, credential)
+    run_safety_eval(ContentSafetyEvaluator, eval_input, targeting_id, ai_project_scope, credential)
+
+def run_safety_eval(evaluator, eval_input, targeting_id, ai_project_scope, credential):
+    evaluator_instance = evaluator(credential=credential, azure_ai_project=ai_project_scope)
+    score = evaluator_instance(**eval_input)
+    score.update({"evaluator_id": evaluator_instance.id})
+    track_event("gen.ai." + type(evaluator_instance).__name__, targeting_id, score)
+
+def run_eval(evaluator, eval_input, targeting_id):
+    evaluator_instance = evaluator(globals["model_config"])
+    score = evaluator_instance(**eval_input)
+    score.update({"evaluator_id": evaluator.id})
+    track_event("gen.ai." + evaluator.__name__, targeting_id, score)
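
For reference, a single quality-evaluator call reduces to the sketch below; the conversation shape follows the azure-ai-evaluation conversation format, and the message contents are illustrative:

    # Sketch of one quality evaluation (illustrative input):
    fluency = FluencyEvaluator(globals["model_config"])
    score = fluency(conversation={
        "messages": [
            {"role": "user", "content": "What is the capital of France?"},
            {"role": "assistant", "content": "Paris."},
        ]
    })
    # score is a dict of metric names to values, which run_eval then tags
    # with the evaluator id and forwards to track_event.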