from azure.ai.inference.prompts import PromptTemplate

from .shared import globals
-from azure.core.settings import settings

from opentelemetry.baggage import get_baggage
-from azure.ai.inference.aio import ChatCompletionsClient
+from azure.ai.evaluation import CoherenceEvaluator, FluencyEvaluator, RelevanceEvaluator, ViolenceEvaluator, SexualEvaluator, HateUnfairnessEvaluator, ProtectedMaterialEvaluator, ContentSafetyEvaluator
+import asyncio
+from opentelemetry.baggage import set_baggage, get_baggage
+from opentelemetry.context import attach
+from featuremanagement import TargetingContext

-settings.tracing_implementation = "opentelemetry"
router = fastapi.APIRouter()
templates = Jinja2Templates(directory="api/templates")

@@ -31,6 +33,7 @@ class Message(pydantic.BaseModel):
class ChatRequest(pydantic.BaseModel):
    messages: list[Message]
    prompt_override: str = None
+    sessionState: dict = {}

@router.get("/test/hello")
async def test():
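With sessionState on ChatRequest, the client can pin its session to an experiment cohort. A hypothetical request body, shown as a Python dict (field values are illustrative, not from this commit):

    # Hypothetical /chat payload (values illustrative):
    payload = {
        "messages": [{"role": "user", "content": "hello"}],
        "sessionState": {"sessionId": "a-stable-session-id"},  # reused below as the targeting ID
    }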
@@ -58,7 +61,7 @@ async def response_stream():
        if chat_request.prompt_override:
            prompt = PromptTemplate.from_prompty(pathlib.Path(__file__).parent.resolve() / chat_request.prompt_override)
        else:
-            prompt_variant = feature_manager.get_variant("prompty_file", targeting_id)  # replace this with prompt_asset
+            prompt_variant = feature_manager.get_variant("prompty_file")  # replace this with prompt_asset
            if prompt_variant and prompt_variant.configuration:
                prompt = PromptTemplate.from_prompty(pathlib.Path(__file__).parent.resolve() / prompt_variant.configuration)
            else:
@@ -88,9 +91,13 @@ async def response_stream():
    return fastapi.responses.StreamingResponse(response_stream())


+def get_targeting_context():
+    return TargetingContext(user_id=get_baggage("Microsoft.TargetingId"))
+
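get_targeting_context only pays off if it is registered with the feature manager; that wiring is not part of this diff, but with the featuremanagement package it would presumably look like the sketch below (the constructor arguments other than the accessor are assumptions):

    # Sketch of startup wiring (assumed, not in this commit): register the accessor
    # so get_variant() can resolve the user from OpenTelemetry baggage on its own.
    feature_manager = FeatureManager(
        feature_flags_configuration,
        targeting_context_accessor=get_targeting_context,
    )

This would explain why the explicit targeting_id argument is dropped from the get_variant("prompty_file") calls in the hunks below.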
@router.post("/chat")
async def chat_nostream_handler(
-    chat_request: ChatRequest
+    chat_request: ChatRequest,
+    request: Request
):
    chat_client = globals["chat"]
    if chat_client is None:
@@ -99,15 +106,20 @@ async def chat_nostream_handler(
    messages = [{"role": message.role, "content": message.content} for message in chat_request.messages]
    model_deployment_name = globals["chat_model"]
    feature_manager = globals["feature_manager"]
-    targeting_id = get_baggage("Microsoft.TargetingId") or str(uuid.uuid4())
+
+    targeting_id = chat_request.sessionState.get("sessionId") or str(uuid.uuid4())  # .get() avoids a KeyError on the empty default
+    attach(set_baggage("Microsoft.TargetingId", targeting_id))

    # figure out which prompty template to use (replace file to API)
+    variant = "none"
    if chat_request.prompt_override:
        prompt = PromptTemplate.from_prompty(pathlib.Path(__file__).parent.resolve() / chat_request.prompt_override)
+        variant = chat_request.prompt_override
    else:
-        prompt_variant = feature_manager.get_variant("prompty_file", targeting_id)  # replace this with prompt_asset
+        prompt_variant = feature_manager.get_variant("prompty_file")  # replace this with prompt_asset
        if prompt_variant and prompt_variant.configuration:
            prompt = PromptTemplate.from_prompty(pathlib.Path(__file__).parent.resolve() / prompt_variant.configuration)
+            variant = prompt_variant.name
        else:
            prompt = globals["prompt"]

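For context, prompt_variant.configuration and prompt_variant.name come from a variant feature flag. A hypothetical "prompty_file" definition in the Microsoft feature-flag variant schema, written as a Python dict (flag names and file paths are illustrative, not from this commit):

    # Hypothetical variant flag (illustrative): 50/50 split between two prompty files.
    prompty_file_flag = {
        "id": "prompty_file",
        "enabled": True,
        "variants": [
            {"name": "baseline", "configuration_value": "chat.prompty"},
            {"name": "friendly", "configuration_value": "chat_friendly.prompty"},
        ],
        "allocation": {
            "percentile": [
                {"variant": "baseline", "from": 0, "to": 50},
                {"variant": "friendly", "from": 50, "to": 100},
            ],
            "default_when_enabled": "baseline",
        },
    }

The variant's name is what the handler now returns alongside the answer, so the client and telemetry can tell which prompt produced a given response.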
@@ -117,9 +129,40 @@ async def chat_nostream_handler(
        response = await chat_client.complete(
            model=model_deployment_name, messages=prompt_messages + messages, stream=False
        )
+        track_event("RequestMade", targeting_id)
    except Exception as e:
        error = {"Error": str(e)}
        track_event("ErrorLLM", targeting_id, error)

    answer = response.choices[0].message.content
-    return answer
+
+    # eval_sampling = feature_manager.get_variant("eval_sampling", targeting_id)
+    # if eval_sampling and eval_sampling.configuration == True:
+    #     eval_input = { "conversation": { "messages": messages } }
+    #     project = globals["project"]
+    #     asyncio.create_task(run_evals(eval_input, targeting_id, project.scope, DefaultAzureCredential()))
+
+    return { "answer": answer, "variant": variant }
+
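One caveat for when the commented-out sampling block is re-enabled: run_evals (below) never awaits anything, so asyncio.create_task(run_evals(...)) would still run every evaluator synchronously on the event loop. A minimal sketch of keeping the call fire-and-forget without blocking, assuming the evaluators stay synchronous:

    # Sketch (assumption, not in this commit): push the blocking evaluator calls
    # onto a worker thread so the background task does not stall other requests.
    async def run_evals_in_thread(eval_input, targeting_id, ai_project_scope, credential):
        await asyncio.to_thread(run_evals_sync, eval_input, targeting_id, ai_project_scope, credential)

where run_evals_sync would be a plain-def version of the run_evals added below.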
+async def run_evals(eval_input, targeting_id, ai_project_scope, credential):
+    # Quality evaluators score the exchange using the configured model.
+    run_eval(FluencyEvaluator, eval_input, targeting_id)
+    run_eval(RelevanceEvaluator, eval_input, targeting_id)
+    run_eval(CoherenceEvaluator, eval_input, targeting_id)
+
+    # Safety evaluators are service-backed and need the AI project scope plus a credential.
+    run_safety_eval(ViolenceEvaluator, eval_input, targeting_id, ai_project_scope, credential)
+    run_safety_eval(SexualEvaluator, eval_input, targeting_id, ai_project_scope, credential)
+    run_safety_eval(HateUnfairnessEvaluator, eval_input, targeting_id, ai_project_scope, credential)
+    run_safety_eval(ProtectedMaterialEvaluator, eval_input, targeting_id, ai_project_scope, credential)
+    run_safety_eval(ContentSafetyEvaluator, eval_input, targeting_id, ai_project_scope, credential)
+
+def run_safety_eval(evaluator, eval_input, targeting_id, ai_project_scope, credential):
+    evaluator_instance = evaluator(credential=credential, azure_ai_project=ai_project_scope)
+    score = evaluator_instance(**eval_input)
+    score.update({"evaluator_id": evaluator_instance.id})
+    track_event("gen.ai." + type(evaluator_instance).__name__, targeting_id, score)
+
+def run_eval(evaluator, eval_input, targeting_id):
+    evaluator_instance = evaluator(globals["model_config"])
+    score = evaluator_instance(**eval_input)
+    score.update({"evaluator_id": evaluator.id})
+    track_event("gen.ai." + evaluator.__name__, targeting_id, score)
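
For reference, a single quality-evaluator call reduces to the sketch below; the conversation shape follows the azure-ai-evaluation conversation format, and the message contents are illustrative:

    # Sketch of one quality evaluation (illustrative input):
    fluency = FluencyEvaluator(globals["model_config"])
    score = fluency(conversation={
        "messages": [
            {"role": "user", "content": "What is the capital of France?"},
            {"role": "assistant", "content": "Paris."},
        ]
    })
    # score is a dict of metric names to values, which run_eval then tags
    # with the evaluator id and forwards to track_event.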