Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
18 commits
Select commit Hold shift + click to select a range
18f5926
Added unit tests and workflow for linting, import checks and running …
sapinderpalsingh Jun 16, 2026
f3fc9b3
Fixed isort issue
sapinderpalsingh Jun 16, 2026
702d4b3
Added security scanning workflow
sapinderpalsingh Jun 16, 2026
7a6c480
Potential fix for pull request finding 'CodeQL / Unused import'
sapinderpalsingh Jun 16, 2026
cac6a46
Potential fix for pull request finding 'CodeQL / Unused import'
sapinderpalsingh Jun 16, 2026
e6623a7
Potential fix for pull request finding 'CodeQL / Unused import'
sapinderpalsingh Jun 16, 2026
8665062
Potential fix for pull request finding 'CodeQL / Unused import'
sapinderpalsingh Jun 16, 2026
b7905c8
Potential fix for pull request finding 'CodeQL / Unused import'
sapinderpalsingh Jun 16, 2026
f5a82d0
Potential fix for pull request finding 'CodeQL / Unused import'
sapinderpalsingh Jun 16, 2026
fb7b788
Potential fix for pull request finding 'CodeQL / Unused import'
sapinderpalsingh Jun 16, 2026
7d1acfa
Potential fix for pull request finding 'CodeQL / Unused import'
sapinderpalsingh Jun 16, 2026
17c639d
Potential fix for pull request finding 'CodeQL / Unused import'
sapinderpalsingh Jun 16, 2026
3270f1f
Fixed linting issue
sapinderpalsingh Jun 16, 2026
06a4ab6
Fixed PR checks
sapinderpalsingh Jun 16, 2026
95d9eb6
Potential fix for pull request finding 'CodeQL / Unused import'
sapinderpalsingh Jun 22, 2026
3d9dbf1
Apply suggestions from code review
sapinderpalsingh Jun 22, 2026
1cbf13c
Updated package versions
sapinderpalsingh Jun 22, 2026
6e47807
Potential fix for pull request finding 'CodeQL / Unused import'
sapinderpalsingh Jun 23, 2026
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
34 changes: 34 additions & 0 deletions .github/workflows/codeql.yml
Original file line number Diff line number Diff line change
@@ -0,0 +1,34 @@
name: "CodeQL"

on:
push:
branches: [main]
pull_request:
branches: [main]
schedule:
- cron: "25 14 * * 1"

jobs:
analyze:
name: Analyze (Python)
runs-on: ubuntu-latest
permissions:
security-events: write
packages: read
actions: read
contents: read

steps:
- name: Checkout repository
uses: actions/checkout@v4

- name: Initialize CodeQL
uses: github/codeql-action/init@v3
with:
languages: python
queries: security-and-quality

- name: Perform CodeQL Analysis
uses: github/codeql-action/analyze@v3
with:
category: "/language:python"
42 changes: 42 additions & 0 deletions .github/workflows/pr-checks.yml
Original file line number Diff line number Diff line change
@@ -0,0 +1,42 @@
name: PR Checks

on:
pull_request:
branches: [main]
push:
branches: [main]

jobs:
checks:
runs-on: ubuntu-latest
strategy:
matrix:
python-version: ["3.11", "3.12", "3.13"]

steps:
- uses: actions/checkout@v4

- name: Set up Python ${{ matrix.python-version }}
uses: actions/setup-python@v5
with:
python-version: ${{ matrix.python-version }}
cache: pip

- name: Install dependencies
run: |
python -m pip install --upgrade pip
pip install -r requirements.txt
pip install flake8 isort

- name: Lint with flake8
run: |
flake8 src/ tests/ --count --select=E9,F63,F7,F82 --show-source --statistics
flake8 src/ tests/ --count --max-line-length=120 --statistics --exit-zero

- name: Check import ordering with isort
run: |
isort --check-only --diff src/ tests/

- name: Run unit tests
run: |
python -m pytest tests/ -v --tb=short
6 changes: 6 additions & 0 deletions pytest.ini
Original file line number Diff line number Diff line change
@@ -0,0 +1,6 @@
[pytest]
testpaths = tests
python_files = test_*.py
python_classes = Test*
python_functions = test_*
addopts = -v --tb=short
11 changes: 7 additions & 4 deletions requirements.txt
Original file line number Diff line number Diff line change
@@ -1,16 +1,19 @@
# Azure SDK dependencies
azure-identity>=1.25.3
azure-ai-projects>=2.2.0
azure-ai-evaluation==1.16.9
azure-ai-evaluation==1.17.0
azure-ai-inference>=1.0.0b9
# Core Python packages
python-dotenv>=1.2.2
pyyaml>=6.0.3
pip-system-certs>=5.3
azure-monitor-query>=2.0.0
azure-monitor-opentelemetry>=1.8.8
aiohttp>=3.13.5
agent-framework==1.7.0
aiohttp>=3.14.1
agent-framework>=1.9.0
streamlit>=1.58.0
pandas==2.3.3
plotly>=6.7.0
plotly>=6.8.0
# Test dependencies
pytest>=9.0.0
pytest-asyncio>=1.3.0
2 changes: 1 addition & 1 deletion src/agent_evaluation/agentic_ops/base_evaluator.py
Original file line number Diff line number Diff line change
@@ -1,9 +1,9 @@
# ---------------------------------------------------------
# Copyright (c) Microsoft Corporation. All rights reserved.
# ---------------------------------------------------------
import logging
import os
import re
import logging
from typing import Dict, Union

from .client import LLMClient
Expand Down
7 changes: 4 additions & 3 deletions src/agent_evaluation/agentic_ops/client.py
Original file line number Diff line number Diff line change
Expand Up @@ -8,12 +8,13 @@

import json
import logging
from openai import AzureOpenAI
from azure.identity import DefaultAzureCredential, get_bearer_token_provider
import os
from dotenv import load_dotenv
import time

from azure.identity import DefaultAzureCredential, get_bearer_token_provider
from dotenv import load_dotenv
from openai import AzureOpenAI

load_dotenv()

# Configure logging
Expand Down
8 changes: 5 additions & 3 deletions src/agent_evaluation/agentic_ops/run_eval.py
Original file line number Diff line number Diff line change
@@ -1,11 +1,13 @@
import os
import inspect
import logging
import os
import uuid
from dotenv import load_dotenv

from azure.ai.evaluation import evaluate
from azure.ai.projects import AIProjectClient
import logging
from azure.identity import DefaultAzureCredential
from dotenv import load_dotenv


def get_logger(name: str):
level = os.environ.get("LOG_LEVEL", "INFO").upper()
Expand Down
8 changes: 5 additions & 3 deletions src/agent_evaluation/agentic_ops/runner.py
Original file line number Diff line number Diff line change
@@ -1,12 +1,14 @@
import argparse
import importlib
import logging
import os
import sys
import time
from pathlib import Path
from typing import Optional, Any, Dict
from typing import Any, Dict, Optional

import yaml
import os
import logging


def get_logger(name: str):
level = os.environ.get("LOG_LEVEL", "INFO").upper()
Expand Down
6 changes: 3 additions & 3 deletions src/agent_evaluation/cli.py
Original file line number Diff line number Diff line change
Expand Up @@ -9,11 +9,10 @@
import argparse
import sys
from pathlib import Path
from typing import List, Dict, Optional
from typing import Dict, List, Optional

import yaml


# Root of the project (two levels up from this file)
ROOT_DIR = Path(__file__).resolve().parents[2]
SAMPLES_DIR = ROOT_DIR / "src" / "evaluations" / "offline"
Expand Down Expand Up @@ -74,7 +73,8 @@ def print_samples_table(samples: List[Dict[str, str]]) -> None:

def run_sample(sample: Dict[str, str], extra_args: Optional[List[str]] = None) -> int:
"""Run a selected evaluation sample."""
from src.agent_evaluation.agentic_ops.runner import run_pipeline, parse_args
from src.agent_evaluation.agentic_ops.runner import (parse_args,
run_pipeline)

config_path = sample["config_path"]
print(f"\n{'='*70}")
Expand Down
12 changes: 8 additions & 4 deletions src/evaluations/offline/agentic_evaluation/eval_factory.py
Original file line number Diff line number Diff line change
@@ -1,8 +1,12 @@
from azure.ai.evaluation import RelevanceEvaluator, TaskAdherenceEvaluator, ToolCallAccuracyEvaluator
from .evaluator.evaluator_repo.evaluate_agent_invoked import EvaluateAgentsInvoked

import os
import logging
import os

from azure.ai.evaluation import (RelevanceEvaluator, TaskAdherenceEvaluator,
ToolCallAccuracyEvaluator)

from .evaluator.evaluator_repo.evaluate_agent_invoked import \
EvaluateAgentsInvoked


def get_logger(name: str):
level = os.environ.get("LOG_LEVEL", "INFO").upper()
Expand Down
Original file line number Diff line number Diff line change
@@ -1,9 +1,11 @@
import logging
import os
from pathlib import Path

from src.agent_evaluation.agentic_ops.run_eval import execute_eval
import logging
from src.evaluations.offline.utils.constants import EVAL_NAME
from src.evaluations.offline.utils.file_operations import get_next_run_id

from ..eval_factory import EvaluatorFactory


Expand Down
Original file line number Diff line number Diff line change
@@ -1,5 +1,6 @@
from .eval_utils.evaluation_utils import agent_invoked_accuracy, compute_recall


class EvaluateAgentsInvoked:
def __init__(self):
pass
Expand Down
Original file line number Diff line number Diff line change
@@ -1,11 +1,13 @@
from azure.ai.evaluation import RelevanceEvaluator, CoherenceEvaluator
import logging
import os

from azure.ai.evaluation import CoherenceEvaluator, RelevanceEvaluator

from .evaluator.evaluator_repo.coherence import CoherenceEvaluatorCustom
from .evaluator.evaluator_repo.relevance import RelevanceEvaluatorCustom
from .evaluator.evaluator_repo.fluency import FluencyEvaluatorCustom
from .evaluator.evaluator_repo.relevance import RelevanceEvaluatorCustom
from .evaluator.evaluator_repo.similarity import SimilarityEvaluatorCustom

import os
import logging

def get_logger(name: str):
level = os.environ.get("LOG_LEVEL", "INFO").upper()
Expand Down
Original file line number Diff line number Diff line change
@@ -1,9 +1,11 @@
import logging
import os
from pathlib import Path

from src.agent_evaluation.agentic_ops.run_eval import execute_eval
import logging
from src.evaluations.offline.utils.constants import EVAL_NAME
from src.evaluations.offline.utils.file_operations import get_next_run_id

from ..eval_factory import EvaluatorFactory


Expand Down
Original file line number Diff line number Diff line change
@@ -1,5 +1,7 @@
from typing import Dict, Union
from ......agent_evaluation.agentic_ops.base_evaluator import BaseCustomEvaluator

from ......agent_evaluation.agentic_ops.base_evaluator import \
BaseCustomEvaluator


class CoherenceEvaluatorCustom(BaseCustomEvaluator):
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,9 @@
# Copyright (c) Microsoft Corporation. All rights reserved.
# ---------------------------------------------------------
from typing import Dict, Union
from ......agent_evaluation.agentic_ops.base_evaluator import BaseCustomEvaluator

from ......agent_evaluation.agentic_ops.base_evaluator import \
BaseCustomEvaluator


class FluencyEvaluatorCustom(BaseCustomEvaluator):
Expand Down
Original file line number Diff line number Diff line change
@@ -1,5 +1,7 @@
from typing import Dict, Union
from ......agent_evaluation.agentic_ops.base_evaluator import BaseCustomEvaluator

from ......agent_evaluation.agentic_ops.base_evaluator import \
BaseCustomEvaluator


class RelevanceEvaluatorCustom(BaseCustomEvaluator):
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,9 @@
# Copyright (c) Microsoft Corporation. All rights reserved.
# ---------------------------------------------------------
from typing import Dict, Union
from ......agent_evaluation.agentic_ops.base_evaluator import BaseCustomEvaluator

from ......agent_evaluation.agentic_ops.base_evaluator import \
BaseCustomEvaluator


class SimilarityEvaluatorCustom(BaseCustomEvaluator):
Expand Down
Original file line number Diff line number Diff line change
@@ -1,7 +1,9 @@
from azure.ai.evaluation import RelevanceEvaluator, TaskAdherenceEvaluator, ToolCallAccuracyEvaluator

import os
import logging
import os

from azure.ai.evaluation import (RelevanceEvaluator, TaskAdherenceEvaluator,
ToolCallAccuracyEvaluator)


def get_logger(name: str):
level = os.environ.get("LOG_LEVEL", "INFO").upper()
Expand Down
Original file line number Diff line number Diff line change
@@ -1,9 +1,11 @@
import logging
import os
from pathlib import Path

from src.agent_evaluation.agentic_ops.run_eval import execute_eval
import logging
from src.evaluations.offline.utils.constants import EVAL_NAME
from src.evaluations.offline.utils.file_operations import get_next_run_id

from ..eval_factory import EvaluatorFactory


Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -11,11 +11,14 @@
FLOW:
Load Queries → Run Inference → Save Responses
"""
import os
import logging
import os
import random
from pathlib import Path
from src.evaluations.offline.utils.file_operations import load_queries_from_jsonl

from src.evaluations.offline.utils.file_operations import \
load_queries_from_jsonl

from .experiment_utils import get_file_paths, prepare_output_file, save_result


Expand Down Expand Up @@ -101,7 +104,7 @@ def inference_main(config: dict, args=None) -> None:
if __name__ == "__main__":
# For standalone execution, load config from experiment.yaml
import yaml

# Get project root (six .parent hops from this file: its own directory, then five levels up)
current_file = Path(__file__) # .../experiment/agent_inference.py
project_root = current_file.parent.parent.parent.parent.parent.parent # Go up to project root
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -3,11 +3,11 @@
==============
Helper functions for file management in inference pipelines.
"""
import os
import logging
import os
from pathlib import Path
from src.evaluations.offline.utils.file_operations import append_to_jsonl

from src.evaluations.offline.utils.file_operations import append_to_jsonl

logger = logging.getLogger(__name__)

Expand Down
Original file line number Diff line number Diff line change
@@ -1,9 +1,10 @@
"""Tool functions for the device agents in the Multi-Agent system."""

from random import choice, randint
from typing import Annotated
from pydantic import Field
from random import randint, choice

from agent_framework import tool
from pydantic import Field


# =============================================================================
Expand Down
Loading
Loading