Skip to content
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
6 changes: 4 additions & 2 deletions src/modelbench/benchmarks.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,6 +5,8 @@
from typing import List, Sequence

import casefy

from modelgauge.versions import CURRENT_SECURITY_VERSION
from modelgauge.locales import DEFAULT_LOCALE, validate_locale
from modelgauge.prompt_sets import GENERAL_PROMPT_SETS, SECURITY_JAILBREAK_PROMPT_SETS, validate_prompt_set
from modelgauge.sut import PromptResponseSUT
Expand Down Expand Up @@ -255,7 +257,7 @@ def _make_hazards(self) -> Sequence[HazardDefinition]:

_uid_definition = {
"class": "security_naive_benchmark",
"version": "1.0",
"version": CURRENT_SECURITY_VERSION,
"locale": "self.locale",
"prompt_set": "self.prompt_set",
"evaluator": "self.evaluator",
Expand Down Expand Up @@ -289,7 +291,7 @@ def _make_hazards(self) -> Sequence[HazardDefinition]:

_uid_definition = {
"class": "security_benchmark",
"version": "1.0",
"version": CURRENT_SECURITY_VERSION,
"locale": "self.locale",
"prompt_set": "self.prompt_set",
"evaluator": "self.evaluator",
Expand Down
16 changes: 13 additions & 3 deletions src/modelbench/cli.py
Original file line number Diff line number Diff line change
Expand Up @@ -38,6 +38,7 @@
from modelgauge.preflight import check_secrets, make_sut
from modelgauge.prompt_sets import GENERAL_PROMPT_SETS, SECURITY_JAILBREAK_PROMPT_SETS
from modelgauge.sut_registry import SUTS
from modelgauge.versions import CURRENT_GENERAL_VERSION, CURRENT_SECURITY_VERSION


def load_local_plugins(_, __, path: pathlib.Path):
Expand Down Expand Up @@ -167,9 +168,9 @@ def list_suts():
@click.option(
"--version",
"-v",
type=click.Choice(["1.1"]),
default="1.1",
help="Benchmark version to run (Default: 1.1)",
type=click.Choice([CURRENT_GENERAL_VERSION]),
default=CURRENT_GENERAL_VERSION,
help=f"Benchmark version to run (Default: {CURRENT_GENERAL_VERSION})",
multiple=False,
)
@benchmark_options(GENERAL_PROMPT_SETS, "demo")
Expand Down Expand Up @@ -200,10 +201,19 @@ def general_benchmark(


@benchmark.command("security", help="run a security benchmark")
@click.option(
"--version",
"-v",
type=click.Choice([CURRENT_SECURITY_VERSION]),
default=CURRENT_SECURITY_VERSION,
help=f"Benchmark version to run (Default: {CURRENT_SECURITY_VERSION})",
multiple=False,
)
@benchmark_options(SECURITY_JAILBREAK_PROMPT_SETS, "official")
@click.pass_context
def security_benchmark(
ctx: click.Context,
version: str,
output_dir: pathlib.Path,
max_instances: int | None,
debug: bool,
Expand Down
11 changes: 6 additions & 5 deletions src/modelbench/hazards.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,6 +5,7 @@
import casefy
from pydantic import BaseModel, ConfigDict

from modelgauge.versions import CURRENT_GENERAL_V_UNDER, CURRENT_GENERAL_VERSION, CURRENT_SECURITY_V_UNDER, CURRENT_SECURITY_VERSION
from modelgauge.base_test import PromptResponseTest
from modelgauge.locales import display_for, validate_locale
from modelgauge.prompt_sets import GENERAL_PROMPT_SETS, SECURITY_JAILBREAK_PROMPT_SETS, validate_prompt_set
Expand Down Expand Up @@ -90,10 +91,10 @@ def __init__(self, hazard_key: str, locale: str, prompt_set: str, evaluator="def

@property
def reference_key(self):
return f"safe_hazard-1_1-{self.hazard_key}"
return f"safe_hazard-{CURRENT_GENERAL_V_UNDER}-{self.hazard_key}"

def name(self):
return f"Safe {self.hazard_key.title()} {display_for(self.locale)} Hazard V1.1 ({self.prompt_set.title()})"
return f"Safe {self.hazard_key.title()} {display_for(self.locale)} Hazard V{CURRENT_GENERAL_VERSION} ({self.prompt_set.title()})"

def score(self, sut_scores: Mapping[str, TestRecord]) -> "HazardScore":
test_scores: dict[str, ValueEstimate] = {}
Expand Down Expand Up @@ -140,15 +141,15 @@ def test_uids(self) -> List[str]:

_uid_definition = {
"name": "safe_hazard",
"version": "1.1",
"version": CURRENT_GENERAL_VERSION,
"hazard": "self.hazard_key",
"locale": "self.locale",
"prompt_set": "self.prompt_set",
}


class BaseSecurityHazard(HazardDefinition, ABC):
VERSION = "1.0"
VERSION = CURRENT_SECURITY_VERSION
persona: str

def __init__(self, locale: str, prompt_set: str, evaluator="default"):
Expand All @@ -165,7 +166,7 @@ def test_uids(self) -> List[str]:
@property
def reference_key(self):
# Both jailbreak and naive hazards use the naive reference score.
return f"security_naive_official_hazard-{self.VERSION.replace('.','_')}"
return f"security_naive_official_hazard-{CURRENT_SECURITY_V_UNDER}"

def name(self):
return f"Security {self.persona.title()} {display_for(self.locale)} Hazard V{self.VERSION} ({self.prompt_set.title()})"
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -12,7 +12,7 @@
"command": "/Users/Barbara_1/MLCommons/modelbench/.venv/bin/modelbench calibrate security --locale en_us --prompt-set official --evaluator private",
"sut_scores": {
"llama-3.1-8b-instruct-turbo-together": {
"security_naive_hazard-1.0-en_us-official": 0.8566666666666667
"security_naive_hazard-1.0.1-en_us-official": 0.8566666666666667
}
},
"journals": [
Expand All @@ -24,9 +24,9 @@
"reference_suts": [
"llama-3.1-8b-instruct-turbo-together"
],
"reference_benchmark": "security_naive_benchmark-1.0-en_us-official-private",
"reference_benchmark": "security_naive_benchmark-1.0.1-en_us-official-private",
"reference_standards": {
"security_naive_official_hazard-1_0": 0.8566666666666667
"security_naive_official_hazard-1_0_1": 0.8566666666666667
}
}
}
3 changes: 2 additions & 1 deletion src/modelgauge/annotator_registry.py
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
from modelgauge.annotators.cheval.ids import SAFETY_ANNOTATOR_V1_1_UID, SECURITY_ANNOTATOR_V0_5_UID
from modelgauge.annotators.cheval.ids import SAFETY_ANNOTATOR_V1_1_UID, SECURITY_ANNOTATOR_V0_5_UID, SECURITY_ANNOTATOR_V1_0_UID
from modelgauge.instance_factory import InstanceFactory
from modelgauge.annotator import Annotator

Expand All @@ -10,6 +10,7 @@
"perspective_api": "perspective_api",
SAFETY_ANNOTATOR_V1_1_UID: "cheval.registration",
SECURITY_ANNOTATOR_V0_5_UID: "cheval.registration",
SECURITY_ANNOTATOR_V1_0_UID: "cheval.registration",
}

# The list of all Annotators instances with assigned UIDs.
Expand Down
1 change: 1 addition & 0 deletions src/modelgauge/annotators/cheval/ids.py
Original file line number Diff line number Diff line change
@@ -1,2 +1,3 @@
SAFETY_ANNOTATOR_V1_1_UID = "safety-v1.1"
SECURITY_ANNOTATOR_V0_5_UID = "security-v0.5"
SECURITY_ANNOTATOR_V1_0_UID = "security-v1.0"
4 changes: 2 additions & 2 deletions src/modelgauge/annotators/cheval/registration.py
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
from modelgauge.annotator_registry import ANNOTATORS
from modelgauge.annotators.cheval.annotator import ChevalAPIKey, ChevalAnnotator, ChevalEndpointUrl
from modelgauge.annotators.cheval.ids import SAFETY_ANNOTATOR_V1_1_UID, SECURITY_ANNOTATOR_V0_5_UID
from modelgauge.annotators.cheval.ids import SAFETY_ANNOTATOR_V1_1_UID, SECURITY_ANNOTATOR_V1_0_UID
from modelgauge.secret_values import InjectSecret

ANNOTATORS.register(
Expand All @@ -13,7 +13,7 @@

ANNOTATORS.register(
ChevalAnnotator,
SECURITY_ANNOTATOR_V0_5_UID,
SECURITY_ANNOTATOR_V1_0_UID,
InjectSecret(ChevalAPIKey),
InjectSecret(ChevalEndpointUrl),
)
4 changes: 3 additions & 1 deletion src/modelgauge/tests/safe_v1.py
Original file line number Diff line number Diff line change
Expand Up @@ -29,6 +29,7 @@
from modelgauge.sut_capabilities import AcceptsTextPrompt
from modelgauge.test_decorator import modelgauge_test
from modelgauge.test_registry import TESTS
from modelgauge.versions import CURRENT_GENERAL_VERSION
from pydantic import BaseModel


Expand Down Expand Up @@ -98,6 +99,7 @@ class BaseSafeTestVersion1(PromptResponseTest, ABC):
Test UIDs follow the form "safe-{hazard acronym}-{locale}-{prompt set}-{version}", with an optional "-{evaluator}" suffix (e.g. safe-vcr-en_us-practice-1.1). The hazard acronyms are as follows:
"""

VERSION = CURRENT_GENERAL_VERSION
hazards = Hazards()
_sut_options = ModelOptions(
max_tokens=3000,
Expand All @@ -113,7 +115,7 @@ def create_uid(hazard: str, locale: str, prompt_set: str, evaluator=None):
postfix = ""
else:
postfix = "-" + evaluator
uid = f"safe-{hazard}-{locale}-{prompt_set}-1.1{postfix}".lower()
uid = f"safe-{hazard}-{locale}-{prompt_set}-{CURRENT_GENERAL_VERSION}{postfix}".lower()
return uid

def __init__(
Expand Down
9 changes: 5 additions & 4 deletions src/modelgauge/tests/security.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,7 +5,7 @@
from pydantic import BaseModel

from modelgauge.aggregations import get_measurement_stats
from modelgauge.annotators.cheval.ids import SAFETY_ANNOTATOR_V1_1_UID
from modelgauge.annotators.cheval.ids import SECURITY_ANNOTATOR_V1_0_UID
from modelgauge.base_test import PromptResponseTest
from modelgauge.dependency_helper import DependencyHelper
from modelgauge.external_data import ExternalData, WebData
Expand All @@ -32,6 +32,7 @@
from modelgauge.tests.safe_v1 import Hazards
from modelgauge.test_decorator import modelgauge_test
from modelgauge.test_registry import TESTS
from modelgauge.versions import CURRENT_SECURITY_VERSION


class SecurityTestResult(BaseModel):
Expand All @@ -40,7 +41,7 @@ class SecurityTestResult(BaseModel):


class BaseSecurityTest(PromptResponseTest, ABC):
VERSION = 1.0
VERSION = CURRENT_SECURITY_VERSION
hazards = Hazards()
prompt_sets: dict
persona: str
Expand Down Expand Up @@ -171,14 +172,14 @@ def register_tests(cls, evaluator=None):
class PrivateSecurityJailbreakTest(BaseSecurityJailbreakTest):
@classmethod
def get_annotators(cls) -> List[str]:
return [SAFETY_ANNOTATOR_V1_1_UID]
return [SECURITY_ANNOTATOR_V1_0_UID]


@modelgauge_test(requires_sut_capabilities=[AcceptsTextPrompt])
class PrivateSecurityNaiveTest(BaseSecurityNaiveTest):
@classmethod
def get_annotators(cls) -> List[str]:
return [SAFETY_ANNOTATOR_V1_1_UID]
return [SECURITY_ANNOTATOR_V1_0_UID]


register_tests(PrivateSecurityJailbreakTest, "private")
Expand Down
7 changes: 7 additions & 0 deletions src/modelgauge/versions.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,7 @@
# Single source of truth for the benchmark version strings.
#
# Each released version gets its own private constant, named after the value
# it holds, so older versions can stay listed here after a bump.
_GENERAL_V1_1 = "1.1"
_SECURITY_V1_0_1 = "1.0.1"

# Versions that the rest of the code base should import. To release a new
# version, add a private constant above and repoint the matching name here.
CURRENT_GENERAL_VERSION = _GENERAL_V1_1
CURRENT_SECURITY_VERSION = _SECURITY_V1_0_1

# Same versions with "." replaced by "_", for identifiers that embed the
# version in underscore form (e.g. hazard reference keys).
CURRENT_GENERAL_V_UNDER = CURRENT_GENERAL_VERSION.replace(".", "_")
CURRENT_SECURITY_V_UNDER = CURRENT_SECURITY_VERSION.replace(".", "_")
47 changes: 26 additions & 21 deletions tests/modelbench_tests/test_benchmark.py
Original file line number Diff line number Diff line change
Expand Up @@ -11,23 +11,28 @@
GeneralPurposeAiChatBenchmarkV1,
SecurityBenchmark,
)
from modelbench.hazards import HazardScore, SafeHazardV1, SecurityJailbreakHazard, SecurityNaiveHazard # usort: skip
from modelbench.hazards import ( # usort: skip
HazardScore,
SafeHazardV1,
SecurityJailbreakHazard,
SecurityNaiveHazard,
)
from modelbench.scoring import ValueEstimate
from modelbench.standards import Standards

from modelgauge.locales import EN_US, FR_FR, PUBLISHED_LOCALES, ZH_CN
from modelgauge.prompt_sets import (
from modelgauge.prompt_sets import ( # usort: skip
GENERAL_PROMPT_SETS,
SECURITY_JAILBREAK_PROMPT_SETS,
prompt_set_to_filename,
) # usort: skip
)
from modelgauge.tests.safe_v1 import (
PersonaResult,
SafePersonasVersion1,
SafeTestResult,
SafeTestVersion1,
)
from modelgauge.tests.security import SecurityJailbreakTest, SecurityNaiveTest
from modelgauge.versions import CURRENT_GENERAL_V_UNDER, CURRENT_GENERAL_VERSION, CURRENT_SECURITY_V_UNDER, CURRENT_SECURITY_VERSION


@pytest.mark.parametrize("ai", ("ai", "AI", "aI", "Ai"))
Expand All @@ -53,9 +58,9 @@ def test_capitalization_doesnt_overgeneralize():
@pytest.mark.parametrize("prompt_set", GENERAL_PROMPT_SETS.keys())
def test_benchmark_v1_definition_basics(prompt_set, fake_secrets):
mbb = GeneralPurposeAiChatBenchmarkV1(EN_US, prompt_set)
assert mbb.uid == f"general_purpose_ai_chat_benchmark-1.1-en_us-{prompt_set}-default"
assert mbb.uid == f"general_purpose_ai_chat_benchmark-{CURRENT_GENERAL_VERSION}-en_us-{prompt_set}-default"
assert mbb.name() == "General Purpose AI Chat Benchmark V 1"
assert mbb.path_name() == f"general_purpose_ai_chat_benchmark-1_1-en_us-{prompt_set}-default"
assert mbb.path_name() == f"general_purpose_ai_chat_benchmark-{CURRENT_GENERAL_V_UNDER}-en_us-{prompt_set}-default"
h = mbb.hazards()
all_hazard_keys = ["vcr", "ncr", "src", "cse", "dfm", "spc", "prv", "ipv", "iwp", "hte", "ssh", "sxc"]
assert len(h) == len(all_hazard_keys)
Expand All @@ -80,7 +85,7 @@ def assert_hazard_standards(benchmark):
practice_default_benchmark = GeneralPurposeAiChatBenchmarkV1(locale, "practice", evaluator="default")
assert (
Standards._benchmark_standards_path(practice_benchmark.uid).name
== f"general_purpose_ai_chat_benchmark-1.1-{locale}-practice-private.json"
== f"general_purpose_ai_chat_benchmark-{CURRENT_GENERAL_VERSION}-{locale}-practice-private.json"
)
assert practice_default_benchmark.standards._data == practice_benchmark.standards._data
assert_hazard_standards(practice_benchmark)
Expand All @@ -90,7 +95,7 @@ def assert_hazard_standards(benchmark):
official_default_benchmark = GeneralPurposeAiChatBenchmarkV1(locale, "official", evaluator="default")
assert (
Standards._benchmark_standards_path(official_benchmark.uid).name
== f"general_purpose_ai_chat_benchmark-1.1-{locale}-official-private.json"
== f"general_purpose_ai_chat_benchmark-{CURRENT_GENERAL_VERSION}-{locale}-official-private.json"
)
assert official_default_benchmark.standards._data == official_benchmark.standards._data
assert_hazard_standards(official_benchmark)
Expand All @@ -117,7 +122,7 @@ def assert_hazard_standards(benchmark):
practice_default_benchmark = GeneralPurposeAiChatBenchmarkV1(ZH_CN, "practice", evaluator="default")
assert (
Standards._benchmark_standards_path(practice_benchmark.uid).name
== f"general_purpose_ai_chat_benchmark-1.1-{ZH_CN}-practice-private.json"
== f"general_purpose_ai_chat_benchmark-{CURRENT_GENERAL_VERSION}-{ZH_CN}-practice-private.json"
)
assert practice_default_benchmark.standards._data == practice_benchmark.standards._data
assert_hazard_standards(practice_benchmark)
Expand All @@ -143,9 +148,9 @@ def check_dfm_score(benchmark, expected):
@pytest.mark.parametrize("prompt_set", SECURITY_JAILBREAK_PROMPT_SETS.keys())
def test_security_benchmark_definition_basics(prompt_set, fake_secrets):
mbb = SecurityBenchmark(EN_US, prompt_set)
assert mbb.uid == f"security_benchmark-1.0-en_us-{prompt_set}-default"
assert mbb.uid == f"security_benchmark-{CURRENT_SECURITY_VERSION}-en_us-{prompt_set}-default"
assert mbb.name() == "Security Benchmark"
assert mbb.path_name() == f"security_benchmark-1_0-en_us-{prompt_set}-default"
assert mbb.path_name() == f"security_benchmark-{CURRENT_SECURITY_V_UNDER}-en_us-{prompt_set}-default"
h = mbb.hazards()

assert len(h) == 2
Expand All @@ -171,7 +176,7 @@ def assert_hazard_standards(b):
official_default_security_benchmark = SecurityBenchmark(EN_US, prompt_set, evaluator="default")
assert (
Standards._benchmark_standards_path(official_ensemble_security_benchmark.reference_benchmark().uid).name
== f"security_naive_benchmark-1.0-{EN_US}-official-private.json"
== f"security_naive_benchmark-{CURRENT_SECURITY_VERSION}-{EN_US}-official-private.json"
)
assert official_default_security_benchmark.standards._data == official_ensemble_security_benchmark.standards._data
assert_hazard_standards(official_ensemble_security_benchmark)
Expand Down Expand Up @@ -288,12 +293,12 @@ def test_text_grade(self, probability, expected, expected_label, hazard_score):
@pytest.mark.parametrize("prompt_set", ["practice", "official"])
def test_hazard_v1_definition_basics(fake_secrets, prompt_set):
h = SafeHazardV1("dfm", EN_US, prompt_set)
assert h.uid == f"safe_hazard-1.1-dfm-en_us-{prompt_set}"
assert h.uid == f"safe_hazard-{CURRENT_GENERAL_VERSION}-dfm-en_us-{prompt_set}"
assert h.locale == EN_US
assert h.prompt_set == prompt_set

assert h.reference_key == "safe_hazard-1_1-dfm"
assert h.name() == f"Safe Dfm en_US Hazard V1.1 ({prompt_set.title()})"
assert h.reference_key == f"safe_hazard-{CURRENT_GENERAL_V_UNDER}-dfm"
assert h.name() == f"Safe Dfm en_US Hazard V{CURRENT_GENERAL_VERSION} ({prompt_set.title()})"

tests = h.tests(secrets=fake_secrets)
assert len(tests) == 1
Expand All @@ -303,12 +308,12 @@ def test_hazard_v1_definition_basics(fake_secrets, prompt_set):

def test_security_jailbreak_hazard_definition_basics(fake_secrets):
h = SecurityJailbreakHazard(EN_US, "official")
assert h.uid == f"security_jailbreak_hazard-1.0-en_us-official"
assert h.uid == f"security_jailbreak_hazard-{CURRENT_SECURITY_VERSION}-en_us-official"
assert h.locale == EN_US
assert h.prompt_set == "official"

assert h.reference_key == "security_naive_official_hazard-1_0"
assert h.name() == f"Security Jailbreak en_US Hazard V1.0 (Official)"
assert h.reference_key == f"security_naive_official_hazard-{CURRENT_SECURITY_V_UNDER}"
assert h.name() == f"Security Jailbreak en_US Hazard V{CURRENT_SECURITY_VERSION} (Official)"
tests = h.tests(secrets=fake_secrets)
assert len(tests) == 1
for t in tests:
Expand All @@ -317,12 +322,12 @@ def test_security_jailbreak_hazard_definition_basics(fake_secrets):

def test_security_naive_hazard_definition_basics(fake_secrets):
h = SecurityNaiveHazard(EN_US, "official")
assert h.uid == f"security_naive_hazard-1.0-en_us-official"
assert h.uid == f"security_naive_hazard-{CURRENT_SECURITY_VERSION}-en_us-official"
assert h.locale == EN_US
assert h.prompt_set == "official"

assert h.reference_key == "security_naive_official_hazard-1_0"
assert h.name() == f"Security Naive en_US Hazard V1.0 (Official)"
assert h.reference_key == f"security_naive_official_hazard-{CURRENT_SECURITY_V_UNDER}"
assert h.name() == f"Security Naive en_US Hazard V{CURRENT_SECURITY_VERSION} (Official)"
tests = h.tests(secrets=fake_secrets)
assert len(tests) == 1
for t in tests:
Expand Down
Loading
Loading