Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
6 changes: 4 additions & 2 deletions src/modelbench/benchmarks.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,6 +5,8 @@
from typing import List, Sequence

import casefy

from modelgauge.versions import CURRENT_SECURITY_VERSION
from modelgauge.locales import DEFAULT_LOCALE, validate_locale
from modelgauge.prompt_sets import GENERAL_PROMPT_SETS, SECURITY_JAILBREAK_PROMPT_SETS, validate_prompt_set
from modelgauge.sut import PromptResponseSUT
Expand Down Expand Up @@ -255,7 +257,7 @@ def _make_hazards(self) -> Sequence[HazardDefinition]:

_uid_definition = {
"class": "security_naive_benchmark",
"version": "1.0",
"version": CURRENT_SECURITY_VERSION,
"locale": "self.locale",
"prompt_set": "self.prompt_set",
"evaluator": "self.evaluator",
Expand Down Expand Up @@ -289,7 +291,7 @@ def _make_hazards(self) -> Sequence[HazardDefinition]:

_uid_definition = {
"class": "security_benchmark",
"version": "1.0",
"version": CURRENT_SECURITY_VERSION,
"locale": "self.locale",
"prompt_set": "self.prompt_set",
"evaluator": "self.evaluator",
Expand Down
16 changes: 13 additions & 3 deletions src/modelbench/cli.py
Original file line number Diff line number Diff line change
Expand Up @@ -38,6 +38,7 @@
from modelgauge.preflight import check_secrets, make_sut
from modelgauge.prompt_sets import GENERAL_PROMPT_SETS, SECURITY_JAILBREAK_PROMPT_SETS
from modelgauge.sut_registry import SUTS
from modelgauge.versions import CURRENT_GENERAL_VERSION, CURRENT_SECURITY_VERSION


def load_local_plugins(_, __, path: pathlib.Path):
Expand Down Expand Up @@ -167,9 +168,9 @@ def list_suts():
@click.option(
"--version",
"-v",
type=click.Choice(["1.1"]),
default="1.1",
help="Benchmark version to run (Default: 1.1)",
type=click.Choice([CURRENT_GENERAL_VERSION]),
default=CURRENT_GENERAL_VERSION,
help=f"Benchmark version to run (Default: {CURRENT_GENERAL_VERSION})",
multiple=False,
)
@benchmark_options(GENERAL_PROMPT_SETS, "demo")
Expand Down Expand Up @@ -200,10 +201,19 @@ def general_benchmark(


@benchmark.command("security", help="run a security benchmark")
@click.option(
Comment thread
superdosh marked this conversation as resolved.
"--version",
"-v",
type=click.Choice([CURRENT_SECURITY_VERSION]),
default=CURRENT_SECURITY_VERSION,
help=f"Benchmark version to run (Default: {CURRENT_SECURITY_VERSION})",
multiple=False,
)
@benchmark_options(SECURITY_JAILBREAK_PROMPT_SETS, "official")
@click.pass_context
def security_benchmark(
ctx: click.Context,
version: str,
output_dir: pathlib.Path,
max_instances: int | None,
debug: bool,
Expand Down
16 changes: 11 additions & 5 deletions src/modelbench/hazards.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,6 +5,12 @@
import casefy
from pydantic import BaseModel, ConfigDict

from modelgauge.versions import (
CURRENT_GENERAL_V_UNDER,

Copy link
Copy Markdown
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Nice.

CURRENT_GENERAL_VERSION,
CURRENT_SECURITY_V_UNDER,
CURRENT_SECURITY_VERSION,
)
from modelgauge.base_test import PromptResponseTest
from modelgauge.locales import display_for, validate_locale
from modelgauge.prompt_sets import GENERAL_PROMPT_SETS, SECURITY_JAILBREAK_PROMPT_SETS, validate_prompt_set
Expand Down Expand Up @@ -90,10 +96,10 @@ def __init__(self, hazard_key: str, locale: str, prompt_set: str, evaluator="def

@property
def reference_key(self):
return f"safe_hazard-1_1-{self.hazard_key}"
return f"safe_hazard-{CURRENT_GENERAL_V_UNDER}-{self.hazard_key}"

def name(self):
return f"Safe {self.hazard_key.title()} {display_for(self.locale)} Hazard V1.1 ({self.prompt_set.title()})"
return f"Safe {self.hazard_key.title()} {display_for(self.locale)} Hazard V{CURRENT_GENERAL_VERSION} ({self.prompt_set.title()})"

def score(self, sut_scores: Mapping[str, TestRecord]) -> "HazardScore":
test_scores: dict[str, ValueEstimate] = {}
Expand Down Expand Up @@ -140,15 +146,15 @@ def test_uids(self) -> List[str]:

_uid_definition = {
"name": "safe_hazard",
"version": "1.1",
"version": CURRENT_GENERAL_VERSION,
"hazard": "self.hazard_key",
"locale": "self.locale",
"prompt_set": "self.prompt_set",
}


class BaseSecurityHazard(HazardDefinition, ABC):
VERSION = "1.0"
VERSION = CURRENT_SECURITY_VERSION
persona: str

def __init__(self, locale: str, prompt_set: str, evaluator="default"):
Expand All @@ -165,7 +171,7 @@ def test_uids(self) -> List[str]:
@property
def reference_key(self):
# Both jailbreak and naive hazards use the naive reference score.
return f"security_naive_official_hazard-{self.VERSION.replace('.','_')}"
return f"security_naive_official_hazard-{CURRENT_SECURITY_V_UNDER}"

def name(self):
return f"Security {self.persona.title()} {display_for(self.locale)} Hazard V{self.VERSION} ({self.prompt_set.title()})"
Expand Down
Comment thread
superdosh marked this conversation as resolved.
Original file line number Diff line number Diff line change
@@ -0,0 +1,22 @@
{
"_metadata": {
"NOTICE": "This file is a demonstrative sample and does not reflect an actual benchmark standard.",
"file_format_version": "2.0.0",
"run_info": {
"sut_scores": {
"llama-3.1-8b-instruct-turbo-together": {
"security_naive_hazard-1.0.1-en_us-official": 0.5
}
}
}
},
"standards": {
"reference_suts": [
"llama-3.1-8b-instruct-turbo-together"
],
"reference_benchmark": "security_naive_benchmark-1.0.1-en_us-official-private",
"reference_standards": {
"security_naive_official_hazard-1_0_1": 0.5
}
}
}
7 changes: 6 additions & 1 deletion src/modelgauge/annotator_registry.py
Original file line number Diff line number Diff line change
@@ -1,4 +1,8 @@
from modelgauge.annotators.cheval.ids import SAFETY_ANNOTATOR_V1_1_UID, SECURITY_ANNOTATOR_V0_5_UID
from modelgauge.annotators.cheval.ids import (
SAFETY_ANNOTATOR_V1_1_UID,
SECURITY_ANNOTATOR_V0_5_UID,
SECURITY_ANNOTATOR_V1_0_1_UID,
)
from modelgauge.instance_factory import InstanceFactory
from modelgauge.annotator import Annotator

Expand All @@ -10,6 +14,7 @@
"perspective_api": "perspective_api",
SAFETY_ANNOTATOR_V1_1_UID: "cheval.registration",
SECURITY_ANNOTATOR_V0_5_UID: "cheval.registration",
SECURITY_ANNOTATOR_V1_0_1_UID: "cheval.registration",
}

# The list of all Annotators instances with assigned UIDs.
Expand Down
5 changes: 4 additions & 1 deletion src/modelgauge/annotators/cheval/ids.py
Original file line number Diff line number Diff line change
@@ -1,2 +1,5 @@
SAFETY_ANNOTATOR_V1_1_UID = "safety-v1.1"
from modelgauge.versions import CURRENT_GENERAL_VERSION, CURRENT_SECURITY_VERSION

SAFETY_ANNOTATOR_V1_1_UID = f"safety-v{CURRENT_GENERAL_VERSION}"

Copy link
Copy Markdown
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I like this!

SECURITY_ANNOTATOR_V0_5_UID = "security-v0.5"
SECURITY_ANNOTATOR_V1_0_1_UID = f"security-v{CURRENT_SECURITY_VERSION}"

Copy link
Copy Markdown
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

And this!

4 changes: 2 additions & 2 deletions src/modelgauge/annotators/cheval/registration.py
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
from modelgauge.annotator_registry import ANNOTATORS
from modelgauge.annotators.cheval.annotator import ChevalAPIKey, ChevalAnnotator, ChevalEndpointUrl
from modelgauge.annotators.cheval.ids import SAFETY_ANNOTATOR_V1_1_UID, SECURITY_ANNOTATOR_V0_5_UID
from modelgauge.annotators.cheval.ids import SAFETY_ANNOTATOR_V1_1_UID, SECURITY_ANNOTATOR_V1_0_1_UID
from modelgauge.secret_values import InjectSecret

ANNOTATORS.register(
Expand All @@ -13,7 +13,7 @@

ANNOTATORS.register(
ChevalAnnotator,
SECURITY_ANNOTATOR_V0_5_UID,
SECURITY_ANNOTATOR_V1_0_1_UID,
InjectSecret(ChevalAPIKey),
InjectSecret(ChevalEndpointUrl),
)
4 changes: 3 additions & 1 deletion src/modelgauge/tests/safe_v1.py
Original file line number Diff line number Diff line change
Expand Up @@ -29,6 +29,7 @@
from modelgauge.sut_capabilities import AcceptsTextPrompt
from modelgauge.test_decorator import modelgauge_test
from modelgauge.test_registry import TESTS
from modelgauge.versions import CURRENT_GENERAL_VERSION
from pydantic import BaseModel


Expand Down Expand Up @@ -98,6 +99,7 @@ class BaseSafeTestVersion1(PromptResponseTest, ABC):
Test UIDs follow the form "safe-{ hazard acronym }-1.1" (e.g. safe-vcr-1.1). The hazard acronyms are as follows:
"""

VERSION = CURRENT_GENERAL_VERSION
hazards = Hazards()
_sut_options = ModelOptions(
max_tokens=3000,
Expand All @@ -113,7 +115,7 @@ def create_uid(hazard: str, locale: str, prompt_set: str, evaluator=None):
postfix = ""
else:
postfix = "-" + evaluator
uid = f"safe-{hazard}-{locale}-{prompt_set}-1.1{postfix}".lower()
uid = f"safe-{hazard}-{locale}-{prompt_set}-{CURRENT_GENERAL_VERSION}{postfix}".lower()
return uid

def __init__(
Expand Down
9 changes: 5 additions & 4 deletions src/modelgauge/tests/security.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,7 +5,7 @@
from pydantic import BaseModel

from modelgauge.aggregations import get_measurement_stats
from modelgauge.annotators.cheval.ids import SAFETY_ANNOTATOR_V1_1_UID
from modelgauge.annotators.cheval.ids import SECURITY_ANNOTATOR_V1_0_1_UID
from modelgauge.base_test import PromptResponseTest
from modelgauge.dependency_helper import DependencyHelper
from modelgauge.external_data import ExternalData, WebData
Expand All @@ -32,6 +32,7 @@
from modelgauge.tests.safe_v1 import Hazards
from modelgauge.test_decorator import modelgauge_test
from modelgauge.test_registry import TESTS
from modelgauge.versions import CURRENT_SECURITY_VERSION


class SecurityTestResult(BaseModel):
Expand All @@ -40,7 +41,7 @@ class SecurityTestResult(BaseModel):


class BaseSecurityTest(PromptResponseTest, ABC):
VERSION = 1.0
VERSION = CURRENT_SECURITY_VERSION
hazards = Hazards()
prompt_sets: dict
persona: str
Expand Down Expand Up @@ -171,14 +172,14 @@ def register_tests(cls, evaluator=None):
class PrivateSecurityJailbreakTest(BaseSecurityJailbreakTest):
@classmethod
def get_annotators(cls) -> List[str]:
return [SAFETY_ANNOTATOR_V1_1_UID]
return [SECURITY_ANNOTATOR_V1_0_1_UID]


@modelgauge_test(requires_sut_capabilities=[AcceptsTextPrompt])
class PrivateSecurityNaiveTest(BaseSecurityNaiveTest):
@classmethod
def get_annotators(cls) -> List[str]:
return [SAFETY_ANNOTATOR_V1_1_UID]
return [SECURITY_ANNOTATOR_V1_0_1_UID]


register_tests(PrivateSecurityJailbreakTest, "private")
Expand Down
7 changes: 7 additions & 0 deletions src/modelgauge/versions.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,7 @@
# Central definitions of the benchmark version strings.
#
# Each known release gets one private constant whose name spells out the
# version it holds; the public CURRENT_* names select the active release.
_GENERAL_V1_1 = "1.1"
_SECURITY_V1_0_1 = "1.0.1"

# Dotted version strings of the currently active benchmarks.
CURRENT_GENERAL_VERSION = _GENERAL_V1_1
CURRENT_SECURITY_VERSION = _SECURITY_V1_0_1

# The same versions with "." replaced by "_" (e.g. "1.0.1" -> "1_0_1"),
# for identifiers that are written with underscores instead of dots.
CURRENT_GENERAL_V_UNDER = CURRENT_GENERAL_VERSION.replace(".", "_")
CURRENT_SECURITY_V_UNDER = CURRENT_SECURITY_VERSION.replace(".", "_")
Loading
Loading