From 64719a640fb066c3a896a6a12e43227d2cd22aed Mon Sep 17 00:00:00 2001
From: Colin Haywood <colin.haywood@datamasque.com>
Date: Thu, 14 May 2026 16:48:19 +1200
Subject: [PATCH 01/26] feat: Add DiscoveryConfig APIs

---
 datamasque/client/__init__.py                |   3 +
 datamasque/client/discovery_configs.py       | 137 ++++++
 datamasque/client/dmclient.py                |   2 +
 datamasque/client/models/discovery_config.py |  18 +
 tests/test_discovery_configs.py              | 430 +++++++++++++++++++
 5 files changed, 590 insertions(+)
 create mode 100644 datamasque/client/discovery_configs.py
 create mode 100644 datamasque/client/models/discovery_config.py
 create mode 100644 tests/test_discovery_configs.py

diff --git a/datamasque/client/__init__.py b/datamasque/client/__init__.py
index c4f007d..814ea56 100644
--- a/datamasque/client/__init__.py
+++ b/datamasque/client/__init__.py
@@ -70,6 +70,7 @@
     SchemaDiscoveryResult,
     TableConstraints,
 )
+from datamasque.client.models.discovery_config import DiscoveryConfig, DiscoveryConfigId
 from datamasque.client.models.dm_instance import DataMasqueInstanceConfig
 from datamasque.client.models.files import (
     DataMasqueFile,
@@ -130,6 +131,8 @@
     "DatabaseConnectionConfig",
     "DatabaseType",
     "DatabricksConnectionConfig",
+    "DiscoveryConfig",
+    "DiscoveryConfigId",
     "DiscoveryMatch",
     "DynamoConnectionConfig",
     "FailedToStartError",
diff --git a/datamasque/client/discovery_configs.py b/datamasque/client/discovery_configs.py
new file mode 100644
index 0000000..617f367
--- /dev/null
+++ b/datamasque/client/discovery_configs.py
@@ -0,0 +1,137 @@
+import logging
+from typing import Iterator, Optional
+
+from datamasque.client.base import BaseClient
+from datamasque.client.exceptions import DataMasqueApiError, DataMasqueException
+from datamasque.client.models.discovery_config import DiscoveryConfig, DiscoveryConfigId
+from datamasque.client.models.pagination import Page
+
+logger = logging.getLogger(__name__)
+
+
+class DiscoveryConfigClient(BaseClient):
+    """Discovery config CRUD API methods. Mixed into `DataMasqueClient`."""
+
+    def iter_discovery_configs(self) -> Iterator[DiscoveryConfig]:
+        """Lazily iterate all discovery configs via the paginated endpoint."""
+
+        return self._iter_paginated("/api/discovery/configs/", model=DiscoveryConfig)
+
+    def list_discovery_configs(self) -> list[DiscoveryConfig]:
+        """
+        Lists all (non-archived) discovery configs.
+
+        Note: the YAML content is not included in the list response for performance.
+        Use `get_discovery_config` to retrieve the full config with its YAML body.
+        """
+
+        return list(self.iter_discovery_configs())
+
+    def get_discovery_config(self, config_id: DiscoveryConfigId) -> DiscoveryConfig:
+        """Retrieves a single discovery config by ID."""
+
+        response = self.make_request("GET", f"/api/discovery/configs/{config_id}/")
+        return DiscoveryConfig.model_validate(response.json())
+
+    def get_discovery_config_by_name(self, name: str) -> Optional[DiscoveryConfig]:
+        """
+        Looks for a discovery config matching the given name (case-sensitive, exact match).
+
+        Returns it if found, otherwise `None`.
+        """
+
+        response = self.make_request(
+            "GET",
+            "/api/discovery/configs/",
+            params={"name_exact": name, "limit": 1},
+        )
+        page = Page[DiscoveryConfig].model_validate(response.json())
+        if not page.results:
+            return None
+
+        config_id = page.results[0].id
+        if config_id is None:
+            raise DataMasqueApiError(
+                "Server returned a discovery config list entry without an `id`.",
+                response=response,
+            )
+
+        return self.get_discovery_config(config_id)
+
+    def create_discovery_config(self, config: DiscoveryConfig) -> DiscoveryConfig:
+        """
+        Creates a new discovery config on the server.
+
+        Sets the config's server-assigned fields (`id`, `created`, `modified`) and returns the config.
+        """
+
+        data = config.model_dump(exclude_none=True, by_alias=True, mode="json")
+        response = self.make_request("POST", "/api/discovery/configs/", data=data)
+        created = DiscoveryConfig.model_validate(response.json())
+        config.id = created.id
+        config.created = created.created
+        config.modified = created.modified
+        logger.info('Creation of discovery config "%s" successful', config.name)
+        return config
+
+    def update_discovery_config(self, config: DiscoveryConfig) -> DiscoveryConfig:
+        """
+        Performs a full update of the discovery config.
+
+        The config must have its `id` set
+        (i.e., it must have been previously created or retrieved from the server).
+        """
+
+        if config.id is None:
+            raise ValueError("Cannot update a discovery config that has not been created yet (id is None)")
+
+        data = config.model_dump(exclude_none=True, by_alias=True, mode="json")
+        response = self.make_request("PUT", f"/api/discovery/configs/{config.id}/", data=data)
+        updated = DiscoveryConfig.model_validate(response.json())
+        config.modified = updated.modified
+        logger.debug('Update of discovery config "%s" successful', config.name)
+        return config
+
+    def create_or_update_discovery_config(self, config: DiscoveryConfig) -> DiscoveryConfig:
+        """
+        Creates the config if it doesn't exist, or updates it if one with the same name already exists.
+
+        Sets the config's `id` property.
+        """
+
+        existing = self.get_discovery_config_by_name(config.name)
+        if existing is not None:
+            config.id = existing.id
+            return self.update_discovery_config(config)
+
+        return self.create_discovery_config(config)
+
+    def delete_discovery_config_by_id_if_exists(self, config_id: DiscoveryConfigId) -> None:
+        """
+        Archives the discovery config with the given ID.
+
+        No-op if the config does not exist.
+        """
+
+        self._delete_if_exists(f"/api/discovery/configs/{config_id}/")
+
+    def delete_discovery_config_by_name_if_exists(self, name: str) -> None:
+        """
+        Archives the discovery config with the given name.
+
+        No-op if the config does not exist.
+        """
+
+        all_configs = self.list_discovery_configs()
+        matching = [config for config in all_configs if config.name == name]
+        for config in matching:
+            if config.id is None:
+                raise DataMasqueException(f'Server returned a discovery config named "{config.name}" without an `id`.')
+
+            self.delete_discovery_config_by_id_if_exists(config.id)
+
+    def get_default_discovery_config_yaml(self) -> str:
+        """Returns the server's built-in default discovery configuration as a YAML string."""
+
+        response = self.make_request("GET", "/api/discovery/configs/defaults/")
+        return response.content.decode("utf-8")
diff --git a/datamasque/client/dmclient.py b/datamasque/client/dmclient.py
index 55cc6f3..25efcc6 100644
--- a/datamasque/client/dmclient.py
+++ b/datamasque/client/dmclient.py
@@ -1,6 +1,7 @@
 from datamasque.client.base import FileOrContent, UploadFile
 from datamasque.client.connections import ConnectionClient
 from datamasque.client.discovery import DiscoveryClient
+from datamasque.client.discovery_configs import DiscoveryConfigClient
 from datamasque.client.files import FileClient
 from datamasque.client.license import LicenseClient
 from datamasque.client.ruleset_libraries import RulesetLibraryClient
@@ -20,6 +21,7 @@ class DataMasqueClient(
     FileClient,
     RunClient,
     DiscoveryClient,
+    DiscoveryConfigClient,
     UserClient,
     SettingsClient,
 ):
diff --git a/datamasque/client/models/discovery_config.py b/datamasque/client/models/discovery_config.py
new file mode 100644
index 0000000..8c580b3
--- /dev/null
+++ b/datamasque/client/models/discovery_config.py
@@ -0,0 +1,18 @@
+from datetime import datetime
+from typing import NewType, Optional
+
+from pydantic import BaseModel, ConfigDict, Field
+
+DiscoveryConfigId = NewType("DiscoveryConfigId", str)
+
+
+class DiscoveryConfig(BaseModel):
+    """Represents a named, persisted YAML discovery configuration."""
+
+    model_config = ConfigDict(extra="allow", populate_by_name=True)
+
+    name: str
+    yaml: Optional[str] = Field(default=None, alias="config_yaml")
+    id: Optional[DiscoveryConfigId] = None
+    created: Optional[datetime] = None
+    modified: Optional[datetime] = None
diff --git a/tests/test_discovery_configs.py b/tests/test_discovery_configs.py
new file mode 100644
index 0000000..2c9e71c
--- /dev/null
+++ b/tests/test_discovery_configs.py
@@ -0,0 +1,430 @@
+"""Tests for discovery-config support in the DataMasque client."""
+
+from datetime import datetime
+from typing import Any
+
+import pytest
+import requests_mock
+
+from datamasque.client import DataMasqueClient
+from datamasque.client.exceptions import DataMasqueApiError, DataMasqueException
+from datamasque.client.models.discovery_config import DiscoveryConfig, DiscoveryConfigId
+
+CONFIG_ID_1 = "aaaaaaaa-1111-2222-3333-444444444444"
+CONFIG_ID_2 = "bbbbbbbb-1111-2222-3333-444444444444"
+
+
+@pytest.fixture
+def sample_config_list_response() -> dict[str, Any]:
+    return {
+        "count": 2,
+        "next": None,
+        "previous": None,
+        "results": [
+            {
+                "id": CONFIG_ID_1,
+                "name": "my_config",
+                "archived": False,
+                "created": "2025-01-01T12:00:00Z",
+                "modified": "2025-01-02T12:00:00Z",
+            },
+            {
+                "id": CONFIG_ID_2,
+                "name": "another_config",
+                "archived": False,
+                "created": "2025-02-01T12:00:00Z",
+                "modified": "2025-02-02T12:00:00Z",
+            },
+        ],
+    }
+
+
+@pytest.fixture
+def sample_config_detail_response() -> dict[str, Any]:
+    return {
+        "id": CONFIG_ID_1,
+        "name": "my_config",
+        "config_yaml": "labels: []\nmetadata_rules: []\nidd_rules: []\n",
+        "archived": False,
+        "created": "2025-01-01T12:00:00Z",
+        "modified": "2025-01-02T12:00:00Z",
+    }
+
+
+@pytest.fixture
+def discovery_config() -> DiscoveryConfig:
+    return DiscoveryConfig(
+        name="test_config",
+        yaml="labels: []\nmetadata_rules: []\nidd_rules: []\n",
+    )
+
+
+def test_list_discovery_configs(client: DataMasqueClient, sample_config_list_response: dict[str, Any]) -> None:
+    with requests_mock.Mocker() as m:
+        m.get(
+            "http://test-server/api/discovery/configs/",
+            json=sample_config_list_response,
+            status_code=200,
+        )
+        configs = client.list_discovery_configs()
+
+    assert len(configs) == 2
+    assert configs[0].id == DiscoveryConfigId(CONFIG_ID_1)
+    assert configs[0].name == "my_config"
+    assert configs[0].yaml is None
+    assert configs[1].name == "another_config"
+
+
+def test_list_discovery_configs_pagination(client: DataMasqueClient) -> None:
+    page1 = {
+        "count": 3,
+        "next": "http://test-server/api/discovery/configs/?limit=2&offset=2",
+        "previous": None,
+        "results": [
+            {
+                "id": CONFIG_ID_1,
+                "name": "c1",
+                "archived": False,
+                "created": "2025-01-01T12:00:00Z",
+                "modified": "2025-01-01T12:00:00Z",
+            },
+            {
+                "id": CONFIG_ID_2,
+                "name": "c2",
+                "archived": False,
+                "created": "2025-01-01T12:00:00Z",
+                "modified": "2025-01-01T12:00:00Z",
+            },
+        ],
+    }
+    page2 = {
+        "count": 3,
+        "next": None,
+        "previous": "http://test-server/api/discovery/configs/?limit=2",
+        "results": [
+            {
+                "id": "cccccccc-1111-2222-3333-444444444444",
+                "name": "c3",
+                "archived": False,
+                "created": "2025-01-01T12:00:00Z",
+                "modified": "2025-01-01T12:00:00Z",
+            },
+        ],
+    }
+
+    with requests_mock.Mocker() as m:
+        m.get(
+            "http://test-server/api/discovery/configs/",
+            [{"json": page1, "status_code": 200}, {"json": page2, "status_code": 200}],
+        )
+        configs = client.list_discovery_configs()
+
+    assert [c.name for c in configs] == ["c1", "c2", "c3"]
+
+
+def test_list_discovery_configs_empty(client: DataMasqueClient) -> None:
+    empty_response = {"count": 0, "next": None, "previous": None, "results": []}
+
+    with requests_mock.Mocker() as m:
+        m.get("http://test-server/api/discovery/configs/", json=empty_response, status_code=200)
+        configs = client.list_discovery_configs()
+
+    assert configs == []
+
+
+def test_get_discovery_config(client: DataMasqueClient, sample_config_detail_response: dict[str, Any]) -> None:
+    with requests_mock.Mocker() as m:
+        m.get(
+            f"http://test-server/api/discovery/configs/{CONFIG_ID_1}/",
+            json=sample_config_detail_response,
+            status_code=200,
+        )
+        config = client.get_discovery_config(DiscoveryConfigId(CONFIG_ID_1))
+
+    assert config.id == DiscoveryConfigId(CONFIG_ID_1)
+    assert config.name == "my_config"
+    assert config.yaml == "labels: []\nmetadata_rules: []\nidd_rules: []\n"
+
+
+def test_get_discovery_config_by_name_found(
+    client: DataMasqueClient, sample_config_detail_response: dict[str, Any]
+) -> None:
+    list_response = {
+        "count": 1,
+        "next": None,
+        "previous": None,
+        "results": [
+            {
+                "id": CONFIG_ID_1,
+                "name": "my_config",
+                "archived": False,
+                "created": "2025-01-01T12:00:00Z",
+                "modified": "2025-01-02T12:00:00Z",
+            },
+        ],
+    }
+
+    with requests_mock.Mocker() as m:
+        m.get(
+            "http://test-server/api/discovery/configs/",
+            json=list_response,
+            status_code=200,
+        )
+        m.get(
+            f"http://test-server/api/discovery/configs/{CONFIG_ID_1}/",
+            json=sample_config_detail_response,
+            status_code=200,
+        )
+        config = client.get_discovery_config_by_name("my_config")
+
+    assert config is not None
+    assert config.name == "my_config"
+    assert "name_exact=my_config" in m.request_history[0].url
+
+
+def test_get_discovery_config_by_name_not_found(client: DataMasqueClient) -> None:
+    empty_response = {"count": 0, "next": None, "previous": None, "results": []}
+
+    with requests_mock.Mocker() as m:
+        m.get(
+            "http://test-server/api/discovery/configs/",
+            json=empty_response,
+            status_code=200,
+        )
+        config = client.get_discovery_config_by_name("nonexistent")
+
+    assert config is None
+
+
+def test_get_discovery_config_by_name_raises_when_server_omits_id(client: DataMasqueClient) -> None:
+    list_response_without_id = {
+        "count": 1,
+        "next": None,
+        "previous": None,
+        "results": [
+            {
+                "name": "my_config",
+                "archived": False,
+                "created": "2025-01-01T12:00:00Z",
+                "modified": "2025-01-02T12:00:00Z",
+            },
+        ],
+    }
+
+    with requests_mock.Mocker() as m:
+        m.get(
+            "http://test-server/api/discovery/configs/",
+            json=list_response_without_id,
+            status_code=200,
+        )
+        with pytest.raises(DataMasqueApiError, match="without an `id`"):
+            client.get_discovery_config_by_name("my_config")
+
+
+def test_create_discovery_config(client: DataMasqueClient, discovery_config: DiscoveryConfig) -> None:
+    create_response = {
+        "id": CONFIG_ID_1,
+        "name": "test_config",
+        "config_yaml": "labels: []\nmetadata_rules: []\nidd_rules: []\n",
+        "archived": False,
+        "created": "2025-06-01T10:00:00Z",
+        "modified": "2025-06-01T10:00:00Z",
+    }
+
+    with requests_mock.Mocker() as m:
+        m.post(
+            "http://test-server/api/discovery/configs/",
+            json=create_response,
+            status_code=201,
+        )
+        result = client.create_discovery_config(discovery_config)
+
+    assert result is discovery_config
+    assert result.id == DiscoveryConfigId(CONFIG_ID_1)
+    assert result.created == datetime.fromisoformat("2025-06-01T10:00:00+00:00")
+    assert result.modified == datetime.fromisoformat("2025-06-01T10:00:00+00:00")
+
+    request_body = m.last_request.json()
+    assert request_body["name"] == "test_config"
+    assert request_body["config_yaml"] == "labels: []\nmetadata_rules: []\nidd_rules: []\n"
+
+
+def test_update_discovery_config(client: DataMasqueClient, discovery_config: DiscoveryConfig) -> None:
+    discovery_config.id = DiscoveryConfigId(CONFIG_ID_1)
+
+    update_response = {
+        "id": CONFIG_ID_1,
+        "name": "test_config",
+        "config_yaml": "labels: []\nmetadata_rules: []\nidd_rules: []\n",
+        "archived": False,
+        "created": "2025-06-01T10:00:00Z",
+        "modified": "2025-06-02T10:00:00Z",
+    }
+
+    with requests_mock.Mocker() as m:
+        m.put(
+            f"http://test-server/api/discovery/configs/{CONFIG_ID_1}/",
+            json=update_response,
+            status_code=200,
+        )
+        result = client.update_discovery_config(discovery_config)
+
+    assert result is discovery_config
+    assert result.modified == datetime.fromisoformat("2025-06-02T10:00:00+00:00")
+
+    request_body = m.last_request.json()
+    assert request_body["name"] == "test_config"
+    assert request_body["config_yaml"] == "labels: []\nmetadata_rules: []\nidd_rules: []\n"
+
+
+def test_update_discovery_config_no_id_raises(client: DataMasqueClient, discovery_config: DiscoveryConfig) -> None:
+    with pytest.raises(ValueError, match="id is None"):
+        client.update_discovery_config(discovery_config)
+
+
+def test_create_or_update_discovery_config_create(client: DataMasqueClient, discovery_config: DiscoveryConfig) -> None:
+    empty_list = {"count": 0, "next": None, "previous": None, "results": []}
+    create_response = {
+        "id": CONFIG_ID_1,
+        "name": "test_config",
+        "config_yaml": "labels: []\nmetadata_rules: []\nidd_rules: []\n",
+        "archived": False,
+        "created": "2025-06-01T10:00:00Z",
+        "modified": "2025-06-01T10:00:00Z",
+    }
+
+    with requests_mock.Mocker() as m:
+        m.get("http://test-server/api/discovery/configs/", json=empty_list, status_code=200)
+        m.post("http://test-server/api/discovery/configs/", json=create_response, status_code=201)
+        result = client.create_or_update_discovery_config(discovery_config)
+
+    assert result.id == DiscoveryConfigId(CONFIG_ID_1)
+    assert m.request_history[0].method == "GET"
+    assert m.request_history[1].method == "POST"
+
+
+def test_create_or_update_discovery_config_update(client: DataMasqueClient, discovery_config: DiscoveryConfig) -> None:
+    list_response = {
+        "count": 1,
+        "next": None,
+        "previous": None,
+        "results": [
+            {
+                "id": CONFIG_ID_1,
+                "name": "test_config",
+                "archived": False,
+                "created": "2025-06-01T10:00:00Z",
+                "modified": "2025-06-01T10:00:00Z",
+            },
+        ],
+    }
+    detail_response = {
+        "id": CONFIG_ID_1,
+        "name": "test_config",
+        "config_yaml": "labels: []",
+        "archived": False,
+        "created": "2025-06-01T10:00:00Z",
+        "modified": "2025-06-01T10:00:00Z",
+    }
+    update_response = {
+        "id": CONFIG_ID_1,
+        "name": "test_config",
+        "config_yaml": "labels: []\nmetadata_rules: []\nidd_rules: []\n",
+        "archived": False,
+        "created": "2025-06-01T10:00:00Z",
+        "modified": "2025-06-02T10:00:00Z",
+    }
+
+    with requests_mock.Mocker() as m:
+        m.get("http://test-server/api/discovery/configs/", json=list_response, status_code=200)
+        m.get(
+            f"http://test-server/api/discovery/configs/{CONFIG_ID_1}/",
+            json=detail_response,
+            status_code=200,
+        )
+        m.put(
+            f"http://test-server/api/discovery/configs/{CONFIG_ID_1}/",
+            json=update_response,
+            status_code=200,
+        )
+        result = client.create_or_update_discovery_config(discovery_config)
+
+    assert result.id == DiscoveryConfigId(CONFIG_ID_1)
+    assert m.request_history[0].method == "GET"
+    assert m.request_history[1].method == "GET"
+    assert m.request_history[2].method == "PUT"
+
+
+def test_delete_discovery_config_by_id(client: DataMasqueClient) -> None:
+    with requests_mock.Mocker() as m:
+        m.delete(f"http://test-server/api/discovery/configs/{CONFIG_ID_1}/", status_code=204)
+        client.delete_discovery_config_by_id_if_exists(DiscoveryConfigId(CONFIG_ID_1))
+
+    assert m.call_count == 1
+
+
+def test_delete_discovery_config_by_id_not_found(client: DataMasqueClient) -> None:
+    with requests_mock.Mocker() as m:
+        m.delete(f"http://test-server/api/discovery/configs/{CONFIG_ID_1}/", status_code=404)
+        client.delete_discovery_config_by_id_if_exists(DiscoveryConfigId(CONFIG_ID_1))
+
+
+def test_delete_discovery_config_by_name(client: DataMasqueClient, sample_config_list_response: dict[str, Any]) -> None:
+    with requests_mock.Mocker() as m:
+        m.get("http://test-server/api/discovery/configs/", json=sample_config_list_response, status_code=200)
+        m.delete(f"http://test-server/api/discovery/configs/{CONFIG_ID_1}/", status_code=204)
+        client.delete_discovery_config_by_name_if_exists("my_config")
+
+    assert m.request_history[0].method == "GET"
+    assert m.request_history[1].method == "DELETE"
+
+
+def test_delete_discovery_config_by_name_not_found(
+    client: DataMasqueClient, sample_config_list_response: dict[str, Any]
+) -> None:
+    with requests_mock.Mocker() as m:
+        m.get("http://test-server/api/discovery/configs/", json=sample_config_list_response, status_code=200)
+        client.delete_discovery_config_by_name_if_exists("nonexistent")
+
+    assert m.call_count == 1
+    assert m.request_history[0].method == "GET"
+
+
+def test_delete_discovery_config_by_name_raises_when_server_omits_id(client: DataMasqueClient) -> None:
+    list_response_without_id = {
+        "count": 1,
+        "next": None,
+        "previous": None,
+        "results": [
+            {
+                "name": "my_config",
+                "archived": False,
+                "created": "2025-01-01T12:00:00Z",
+                "modified": "2025-01-02T12:00:00Z",
+            },
+        ],
+    }
+
+    with requests_mock.Mocker() as m:
+        m.get(
+            "http://test-server/api/discovery/configs/",
+            json=list_response_without_id,
+            status_code=200,
+        )
+        with pytest.raises(DataMasqueException, match="without an `id`"):
+            client.delete_discovery_config_by_name_if_exists("my_config")
+
+
+def test_get_default_discovery_config_yaml(client: DataMasqueClient) -> None:
+    yaml_body = "labels: []\nmetadata_rules: []\nidd_rules: []\n"
+
+    with requests_mock.Mocker() as m:
+        m.get(
+            "http://test-server/api/discovery/configs/defaults/",
+            text=yaml_body,
+            status_code=200,
+            headers={"Content-Type": "application/x-yaml"},
+        )
+        result = client.get_default_discovery_config_yaml()
+
+    assert result == yaml_body

From b129ec721320c63da82cc9c5a96ee32297f53a9c Mon Sep 17 00:00:00 2001
From: Colin Haywood <colin.haywood@datamasque.com>
Date: Thu, 21 May 2026 10:23:50 +1200
Subject: [PATCH 02/26] test: Remove trivial tests that only exercise Pydantic

---
 tests/test_ruleset_library.py | 55 -----------------------------------
 1 file changed, 55 deletions(-)

diff --git a/tests/test_ruleset_library.py b/tests/test_ruleset_library.py
index 3e2e476..1ff939b 100644
--- a/tests/test_ruleset_library.py
+++ b/tests/test_ruleset_library.py
@@ -70,61 +70,6 @@ def ruleset_library():
     )
 
 
-def test_ruleset_library_model_dump() -> None:
-    lib = RulesetLibrary(name="lib1", namespace="ns", yaml="content: true")
-    assert lib.model_dump(exclude_none=True, by_alias=True, mode="json") == {
-        "name": "lib1",
-        "namespace": "ns",
-        "config_yaml": "content: true",
-    }
-
-
-def test_ruleset_library_model_dump_no_yaml() -> None:
-    lib = RulesetLibrary(name="lib1", namespace="ns")
-    api_dict = lib.model_dump(exclude_none=True, by_alias=True, mode="json")
-    assert "config_yaml" not in api_dict
-    assert api_dict == {"name": "lib1", "namespace": "ns"}
-
-
-def test_ruleset_library_model_validate() -> None:
-    response = {
-        "id": LIBRARY_ID_1,
-        "name": "my_library",
-        "namespace": "org",
-        "config_yaml": "version: '1.0'",
-        "is_valid": "valid",
-        "created": "2025-01-01T12:00:00Z",
-        "modified": "2025-01-02T12:00:00Z",
-    }
-    lib = RulesetLibrary.model_validate(response)
-    assert lib.id == RulesetLibraryId(LIBRARY_ID_1)
-    assert lib.name == "my_library"
-    assert lib.namespace == "org"
-    assert lib.yaml == "version: '1.0'"
-    assert lib.is_valid is ValidationStatus.valid
-    assert lib.created == datetime.fromisoformat("2025-01-01T12:00:00+00:00")
-    assert lib.modified == datetime.fromisoformat("2025-01-02T12:00:00+00:00")
-
-
-def test_ruleset_library_model_validate_list(
-    sample_library_list_response: dict[str, Any],
-) -> None:
-    """List responses omit config_yaml, so yaml should be None."""
-    result = sample_library_list_response["results"][0]
-    lib = RulesetLibrary.model_validate(result)
-    assert lib.yaml is None
-    assert lib.is_valid is ValidationStatus.valid
-
-
-def test_ruleset_library_equality() -> None:
-    """Pydantic structural equality compares all fields."""
-    lib1 = RulesetLibrary(name="lib", namespace="ns", yaml="content")
-    lib2 = RulesetLibrary(name="lib", namespace="ns", yaml="content")
-    lib3 = RulesetLibrary(name="lib", namespace="other", yaml="content")
-    assert lib1 == lib2
-    assert lib1 != lib3
-
-
 def test_list_ruleset_libraries(client: DataMasqueClient, sample_library_list_response: dict[str, Any]) -> None:
     with requests_mock.Mocker() as m:
         m.get(

From c67c530efba6ae1c0fbb8c3a93188cf96975ff51 Mon Sep 17 00:00:00 2001
From: Colin Haywood <colin.haywood@datamasque.com>
Date: Thu, 28 May 2026 10:26:09 +1200
Subject: [PATCH 03/26] feat: Extend discovery-config support across SD/FDD
 APIs

- Add `discovery_config` field to `SchemaDiscoveryRequest`
- Add `FileDataDiscoveryRequest` + `FileDataDiscoveryOptions` models
- Add `start_file_data_discovery_run()` for the file-data-discovery trigger
---
 datamasque/client/__init__.py                |   4 +
 datamasque/client/discovery.py               |  35 ++++++
 datamasque/client/models/discovery.py        |  56 +++++++++
 datamasque/client/models/discovery_config.py |  27 ++++-
 tests/test_discovery.py                      | 121 +++++++++++++++++++
 tests/test_discovery_configs.py              |  56 ++++++++-
 6 files changed, 297 insertions(+), 2 deletions(-)

diff --git a/datamasque/client/__init__.py b/datamasque/client/__init__.py
index 814ea56..bb6875a 100644
--- a/datamasque/client/__init__.py
+++ b/datamasque/client/__init__.py
@@ -54,6 +54,8 @@
 from datamasque.client.models.discovery import (
     ConstraintColumns,
     DiscoveryMatch,
+    FileDataDiscoveryOptions,
+    FileDataDiscoveryRequest,
     FileDiscoveryFile,
     FileDiscoveryLocatorResult,
     FileDiscoveryMatch,
@@ -137,6 +139,8 @@
     "DynamoConnectionConfig",
     "FailedToStartError",
     "FileConnectionConfig",
+    "FileDataDiscoveryOptions",
+    "FileDataDiscoveryRequest",
     "FileDiscoveryFile",
     "FileDiscoveryLocatorResult",
     "FileDiscoveryMatch",
diff --git a/datamasque/client/discovery.py b/datamasque/client/discovery.py
index 19b89f0..e6ba406 100644
--- a/datamasque/client/discovery.py
+++ b/datamasque/client/discovery.py
@@ -17,6 +17,7 @@
     SelectedFileData,
 )
 from datamasque.client.models.discovery import (
+    FileDataDiscoveryRequest,
     FileDiscoveryResult,
     FileRulesetGenerationRequest,
     RulesetGenerationRequest,
@@ -230,6 +231,40 @@ def start_schema_discovery_run(self, discovery_config: SchemaDiscoveryRequest) -
             response=response,
         )
 
+    def start_file_data_discovery_run(self, request: FileDataDiscoveryRequest) -> RunId:
+        """
+        Starts a file data discovery run with the given configuration.
+
+        Args:
+            request: A `FileDataDiscoveryRequest` with connection and optional settings.
+
+        Returns:
+            RunId: The ID of the started discovery run
+
+        Raises:
+            FailedToStartError: If run fails to start
+        """
+
+        data = request.model_dump(exclude_none=True, mode="json")
+        response = self.make_request(
+            "POST",
+            "/api/run-file-data-discovery/",
+            data=data,
+            require_status_check=False,
+        )
+        run_data = response.json()
+
+        if response.status_code == 201:
+            logger.info("File data discovery run %s started successfully", run_data["id"])
+            return RunId(run_data["id"])
+
+        logger.error("File data discovery run failed to start: %s", run_data)
+        raise FailedToStartError(
+            f"File data discovery run failed to start "
+            f"(server responded with status {response.status_code}: {response.text}).",
+            response=response,
+        )
+
     def iter_schema_discovery_results(self, run_id: RunId) -> Iterator[SchemaDiscoveryResult]:
         """Lazily iterate all schema discovery results for a run via the paginated v2 endpoint."""
 
diff --git a/datamasque/client/models/discovery.py b/datamasque/client/models/discovery.py
index 7532526..d87a87f 100644
--- a/datamasque/client/models/discovery.py
+++ b/datamasque/client/models/discovery.py
@@ -6,6 +6,7 @@
 
 from datamasque.client.models.connection import ConnectionConfig, ConnectionId, unwrap_connection_id
 from datamasque.client.models.data_selection import HashColumnsTableConfig, Locator, UserSelection
+from datamasque.client.models.discovery_config import DiscoveryConfig, DiscoveryConfigId, unwrap_discovery_config_id
 from datamasque.client.models.pagination import Page
 
 
@@ -35,12 +36,15 @@ class SchemaDiscoveryRequest(BaseModel):
     Request body for `POST /api/schema-discovery/`.
 
     `connection` accepts either a `ConnectionId` or a full `ConnectionConfig` returned by an earlier client call.
+    `discovery_config` (optional) accepts either a `DiscoveryConfigId` or a full `DiscoveryConfig`;
+    when omitted the server's built-in default discovery config is used.
     Every other field uses the server's default value when omitted.
     """
 
     model_config = ConfigDict(extra="forbid")
 
     connection: Union[ConnectionId, ConnectionConfig]
+    discovery_config: Optional[Union[DiscoveryConfigId, DiscoveryConfig]] = None
     custom_keywords: list[str] = Field(default_factory=list)
     ignored_keywords: list[str] = Field(default_factory=list)
     schemas: list[str] = Field(default_factory=list)
@@ -54,6 +58,11 @@ class SchemaDiscoveryRequest(BaseModel):
     def _unwrap_connection(cls, value: Any) -> Any:
         return unwrap_connection_id(value)
 
+    @field_validator("discovery_config", mode="before")
+    @classmethod
+    def _unwrap_discovery_config(cls, value: Any) -> Any:
+        return unwrap_discovery_config_id(value)
+
 
 class RulesetGenerationRequest(BaseModel):
     """
@@ -77,6 +86,53 @@ def _unwrap_connection(cls, value: Any) -> Any:
         return unwrap_connection_id(value)
 
 
+class FileDataDiscoveryOptions(BaseModel):
+    """Run options nested under `FileDataDiscoveryRequest.options`."""
+
+    model_config = ConfigDict(extra="forbid")
+
+    dry_run: Optional[bool] = None
+    diagnostic_logging: Optional[bool] = None
+
+
+class FileDataDiscoveryRequest(BaseModel):
+    """
+    Request body for `POST /api/run-file-data-discovery/`.
+
+    `connection` accepts either a `ConnectionId` or a full `ConnectionConfig` returned by an earlier client call.
+    `discovery_config` (optional) accepts either a `DiscoveryConfigId` or a full `DiscoveryConfig`;
+    when omitted the server's built-in default discovery config is used.
+    All other fields use the server's default value when omitted.
+    """
+
+    model_config = ConfigDict(extra="forbid")
+
+    connection: Union[ConnectionId, ConnectionConfig]
+    discovery_config: Optional[Union[DiscoveryConfigId, DiscoveryConfig]] = None
+    options: Optional[FileDataDiscoveryOptions] = None
+    custom_keywords: list[str] = Field(default_factory=list)
+    ignored_keywords: list[str] = Field(default_factory=list)
+    disable_built_in_keywords: bool = False
+    disable_global_custom_keywords: Optional[bool] = None
+    disable_global_ignored_keywords: Optional[bool] = None
+    in_data_discovery: Optional[InDataDiscoveryConfig] = None
+    recurse: Optional[bool] = None
+    include: Optional[list[str]] = None
+    skip: Optional[list[str]] = None
+    encoding: Optional[str] = None
+    workers: Optional[int] = None
+
+    @field_validator("connection", mode="before")
+    @classmethod
+    def _unwrap_connection(cls, value: Any) -> Any:
+        return unwrap_connection_id(value)
+
+    @field_validator("discovery_config", mode="before")
+    @classmethod
+    def _unwrap_discovery_config(cls, value: Any) -> Any:
+        return unwrap_discovery_config_id(value)
+
+
 class FileRulesetGenerationRequest(BaseModel):
     """
     Request body for `POST /api/generate-file-ruleset/`.
diff --git a/datamasque/client/models/discovery_config.py b/datamasque/client/models/discovery_config.py
index 8c580b3..f0a6b3c 100644
--- a/datamasque/client/models/discovery_config.py
+++ b/datamasque/client/models/discovery_config.py
@@ -1,11 +1,31 @@
 from datetime import datetime
-from typing import NewType, Optional
+from typing import Any, NewType, Optional
 
 from pydantic import BaseModel, ConfigDict, Field
 
+from datamasque.client.models.status import ValidationStatus
+
 DiscoveryConfigId = NewType("DiscoveryConfigId", str)
 
 
+def unwrap_discovery_config_id(value: Any) -> Any:
+    """
+    Coerce a `DiscoveryConfig` to its `id`; pass other values through unchanged.
+
+    Used by request-model validators that accept either a `DiscoveryConfigId`
+    or a full `DiscoveryConfig` for user convenience.
+    Raises `ValueError` if the config has no `id`
+    (i.e. the caller hasn't yet created it on the server).
+    """
+
+    if isinstance(value, DiscoveryConfig):
+        if value.id is None:
+            raise ValueError("Discovery config has not been created yet (id is None)")
+        return value.id
+
+    return value
+
+
 class DiscoveryConfig(BaseModel):
     """Represents a named, persisted YAML discovery configuration."""
 
@@ -14,5 +34,10 @@ class DiscoveryConfig(BaseModel):
     name: str
     yaml: Optional[str] = Field(default=None, alias="config_yaml")
     id: Optional[DiscoveryConfigId] = None
+    # Server-managed validation surface, populated by the DataMasque server.
+    # `is_valid` may be `in_progress` immediately after creating a large config,
+    # transitioning to `valid` or `invalid` once the server finishes validating.
+    is_valid: Optional[ValidationStatus] = None
+    validation_error: Optional[str] = None
     created: Optional[datetime] = None
     modified: Optional[datetime] = None
diff --git a/tests/test_discovery.py b/tests/test_discovery.py
index 2830287..14a2a0b 100644
--- a/tests/test_discovery.py
+++ b/tests/test_discovery.py
@@ -9,6 +9,10 @@
 
 from datamasque.client import (
     DataMasqueClient,
+    DiscoveryConfig,
+    DiscoveryConfigId,
+    FileDataDiscoveryOptions,
+    FileDataDiscoveryRequest,
     FileRulesetGenerationRequest,
     RulesetGenerationRequest,
     RunId,
@@ -27,6 +31,8 @@
 from datamasque.client.models.status import AsyncRulesetGenerationTaskStatus
 from tests.helpers import parse_multipart_form
 
+DISCOVERY_CONFIG_ID = "aaaaaaaa-1111-2222-3333-444444444444"
+
 
 def test_generate_ruleset(client):
     req = RulesetGenerationRequest(connection="conn-1", selected_columns={"public": {"users": ["email"]}})
@@ -727,3 +733,118 @@ def test_start_schema_discovery_run_raises_on_non_201(client):
         )
         with pytest.raises(FailedToStartError, match="Schema discovery run failed to start"):
             client.start_schema_discovery_run(SchemaDiscoveryRequest(connection="nope"))
+
+
+def test_schema_discovery_request_accepts_discovery_config_id():
+    """A `DiscoveryConfigId` string passes through unchanged in the request body."""
+    req = SchemaDiscoveryRequest(
+        connection="conn-1",
+        discovery_config=DiscoveryConfigId(DISCOVERY_CONFIG_ID),
+    )
+    dumped = req.model_dump(exclude_none=True, mode="json")
+    assert dumped["discovery_config"] == DISCOVERY_CONFIG_ID
+
+
+def test_schema_discovery_request_unwraps_discovery_config_model():
+    """Passing a full `DiscoveryConfig` object substitutes its `id` for the wire payload."""
+    config = DiscoveryConfig(name="my_cfg", id=DiscoveryConfigId(DISCOVERY_CONFIG_ID))
+    req = SchemaDiscoveryRequest(connection="conn-1", discovery_config=config)
+    assert req.model_dump(exclude_none=True, mode="json")["discovery_config"] == DISCOVERY_CONFIG_ID
+
+
+def test_schema_discovery_request_rejects_unsaved_discovery_config():
+    """A `DiscoveryConfig` without an `id` cannot be used yet — raises immediately."""
+    config = DiscoveryConfig(name="my_cfg")
+    with pytest.raises(ValueError, match="id is None"):
+        SchemaDiscoveryRequest(connection="conn-1", discovery_config=config)
+
+
+def test_start_schema_discovery_run_sends_discovery_config(client):
+    """`start_schema_discovery_run` posts the discovery_config id to the server."""
+    req = SchemaDiscoveryRequest(
+        connection="conn-1",
+        discovery_config=DiscoveryConfigId(DISCOVERY_CONFIG_ID),
+    )
+    with requests_mock.Mocker() as m:
+        m.post(
+            "http://test-server/api/schema-discovery/",
+            json={"id": 11},
+            status_code=201,
+        )
+        assert client.start_schema_discovery_run(req) == 11
+
+    assert m.last_request.json()["discovery_config"] == DISCOVERY_CONFIG_ID
+
+
+def test_start_file_data_discovery_run_minimal(client):
+    """A minimal FDD request — only `connection` set — round-trips through the server."""
+    req = FileDataDiscoveryRequest(connection="conn-1")
+    with requests_mock.Mocker() as m:
+        m.post(
+            "http://test-server/api/run-file-data-discovery/",
+            json={"id": 42},
+            status_code=201,
+        )
+        run_id = client.start_file_data_discovery_run(req)
+
+    assert run_id == 42
+    body = m.last_request.json()
+    assert body["connection"] == "conn-1"
+    assert "discovery_config" not in body
+
+
+def test_start_file_data_discovery_run_full(client):
+    """All FDD request fields populate the wire payload and pass through unwrap helpers."""
+    config = DiscoveryConfig(name="my_cfg", id=DiscoveryConfigId(DISCOVERY_CONFIG_ID))
+    req = FileDataDiscoveryRequest(
+        connection="conn-1",
+        discovery_config=config,
+        options=FileDataDiscoveryOptions(dry_run=True, diagnostic_logging=True),
+        custom_keywords=["foo"],
+        ignored_keywords=["bar"],
+        disable_built_in_keywords=True,
+        disable_global_custom_keywords=True,
+        disable_global_ignored_keywords=False,
+        in_data_discovery={"enabled": True, "row_sample_size": 50},
+        recurse=True,
+        include=["*.csv"],
+        skip=["**/tmp/**"],
+        encoding="utf-8",
+        workers=4,
+    )
+    with requests_mock.Mocker() as m:
+        m.post(
+            "http://test-server/api/run-file-data-discovery/",
+            json={"id": 99},
+            status_code=201,
+        )
+        assert client.start_file_data_discovery_run(req) == 99
+
+    body = m.last_request.json()
+    assert body == {
+        "connection": "conn-1",
+        "discovery_config": DISCOVERY_CONFIG_ID,
+        "options": {"dry_run": True, "diagnostic_logging": True},
+        "custom_keywords": ["foo"],
+        "ignored_keywords": ["bar"],
+        "disable_built_in_keywords": True,
+        "disable_global_custom_keywords": True,
+        "disable_global_ignored_keywords": False,
+        "in_data_discovery": {"enabled": True, "row_sample_size": 50},
+        "recurse": True,
+        "include": ["*.csv"],
+        "skip": ["**/tmp/**"],
+        "encoding": "utf-8",
+        "workers": 4,
+    }
+
+
+def test_start_file_data_discovery_run_raises_on_non_201(client):
+    with requests_mock.Mocker() as m:
+        m.post(
+            "http://test-server/api/run-file-data-discovery/",
+            json={"detail": "connection not found"},
+            status_code=400,
+        )
+        with pytest.raises(FailedToStartError, match="File data discovery run failed to start"):
+            client.start_file_data_discovery_run(FileDataDiscoveryRequest(connection="nope"))
diff --git a/tests/test_discovery_configs.py b/tests/test_discovery_configs.py
index 2c9e71c..f98893e 100644
--- a/tests/test_discovery_configs.py
+++ b/tests/test_discovery_configs.py
@@ -8,7 +8,12 @@
 
 from datamasque.client import DataMasqueClient
 from datamasque.client.exceptions import DataMasqueApiError, DataMasqueException
-from datamasque.client.models.discovery_config import DiscoveryConfig, DiscoveryConfigId
+from datamasque.client.models.discovery_config import (
+    DiscoveryConfig,
+    DiscoveryConfigId,
+    unwrap_discovery_config_id,
+)
+from datamasque.client.models.status import ValidationStatus
 
 CONFIG_ID_1 = "aaaaaaaa-1111-2222-3333-444444444444"
 CONFIG_ID_2 = "bbbbbbbb-1111-2222-3333-444444444444"
@@ -428,3 +433,52 @@ def test_get_default_discovery_config_yaml(client: DataMasqueClient) -> None:
         result = client.get_default_discovery_config_yaml()
 
     assert result == yaml_body
+
+
+def test_discovery_config_parses_validation_fields() -> None:
+    """`is_valid` and `validation_error` round-trip from API responses."""
+    config = DiscoveryConfig.model_validate(
+        {
+            "id": CONFIG_ID_1,
+            "name": "my_config",
+            "config_yaml": "labels: []",
+            "is_valid": "invalid",
+            "validation_error": "bad shape on line 3",
+            "created": "2025-01-01T12:00:00Z",
+            "modified": "2025-01-02T12:00:00Z",
+        }
+    )
+
+    assert config.is_valid is ValidationStatus.invalid
+    assert config.validation_error == "bad shape on line 3"
+
+
+def test_discovery_config_validation_fields_optional() -> None:
+    """Older / lighter API responses without validation fields still parse."""
+    config = DiscoveryConfig.model_validate(
+        {
+            "id": CONFIG_ID_1,
+            "name": "my_config",
+            "created": "2025-01-01T12:00:00Z",
+            "modified": "2025-01-02T12:00:00Z",
+        }
+    )
+
+    assert config.is_valid is None
+    assert config.validation_error is None
+
+
+def test_unwrap_discovery_config_id_passes_through_strings() -> None:
+    assert unwrap_discovery_config_id(CONFIG_ID_1) == CONFIG_ID_1
+    assert unwrap_discovery_config_id(None) is None
+
+
+def test_unwrap_discovery_config_id_extracts_id_from_model() -> None:
+    config = DiscoveryConfig(name="x", id=DiscoveryConfigId(CONFIG_ID_1))
+    assert unwrap_discovery_config_id(config) == CONFIG_ID_1
+
+
+def test_unwrap_discovery_config_id_raises_without_id() -> None:
+    config = DiscoveryConfig(name="x")
+    with pytest.raises(ValueError, match="id is None"):
+        unwrap_discovery_config_id(config)

From 0f386c2e3be7fdb91609bba42bc6a91848a0a61f Mon Sep 17 00:00:00 2001
From: Colin Haywood <colin.haywood@datamasque.com>
Date: Thu, 28 May 2026 11:07:55 +1200
Subject: [PATCH 04/26] ci: Add ref and dev_version inputs to TestPyPI release
 workflow

---
 .github/workflows/release-testpypi.yml | 43 ++++++++++++++++++++++++--
 1 file changed, 41 insertions(+), 2 deletions(-)

diff --git a/.github/workflows/release-testpypi.yml b/.github/workflows/release-testpypi.yml
index 4935a55..9bff756 100644
--- a/.github/workflows/release-testpypi.yml
+++ b/.github/workflows/release-testpypi.yml
@@ -2,6 +2,17 @@ name: Release (TestPyPI)
 
 on:
   workflow_dispatch:
+    inputs:
+      ref:
+        description: 'Branch, tag, or commit SHA to build from'
+        required: true
+        default: 'main'
+      dev_version:
+        description: >-
+          Optional PEP 440 pre/dev version override (e.g. 1.1.0.dev1, 1.1.0a2, 1.1.0rc1).
+          Leave blank to use the version in pyproject.toml as-is.
+        required: false
+        default: ''
 
 jobs:
   build:
@@ -9,6 +20,8 @@ jobs:
     runs-on: ubuntu-latest
     steps:
       - uses: actions/checkout@v4
+        with:
+          ref: ${{ inputs.ref }}
 
       - name: Set up uv
         uses: astral-sh/setup-uv@v5
@@ -18,11 +31,37 @@ jobs:
       - name: Set up Python
         run: uv python install 3.12
 
+      - name: Apply dev_version override
+        if: inputs.dev_version != ''
+        # Pass the input through the environment, never interpolated into the script body:
+        # `${{ }}` is expanded into the script text before the shell runs, so splicing it inline
+        # would let a crafted dev_version inject shell commands before the validation below could reject it.
+        env:
+          DEV_VERSION: ${{ inputs.dev_version }}
+        run: |
+          # Reject anything that isn't a pre-release / dev version — final releases must go through release.yml.
+          if ! printf '%s' "${DEV_VERSION}" | grep -Eq '^[0-9]+\.[0-9]+\.[0-9]+(a|b|rc|\.dev)[0-9]+$'; then
+            echo "::error::dev_version '${DEV_VERSION}' is not a PEP 440 pre/dev version (must end in aN, bN, rcN, or .devN)"
+            exit 1
+          fi
+          uv run python -c "
+          import os, re, pathlib
+          version = os.environ['DEV_VERSION']
+          path = pathlib.Path('pyproject.toml')
+          text = path.read_text()
+          new = re.sub(r'^version\s*=\s*\".*\"', f'version = \"{version}\"', text, count=1, flags=re.M)
+          if new == text:
+              raise SystemExit('Failed to locate version line in pyproject.toml')
+          path.write_text(new)
+          "
+
       - name: Show package version
+        env:
+          REF: ${{ inputs.ref }}
         run: |
           VERSION="$(uv run python -c 'import tomllib; print(tomllib.loads(open("pyproject.toml","rb").read().decode())["project"]["version"])')"
-          echo "Publishing version: ${VERSION}"
-          echo "::notice title=TestPyPI version::${VERSION}"
+          echo "Publishing version: ${VERSION} (from ref ${REF})"
+          echo "::notice title=TestPyPI version::${VERSION} from ${REF}"
 
       - name: Build
         run: uv build

From 4edf1fcb0c7005ce63cf5faaf7bed0e6a3e4e2cf Mon Sep 17 00:00:00 2001
From: Colin Haywood <colin.haywood@datamasque.com>
Date: Thu, 28 May 2026 11:08:05 +1200
Subject: [PATCH 05/26] chore: Bump version to 1.1.0.dev0 for epic dev
 publishes

---
 pyproject.toml | 2 +-
 setup.cfg      | 2 +-
 uv.lock        | 2 +-
 3 files changed, 3 insertions(+), 3 deletions(-)

diff --git a/pyproject.toml b/pyproject.toml
index 5fdc7d0..7ba57f5 100644
--- a/pyproject.toml
+++ b/pyproject.toml
@@ -1,6 +1,6 @@
 [project]
 name = "datamasque-python"
-version = "1.0.5"
+version = "1.1.0.dev0"
 description = "Official Python client for the DataMasque data-masking API."
 authors = [
     { name = "DataMasque Ltd" },
diff --git a/setup.cfg b/setup.cfg
index f04494b..24d750a 100644
--- a/setup.cfg
+++ b/setup.cfg
@@ -1,5 +1,5 @@
 [bumpversion]
-current_version = 1.0.4
+current_version = 1.1.0.dev0
 commit = True
 tag = True
 
diff --git a/uv.lock b/uv.lock
index 6fe9d57..31e21ef 100644
--- a/uv.lock
+++ b/uv.lock
@@ -428,7 +428,7 @@ toml = [
 
 [[package]]
 name = "datamasque-python"
-version = "1.0.3"
+version = "1.1.0.dev0"
 source = { editable = "." }
 dependencies = [
     { name = "pydantic" },

From 6f77b0ce3a88094a8c9593d520aa59721410abdf Mon Sep 17 00:00:00 2001
From: Colin Haywood <colin.haywood@datamasque.com>
Date: Thu, 28 May 2026 11:47:57 +1200
Subject: [PATCH 06/26] feat: Raise InvalidDiscoveryConfigError on SD/FDD start
 failures

The server returns a 400 keyed under `discovery_config`
when the referenced config is missing, archived, or not in `valid` validation state.
Surface that as a specific exception subclass.
---
 datamasque/client/__init__.py   |  2 +
 datamasque/client/discovery.py  | 30 +++++++++++++--
 datamasque/client/exceptions.py |  8 ++++
 tests/test_discovery.py         | 68 +++++++++++++++++++++++++++++++++
 4 files changed, 104 insertions(+), 4 deletions(-)

diff --git a/datamasque/client/__init__.py b/datamasque/client/__init__.py
index bb6875a..2678e54 100644
--- a/datamasque/client/__init__.py
+++ b/datamasque/client/__init__.py
@@ -19,6 +19,7 @@
     DataMasqueUserError,
     FailedToStartError,
     IfmAuthError,
+    InvalidDiscoveryConfigError,
     InvalidLibraryError,
     InvalidRulesetError,
     RunNotCancellableError,
@@ -158,6 +159,7 @@
     "IfmTokenInfo",
     "InDataDiscoveryConfig",
     "InDataDiscoveryRule",
+    "InvalidDiscoveryConfigError",
     "InvalidLibraryError",
     "InvalidRulesetError",
     "JsonPath",
diff --git a/datamasque/client/discovery.py b/datamasque/client/discovery.py
index e6ba406..8e30d7c 100644
--- a/datamasque/client/discovery.py
+++ b/datamasque/client/discovery.py
@@ -4,11 +4,14 @@
 from pathlib import Path
 from typing import Iterator, Optional, Union
 
+from requests import Response
+
 from datamasque.client.base import BaseClient, UploadFile
 from datamasque.client.exceptions import (
     AsyncRulesetGenerationInProgressError,
     DataMasqueException,
     FailedToStartError,
+    InvalidDiscoveryConfigError,
 )
 from datamasque.client.models.connection import ConnectionId
 from datamasque.client.models.data_selection import (
@@ -208,7 +211,9 @@ def start_schema_discovery_run(self, discovery_config: SchemaDiscoveryRequest) -
             RunId: The ID of the started discovery run
 
         Raises:
-            FailedToStartError: If run fails to start
+            InvalidDiscoveryConfigError: the run failed to start because the referenced
+                discovery config is missing, archived, or not in a `valid` validation state.
+            FailedToStartError: the run failed to start for any other reason.
         """
 
         data = discovery_config.model_dump(exclude_none=True, mode="json")
@@ -218,13 +223,14 @@ def start_schema_discovery_run(self, discovery_config: SchemaDiscoveryRequest) -
             data=data,
             require_status_check=False,
         )
-        run_data = response.json()
+        run_data = response.json() if response.content else {}
 
         if response.status_code == 201:
             logger.info("Schema discovery run %s started successfully", run_data["id"])
             return RunId(run_data["id"])
 
         logger.error("Schema discovery run failed to start: %s", run_data)
+        self._maybe_raise_discovery_config_error(run_data, response, "Schema discovery")
         raise FailedToStartError(
             f"Schema discovery run failed to start "
             f"(server responded with status {response.status_code}: {response.text}).",
@@ -242,7 +248,9 @@ def start_file_data_discovery_run(self, request: FileDataDiscoveryRequest) -> Ru
             RunId: The ID of the started discovery run
 
         Raises:
-            FailedToStartError: If run fails to start
+            InvalidDiscoveryConfigError: the run failed to start because the referenced
+                discovery config is missing, archived, or not in a `valid` validation state.
+            FailedToStartError: the run failed to start for any other reason.
         """
 
         data = request.model_dump(exclude_none=True, mode="json")
@@ -252,19 +260,33 @@ def start_file_data_discovery_run(self, request: FileDataDiscoveryRequest) -> Ru
             data=data,
             require_status_check=False,
         )
-        run_data = response.json()
+        run_data = response.json() if response.content else {}
 
         if response.status_code == 201:
             logger.info("File data discovery run %s started successfully", run_data["id"])
             return RunId(run_data["id"])
 
         logger.error("File data discovery run failed to start: %s", run_data)
+        self._maybe_raise_discovery_config_error(run_data, response, "File data discovery")
         raise FailedToStartError(
             f"File data discovery run failed to start "
             f"(server responded with status {response.status_code}: {response.text}).",
             response=response,
         )
 
+    @staticmethod
+    def _maybe_raise_discovery_config_error(run_data: object, response: Response, run_kind: str) -> None:
+        """Raise `InvalidDiscoveryConfigError` if the server's 400 body cites `discovery_config`."""
+        if not isinstance(run_data, dict) or "discovery_config" not in run_data:
+            return
+
+        errors = run_data["discovery_config"]
+        detail = errors[0] if isinstance(errors, list) and errors else str(errors)
+        raise InvalidDiscoveryConfigError(
+            f"{run_kind} run failed to start due to discovery config error: {detail}",
+            response=response,
+        )
+
     def iter_schema_discovery_results(self, run_id: RunId) -> Iterator[SchemaDiscoveryResult]:
         """Lazily iterate all schema discovery results for a run via the paginated v2 endpoint."""
 
diff --git a/datamasque/client/exceptions.py b/datamasque/client/exceptions.py
index 05fe339..f034c4d 100644
--- a/datamasque/client/exceptions.py
+++ b/datamasque/client/exceptions.py
@@ -41,6 +41,14 @@ class InvalidLibraryError(FailedToStartError):
     """Specific error for when runs fail to start due to having an invalid ruleset library."""
 
 
+class InvalidDiscoveryConfigError(FailedToStartError):
+    """
+    Raised when a discovery run fails to start due to an unusable discovery config.
+
+    The referenced config is missing, archived, or not in a `valid` validation state.
+    """
+
+
 class DataMasqueTransportError(DataMasqueException):
     """
     Raised when a request to the DataMasque server fails before any response is received.
diff --git a/tests/test_discovery.py b/tests/test_discovery.py
index 14a2a0b..0cab7f9 100644
--- a/tests/test_discovery.py
+++ b/tests/test_discovery.py
@@ -25,6 +25,7 @@
     DataMasqueApiError,
     DataMasqueException,
     FailedToStartError,
+    InvalidDiscoveryConfigError,
 )
 from datamasque.client.models.connection import ConnectionId, DatabaseConnectionConfig, DatabaseType
 from datamasque.client.models.data_selection import SelectedColumns, SelectedFileData, UserSelection
@@ -848,3 +849,70 @@ def test_start_file_data_discovery_run_raises_on_non_201(client):
         )
         with pytest.raises(FailedToStartError, match="File data discovery run failed to start"):
             client.start_file_data_discovery_run(FileDataDiscoveryRequest(connection="nope"))
+
+
+def test_start_schema_discovery_raises_invalid_discovery_config_when_not_valid(client):
+    """A 400 with a `discovery_config` validation message raises the specific subclass."""
+    with requests_mock.Mocker() as m:
+        m.post(
+            "http://test-server/api/schema-discovery/",
+            json={
+                "discovery_config": ['Discovery config "my_cfg" cannot be used: validation status is `invalid`.'],
+            },
+            status_code=400,
+        )
+        with pytest.raises(
+            InvalidDiscoveryConfigError,
+            match=r'Schema discovery run failed to start due to discovery config error: .*"my_cfg".*invalid',
+        ):
+            client.start_schema_discovery_run(
+                SchemaDiscoveryRequest(connection="conn-1", discovery_config=DiscoveryConfigId(DISCOVERY_CONFIG_ID)),
+            )
+
+
+def test_start_schema_discovery_raises_invalid_discovery_config_when_missing(client):
+    """A 400 from DRF's PrimaryKeyRelatedField (config not found / archived) is also classified."""
+    with requests_mock.Mocker() as m:
+        m.post(
+            "http://test-server/api/schema-discovery/",
+            json={"discovery_config": [f'Invalid pk "{DISCOVERY_CONFIG_ID}" - object does not exist.']},
+            status_code=400,
+        )
+        with pytest.raises(InvalidDiscoveryConfigError, match="object does not exist"):
+            client.start_schema_discovery_run(
+                SchemaDiscoveryRequest(connection="conn-1", discovery_config=DiscoveryConfigId(DISCOVERY_CONFIG_ID)),
+            )
+
+
+def test_start_file_data_discovery_raises_invalid_discovery_config(client):
+    with requests_mock.Mocker() as m:
+        m.post(
+            "http://test-server/api/run-file-data-discovery/",
+            json={
+                "discovery_config": ['Discovery config "my_cfg" cannot be used: validation status is `in_progress`.'],
+            },
+            status_code=400,
+        )
+        with pytest.raises(
+            InvalidDiscoveryConfigError,
+            match=r"File data discovery run failed to start due to discovery config error: .*in_progress",
+        ):
+            client.start_file_data_discovery_run(
+                FileDataDiscoveryRequest(
+                    connection="conn-1",
+                    discovery_config=DiscoveryConfigId(DISCOVERY_CONFIG_ID),
+                ),
+            )
+
+
+def test_start_schema_discovery_non_discovery_config_400_still_raises_generic_error(client):
+    """Other 400s (e.g. unknown connection) keep raising the base FailedToStartError."""
+    with requests_mock.Mocker() as m:
+        m.post(
+            "http://test-server/api/schema-discovery/",
+            json={"connection": ['Invalid pk "nope" - object does not exist.']},
+            status_code=400,
+        )
+        with pytest.raises(FailedToStartError) as exc_info:
+            client.start_schema_discovery_run(SchemaDiscoveryRequest(connection="nope"))
+        assert not isinstance(exc_info.value, InvalidDiscoveryConfigError)

From 8646c123742753a88721d6b962f5612159872534 Mon Sep 17 00:00:00 2001
From: Colin Haywood <colin.haywood@datamasque.com>
Date: Thu, 28 May 2026 16:39:29 +1200
Subject: [PATCH 07/26] fix: Implement correct file filter syntax and
 ignore_rules for FDD

---
 datamasque/client/__init__.py         |  4 +++
 datamasque/client/models/discovery.py | 37 ++++++++++++++++++--
 tests/test_discovery.py               | 49 ++++++++++++++++++++++++---
 3 files changed, 83 insertions(+), 7 deletions(-)

diff --git a/datamasque/client/__init__.py b/datamasque/client/__init__.py
index 2678e54..2f261f4 100644
--- a/datamasque/client/__init__.py
+++ b/datamasque/client/__init__.py
@@ -61,6 +61,8 @@
     FileDiscoveryLocatorResult,
     FileDiscoveryMatch,
     FileDiscoveryResult,
+    FileFilter,
+    FileFilterMatchAgainst,
     FileRulesetGenerationRequest,
     ForeignKeyRef,
     InDataDiscoveryConfig,
@@ -146,6 +148,8 @@
     "FileDiscoveryLocatorResult",
     "FileDiscoveryMatch",
     "FileDiscoveryResult",
+    "FileFilter",
+    "FileFilterMatchAgainst",
     "FileId",
     "FileOrContent",
     "FileRulesetGenerationRequest",
diff --git a/datamasque/client/models/discovery.py b/datamasque/client/models/discovery.py
index d87a87f..d249cf2 100644
--- a/datamasque/client/models/discovery.py
+++ b/datamasque/client/models/discovery.py
@@ -1,8 +1,9 @@
 """Typed request and response shapes for schema-discovery and ruleset-generation endpoints."""
 
+from enum import Enum
 from typing import Any, Optional, Union
 
-from pydantic import BaseModel, ConfigDict, Field, field_validator
+from pydantic import BaseModel, ConfigDict, Field, field_validator, model_validator
 
 from datamasque.client.models.connection import ConnectionConfig, ConnectionId, unwrap_connection_id
 from datamasque.client.models.data_selection import HashColumnsTableConfig, Locator, UserSelection
@@ -28,6 +29,7 @@ class InDataDiscoveryConfig(BaseModel):
     row_sample_size: Optional[int] = None
     custom_rules: Optional[list[InDataDiscoveryRule]] = None
     non_sensitive_rules: Optional[list[InDataDiscoveryRule]] = None
+    ignore_rules: Optional[list[InDataDiscoveryRule]] = None
     force: Optional[bool] = None
 
 
@@ -86,6 +88,35 @@ def _unwrap_connection(cls, value: Any) -> Any:
         return unwrap_connection_id(value)
 
 
+class FileFilterMatchAgainst(Enum):
+    """Which part of a file's path an `include`/`skip` filter is matched against."""
+
+    path = "path"
+    filename = "filename"
+
+
+class FileFilter(BaseModel):
+    """
+    A single `include` or `skip` filter for file data discovery.
+
+    Exactly one of `glob` or `regex` must be set.
+    `match_against` selects whether the pattern is applied to the full path or just the filename
+    (defaults to the full path when omitted).
+    """
+
+    model_config = ConfigDict(extra="forbid")
+
+    glob: Optional[str] = Field(default=None, min_length=1)
+    regex: Optional[str] = Field(default=None, min_length=1)
+    match_against: Optional[FileFilterMatchAgainst] = None
+
+    @model_validator(mode="after")
+    def _check_glob_xor_regex(self) -> "FileFilter":
+        if (self.glob is None) == (self.regex is None):
+            raise ValueError("A `FileFilter` must set exactly one of `glob` or `regex`.")
+        return self
+
+
 class FileDataDiscoveryOptions(BaseModel):
     """Run options nested under `FileDataDiscoveryRequest.options`."""
 
@@ -117,8 +148,8 @@ class FileDataDiscoveryRequest(BaseModel):
     disable_global_ignored_keywords: Optional[bool] = None
     in_data_discovery: Optional[InDataDiscoveryConfig] = None
     recurse: Optional[bool] = None
-    include: Optional[list[str]] = None
-    skip: Optional[list[str]] = None
+    include: Optional[list[FileFilter]] = None
+    skip: Optional[list[FileFilter]] = None
     encoding: Optional[str] = None
     workers: Optional[int] = None
 
diff --git a/tests/test_discovery.py b/tests/test_discovery.py
index 0cab7f9..7c36ce8 100644
--- a/tests/test_discovery.py
+++ b/tests/test_discovery.py
@@ -6,6 +6,7 @@
 
 import pytest
 import requests_mock
+from pydantic import ValidationError
 
 from datamasque.client import (
     DataMasqueClient,
@@ -13,7 +14,10 @@
     DiscoveryConfigId,
     FileDataDiscoveryOptions,
     FileDataDiscoveryRequest,
+    FileFilter,
+    FileFilterMatchAgainst,
     FileRulesetGenerationRequest,
+    InDataDiscoveryConfig,
     RulesetGenerationRequest,
     RunId,
     SchemaDiscoveryPage,
@@ -808,8 +812,8 @@ def test_start_file_data_discovery_run_full(client):
         disable_global_ignored_keywords=False,
         in_data_discovery={"enabled": True, "row_sample_size": 50},
         recurse=True,
-        include=["*.csv"],
-        skip=["**/tmp/**"],
+        include=[{"glob": "*.csv"}],
+        skip=[{"regex": r".*/tmp/.*", "match_against": "path"}],
         encoding="utf-8",
         workers=4,
     )
@@ -833,13 +837,50 @@ def test_start_file_data_discovery_run_full(client):
         "disable_global_ignored_keywords": False,
         "in_data_discovery": {"enabled": True, "row_sample_size": 50},
         "recurse": True,
-        "include": ["*.csv"],
-        "skip": ["**/tmp/**"],
+        "include": [{"glob": "*.csv"}],
+        "skip": [{"regex": r".*/tmp/.*", "match_against": "path"}],
         "encoding": "utf-8",
         "workers": 4,
     }
 
 
+def test_file_filter_requires_exactly_one_of_glob_or_regex():
+    """A `FileFilter` with neither, or both, of `glob`/`regex` is rejected."""
+    FileFilter(glob="*.csv")
+    FileFilter(regex=r".*\.csv")
+    FileFilter(glob="*.csv", match_against=FileFilterMatchAgainst.filename)
+
+    with pytest.raises(ValidationError, match="exactly one of `glob` or `regex`"):
+        FileFilter()
+
+    with pytest.raises(ValidationError, match="exactly one of `glob` or `regex`"):
+        FileFilter(glob="*.csv", regex=r".*\.csv")
+
+
+def test_file_filter_rejects_empty_pattern():
+    """An empty `glob`/`regex` is rejected rather than silently treated as unset."""
+    with pytest.raises(ValidationError):
+        FileFilter(glob="")
+
+    with pytest.raises(ValidationError):
+        FileFilter(regex="", glob="*.csv")
+
+
+def test_file_data_discovery_ignore_rules_serialize():
+    """`in_data_discovery.ignore_rules` round-trips into the wire payload."""
+    req = FileDataDiscoveryRequest(
+        connection="conn-1",
+        in_data_discovery=InDataDiscoveryConfig(
+            enabled=True,
+            custom_rules=[{"name": "cc", "pattern": r"^1234"}],
+            non_sensitive_rules=[{"pattern": r"^5555"}],
+            ignore_rules=[{"pattern": r"^4321"}],
+        ),
+    )
+    dumped = req.model_dump(exclude_none=True, mode="json")
+    assert dumped["in_data_discovery"]["ignore_rules"] == [{"pattern": r"^4321"}]
+
+
 def test_start_file_data_discovery_run_raises_on_non_201(client):
     with requests_mock.Mocker() as m:
         m.post(

From 38827c4e8ae266c1f156445e52c2cd60f8d0280f Mon Sep 17 00:00:00 2001
From: Andrew Chester <andrew.chester@datamasque.com>
Date: Tue, 9 Jun 2026 17:27:22 +1200
Subject: [PATCH 08/26] feat: Add v2 saved-config discovery endpoints, keep v1
 keyword-only

---
 HISTORY.rst                           |   8 ++
 datamasque/client/__init__.py         |   4 +
 datamasque/client/discovery.py        |  91 +++++++++++++-
 datamasque/client/models/discovery.py |  56 ++++++++-
 tests/test_discovery.py               | 170 +++++++++++++++++---------
 5 files changed, 262 insertions(+), 67 deletions(-)

diff --git a/HISTORY.rst b/HISTORY.rst
index 73bb66e..a0655fc 100644
--- a/HISTORY.rst
+++ b/HISTORY.rst
@@ -2,6 +2,14 @@
 History
 =======
 
+1.1.0 (unreleased)
+------------------
+
+* Added discovery-config management APIs on ``DataMasqueClient`` (``list_discovery_configs``, ``get_discovery_config``, ``create_discovery_config``, ``update_discovery_config``, and friends), backed by the ``DiscoveryConfig`` / ``DiscoveryConfigId`` models.
+* Added ``start_schema_discovery_run_v2`` and ``start_file_data_discovery_run_v2`` for starting discovery runs from a saved discovery config, via the ``/api/schema-discovery/v2/`` and ``/api/run-file-data-discovery/v2/`` endpoints. The discovery config is a full override of all detection options, so the v2 request models (``SchemaDiscoveryV2Request``, ``FileDataDiscoveryV2Request``) reject the legacy keyword/schema/in-data-discovery fields.
+* The v1 ``start_schema_discovery_run`` and ``start_file_data_discovery_run`` endpoints are unchanged and do not accept a discovery config; ``start_file_data_discovery_run`` (with ``FileDataDiscoveryRequest``, the ``FileFilter`` include/skip model, and ``FileDataDiscoveryOptions``) is newly exposed for ad-hoc file data discovery.
+* Added ``InvalidDiscoveryConfigError``, raised by the v2 run-start methods when the referenced config is missing, archived, or not in a ``valid`` validation state.
+
 1.0.5 (2026-06-18)
 ------------------
 
diff --git a/datamasque/client/__init__.py b/datamasque/client/__init__.py
index 2f261f4..b9a4b85 100644
--- a/datamasque/client/__init__.py
+++ b/datamasque/client/__init__.py
@@ -57,6 +57,7 @@
     DiscoveryMatch,
     FileDataDiscoveryOptions,
     FileDataDiscoveryRequest,
+    FileDataDiscoveryV2Request,
     FileDiscoveryFile,
     FileDiscoveryLocatorResult,
     FileDiscoveryMatch,
@@ -73,6 +74,7 @@
     SchemaDiscoveryPage,
     SchemaDiscoveryRequest,
     SchemaDiscoveryResult,
+    SchemaDiscoveryV2Request,
     TableConstraints,
 )
 from datamasque.client.models.discovery_config import DiscoveryConfig, DiscoveryConfigId
@@ -144,6 +146,7 @@
     "FileConnectionConfig",
     "FileDataDiscoveryOptions",
     "FileDataDiscoveryRequest",
+    "FileDataDiscoveryV2Request",
     "FileDiscoveryFile",
     "FileDiscoveryLocatorResult",
     "FileDiscoveryMatch",
@@ -198,6 +201,7 @@
     "SchemaDiscoveryPage",
     "SchemaDiscoveryRequest",
     "SchemaDiscoveryResult",
+    "SchemaDiscoveryV2Request",
     "SeedFile",
     "SelectedColumns",
     "SelectedData",
diff --git a/datamasque/client/discovery.py b/datamasque/client/discovery.py
index 8e30d7c..ca2a8ff 100644
--- a/datamasque/client/discovery.py
+++ b/datamasque/client/discovery.py
@@ -21,12 +21,14 @@
 )
 from datamasque.client.models.discovery import (
     FileDataDiscoveryRequest,
+    FileDataDiscoveryV2Request,
     FileDiscoveryResult,
     FileRulesetGenerationRequest,
     RulesetGenerationRequest,
     SchemaDiscoveryPage,
     SchemaDiscoveryRequest,
     SchemaDiscoveryResult,
+    SchemaDiscoveryV2Request,
 )
 from datamasque.client.models.ruleset import Ruleset
 from datamasque.client.models.runs import RunId
@@ -211,9 +213,7 @@ def start_schema_discovery_run(self, discovery_config: SchemaDiscoveryRequest) -
             RunId: The ID of the started discovery run
 
         Raises:
-            InvalidDiscoveryConfigError: the run failed to start because the referenced
-                discovery config is missing, archived, or not in a `valid` validation state.
-            FailedToStartError: the run failed to start for any other reason.
+            FailedToStartError: If run fails to start
         """
 
         data = discovery_config.model_dump(exclude_none=True, mode="json")
@@ -223,14 +223,13 @@ def start_schema_discovery_run(self, discovery_config: SchemaDiscoveryRequest) -
             data=data,
             require_status_check=False,
         )
-        run_data = response.json() if response.content else {}
+        run_data = response.json()
 
         if response.status_code == 201:
             logger.info("Schema discovery run %s started successfully", run_data["id"])
             return RunId(run_data["id"])
 
         logger.error("Schema discovery run failed to start: %s", run_data)
-        self._maybe_raise_discovery_config_error(run_data, response, "Schema discovery")
         raise FailedToStartError(
             f"Schema discovery run failed to start "
             f"(server responded with status {response.status_code}: {response.text}).",
@@ -247,6 +246,44 @@ def start_file_data_discovery_run(self, request: FileDataDiscoveryRequest) -> Ru
         Returns:
             RunId: The ID of the started discovery run
 
+        Raises:
+            FailedToStartError: If run fails to start
+        """
+
+        data = request.model_dump(exclude_none=True, mode="json")
+        response = self.make_request(
+            "POST",
+            "/api/run-file-data-discovery/",
+            data=data,
+            require_status_check=False,
+        )
+        run_data = response.json()
+
+        if response.status_code == 201:
+            logger.info("File data discovery run %s started successfully", run_data["id"])
+            return RunId(run_data["id"])
+
+        logger.error("File data discovery run failed to start: %s", run_data)
+        raise FailedToStartError(
+            f"File data discovery run failed to start "
+            f"(server responded with status {response.status_code}: {response.text}).",
+            response=response,
+        )
+
+    def start_schema_discovery_run_v2(self, request: SchemaDiscoveryV2Request) -> RunId:
+        """
+        Starts a schema discovery run from a saved discovery config (v2 API).
+
+        All detection options come from the saved discovery config referenced by `request.discovery_config`;
+        the legacy keyword/schema/in-data-discovery options accepted by `start_schema_discovery_run`
+        are not accepted here.
+
+        Args:
+            request: A `SchemaDiscoveryV2Request` with the connection and saved discovery-config.
+
+        Returns:
+            RunId: The ID of the started discovery run
+
         Raises:
             InvalidDiscoveryConfigError: the run failed to start because the referenced
                 discovery config is missing, archived, or not in a `valid` validation state.
@@ -256,7 +293,49 @@ def start_file_data_discovery_run(self, request: FileDataDiscoveryRequest) -> Ru
         data = request.model_dump(exclude_none=True, mode="json")
         response = self.make_request(
             "POST",
-            "/api/run-file-data-discovery/",
+            "/api/schema-discovery/v2/",
+            data=data,
+            require_status_check=False,
+        )
+        run_data = response.json() if response.content else {}
+
+        if response.status_code == 201:
+            logger.info("Schema discovery run %s started successfully", run_data["id"])
+            return RunId(run_data["id"])
+
+        logger.error("Schema discovery run failed to start: %s", run_data)
+        self._maybe_raise_discovery_config_error(run_data, response, "Schema discovery")
+        raise FailedToStartError(
+            f"Schema discovery run failed to start "
+            f"(server responded with status {response.status_code}: {response.text}).",
+            response=response,
+        )
+
+    def start_file_data_discovery_run_v2(self, request: FileDataDiscoveryV2Request) -> RunId:
+        """
+        Starts a file data discovery run from a saved discovery config (v2 API).
+
+        Detection options come from the saved discovery config referenced by `request.discovery_config`;
+        the file-handling parameters (`recurse`, `include`, `skip`, `encoding`, `workers`) and run
+        `options` are still accepted, since the discovery config does not own them.
+
+        Args:
+            request: A `FileDataDiscoveryV2Request` with the connection, saved discovery-config, and
+                optional file-handling parameters.
+
+        Returns:
+            RunId: The ID of the started discovery run
+
+        Raises:
+            InvalidDiscoveryConfigError: the run failed to start because the referenced
+                discovery config is missing, archived, or not in a `valid` validation state.
+            FailedToStartError: the run failed to start for any other reason.
+        """
+
+        data = request.model_dump(exclude_none=True, mode="json")
+        response = self.make_request(
+            "POST",
+            "/api/run-file-data-discovery/v2/",
             data=data,
             require_status_check=False,
         )
diff --git a/datamasque/client/models/discovery.py b/datamasque/client/models/discovery.py
index d249cf2..267f145 100644
--- a/datamasque/client/models/discovery.py
+++ b/datamasque/client/models/discovery.py
@@ -38,15 +38,12 @@ class SchemaDiscoveryRequest(BaseModel):
     Request body for `POST /api/schema-discovery/`.
 
     `connection` accepts either a `ConnectionId` or a full `ConnectionConfig` returned by an earlier client call.
-    `discovery_config` (optional) accepts either a `DiscoveryConfigId` or a full `DiscoveryConfig`;
-    when omitted the server's built-in default discovery config is used.
     Every other field uses the server's default value when omitted.
     """
 
     model_config = ConfigDict(extra="forbid")
 
     connection: Union[ConnectionId, ConnectionConfig]
-    discovery_config: Optional[Union[DiscoveryConfigId, DiscoveryConfig]] = None
     custom_keywords: list[str] = Field(default_factory=list)
     ignored_keywords: list[str] = Field(default_factory=list)
     schemas: list[str] = Field(default_factory=list)
@@ -60,6 +57,27 @@ class SchemaDiscoveryRequest(BaseModel):
     def _unwrap_connection(cls, value: Any) -> Any:
         return unwrap_connection_id(value)
 
+
+class SchemaDiscoveryV2Request(BaseModel):
+    """
+    Request body for `POST /api/schema-discovery/v2/` (start a run from a saved discovery config).
+
+    `connection` accepts either a `ConnectionId` or a full `ConnectionConfig` returned by an earlier client call.
+    `discovery_config` accepts either a `DiscoveryConfigId` or a full `DiscoveryConfig`, and is a full
+    override: it supplies every detection option, so the legacy keyword/schema/in-data-discovery fields
+    accepted by `SchemaDiscoveryRequest` are rejected here.
+    """
+
+    model_config = ConfigDict(extra="forbid")
+
+    connection: Union[ConnectionId, ConnectionConfig]
+    discovery_config: Union[DiscoveryConfigId, DiscoveryConfig]
+
+    @field_validator("connection", mode="before")
+    @classmethod
+    def _unwrap_connection(cls, value: Any) -> Any:
+        return unwrap_connection_id(value)
+
     @field_validator("discovery_config", mode="before")
     @classmethod
     def _unwrap_discovery_config(cls, value: Any) -> Any:
@@ -131,15 +149,12 @@ class FileDataDiscoveryRequest(BaseModel):
     Request body for `POST /api/run-file-data-discovery/`.
 
     `connection` accepts either a `ConnectionId` or a full `ConnectionConfig` returned by an earlier client call.
-    `discovery_config` (optional) accepts either a `DiscoveryConfigId` or a full `DiscoveryConfig`;
-    when omitted the server's built-in default discovery config is used.
     All other fields use the server's default value when omitted.
     """
 
     model_config = ConfigDict(extra="forbid")
 
     connection: Union[ConnectionId, ConnectionConfig]
-    discovery_config: Optional[Union[DiscoveryConfigId, DiscoveryConfig]] = None
     options: Optional[FileDataDiscoveryOptions] = None
     custom_keywords: list[str] = Field(default_factory=list)
     ignored_keywords: list[str] = Field(default_factory=list)
@@ -158,6 +173,35 @@ class FileDataDiscoveryRequest(BaseModel):
     def _unwrap_connection(cls, value: Any) -> Any:
         return unwrap_connection_id(value)
 
+
+class FileDataDiscoveryV2Request(BaseModel):
+    """
+    Request body for `POST /api/run-file-data-discovery/v2/` (start a run from a saved discovery config).
+
+    `discovery_config` is a full override of the detection options, so the legacy keyword and
+    in-data-discovery fields accepted by `FileDataDiscoveryRequest` are rejected here.
+    The file-handling parameters (`recurse`, `include`, `skip`, `encoding`, `workers`) and run
+    `options` remain accepted, since the discovery config does not own them.
+    `connection` accepts either a `ConnectionId` or a full `ConnectionConfig`, and `discovery_config`
+    accepts either a `DiscoveryConfigId` or a full `DiscoveryConfig`.
+    """
+
+    model_config = ConfigDict(extra="forbid")
+
+    connection: Union[ConnectionId, ConnectionConfig]
+    discovery_config: Union[DiscoveryConfigId, DiscoveryConfig]
+    options: Optional[FileDataDiscoveryOptions] = None
+    recurse: Optional[bool] = None
+    include: Optional[list[FileFilter]] = None
+    skip: Optional[list[FileFilter]] = None
+    encoding: Optional[str] = None
+    workers: Optional[int] = None
+
+    @field_validator("connection", mode="before")
+    @classmethod
+    def _unwrap_connection(cls, value: Any) -> Any:
+        return unwrap_connection_id(value)
+
     @field_validator("discovery_config", mode="before")
     @classmethod
     def _unwrap_discovery_config(cls, value: Any) -> Any:
diff --git a/tests/test_discovery.py b/tests/test_discovery.py
index 7c36ce8..6eb2643 100644
--- a/tests/test_discovery.py
+++ b/tests/test_discovery.py
@@ -14,6 +14,7 @@
     DiscoveryConfigId,
     FileDataDiscoveryOptions,
     FileDataDiscoveryRequest,
+    FileDataDiscoveryV2Request,
     FileFilter,
     FileFilterMatchAgainst,
     FileRulesetGenerationRequest,
@@ -23,6 +24,7 @@
     SchemaDiscoveryPage,
     SchemaDiscoveryRequest,
     SchemaDiscoveryResult,
+    SchemaDiscoveryV2Request,
 )
 from datamasque.client.exceptions import (
     AsyncRulesetGenerationInProgressError,
@@ -740,45 +742,19 @@ def test_start_schema_discovery_run_raises_on_non_201(client):
             client.start_schema_discovery_run(SchemaDiscoveryRequest(connection="nope"))
 
 
-def test_schema_discovery_request_accepts_discovery_config_id():
-    """A `DiscoveryConfigId` string passes through unchanged in the request body."""
-    req = SchemaDiscoveryRequest(
-        connection="conn-1",
-        discovery_config=DiscoveryConfigId(DISCOVERY_CONFIG_ID),
-    )
-    dumped = req.model_dump(exclude_none=True, mode="json")
-    assert dumped["discovery_config"] == DISCOVERY_CONFIG_ID
-
+# --- v1 (legacy) discovery: no discovery_config accepted ---
 
-def test_schema_discovery_request_unwraps_discovery_config_model():
-    """Passing a full `DiscoveryConfig` object substitutes its `id` for the wire payload."""
-    config = DiscoveryConfig(name="my_cfg", id=DiscoveryConfigId(DISCOVERY_CONFIG_ID))
-    req = SchemaDiscoveryRequest(connection="conn-1", discovery_config=config)
-    assert req.model_dump(exclude_none=True, mode="json")["discovery_config"] == DISCOVERY_CONFIG_ID
-
-
-def test_schema_discovery_request_rejects_unsaved_discovery_config():
-    """A `DiscoveryConfig` without an `id` cannot be used yet — raises immediately."""
-    config = DiscoveryConfig(name="my_cfg")
-    with pytest.raises(ValueError, match="id is None"):
-        SchemaDiscoveryRequest(connection="conn-1", discovery_config=config)
 
+def test_schema_discovery_request_rejects_discovery_config():
+    """The v1 schema-discovery request no longer accepts `discovery_config` (it is v2-only)."""
+    with pytest.raises(ValidationError):
+        SchemaDiscoveryRequest(connection="conn-1", discovery_config=DiscoveryConfigId(DISCOVERY_CONFIG_ID))
 
-def test_start_schema_discovery_run_sends_discovery_config(client):
-    """`start_schema_discovery_run` posts the discovery_config id to the server."""
-    req = SchemaDiscoveryRequest(
-        connection="conn-1",
-        discovery_config=DiscoveryConfigId(DISCOVERY_CONFIG_ID),
-    )
-    with requests_mock.Mocker() as m:
-        m.post(
-            "http://test-server/api/schema-discovery/",
-            json={"id": 11},
-            status_code=201,
-        )
-        assert client.start_schema_discovery_run(req) == 11
 
-    assert m.last_request.json()["discovery_config"] == DISCOVERY_CONFIG_ID
+def test_file_data_discovery_request_rejects_discovery_config():
+    """The v1 file-data-discovery request no longer accepts `discovery_config` (it is v2-only)."""
+    with pytest.raises(ValidationError):
+        FileDataDiscoveryRequest(connection="conn-1", discovery_config=DiscoveryConfigId(DISCOVERY_CONFIG_ID))
 
 
 def test_start_file_data_discovery_run_minimal(client):
@@ -799,11 +775,9 @@ def test_start_file_data_discovery_run_minimal(client):
 
 
 def test_start_file_data_discovery_run_full(client):
-    """All FDD request fields populate the wire payload and pass through unwrap helpers."""
-    config = DiscoveryConfig(name="my_cfg", id=DiscoveryConfigId(DISCOVERY_CONFIG_ID))
+    """All legacy FDD request fields populate the wire payload and pass through unwrap helpers."""
     req = FileDataDiscoveryRequest(
         connection="conn-1",
-        discovery_config=config,
         options=FileDataDiscoveryOptions(dry_run=True, diagnostic_logging=True),
         custom_keywords=["foo"],
         ignored_keywords=["bar"],
@@ -828,7 +802,6 @@ def test_start_file_data_discovery_run_full(client):
     body = m.last_request.json()
     assert body == {
         "connection": "conn-1",
-        "discovery_config": DISCOVERY_CONFIG_ID,
         "options": {"dry_run": True, "diagnostic_logging": True},
         "custom_keywords": ["foo"],
         "ignored_keywords": ["bar"],
@@ -892,11 +865,96 @@ def test_start_file_data_discovery_run_raises_on_non_201(client):
             client.start_file_data_discovery_run(FileDataDiscoveryRequest(connection="nope"))
 
 
-def test_start_schema_discovery_raises_invalid_discovery_config_when_not_valid(client):
+# --- v2 discovery: discovery_config only, posted to the /v2/ endpoints ---
+
+
+def test_schema_discovery_v2_request_accepts_discovery_config_id():
+    """A `DiscoveryConfigId` string passes through unchanged in the v2 request body."""
+    req = SchemaDiscoveryV2Request(connection="conn-1", discovery_config=DiscoveryConfigId(DISCOVERY_CONFIG_ID))
+    assert req.model_dump(exclude_none=True, mode="json") == {
+        "connection": "conn-1",
+        "discovery_config": DISCOVERY_CONFIG_ID,
+    }
+
+
+def test_schema_discovery_v2_request_unwraps_discovery_config_model():
+    """Passing a full `DiscoveryConfig` object substitutes its `id` for the wire payload."""
+    config = DiscoveryConfig(name="my_cfg", id=DiscoveryConfigId(DISCOVERY_CONFIG_ID))
+    req = SchemaDiscoveryV2Request(connection="conn-1", discovery_config=config)
+    assert req.model_dump(exclude_none=True, mode="json")["discovery_config"] == DISCOVERY_CONFIG_ID
+
+
+def test_schema_discovery_v2_request_rejects_unsaved_discovery_config():
+    """A `DiscoveryConfig` without an `id` cannot be used yet — raises immediately."""
+    config = DiscoveryConfig(name="my_cfg")
+    with pytest.raises(ValueError, match="id is None"):
+        SchemaDiscoveryV2Request(connection="conn-1", discovery_config=config)
+
+
+def test_schema_discovery_v2_request_rejects_legacy_fields():
+    """`config_id` is a full override — the v2 request rejects any legacy detection options."""
+    with pytest.raises(ValidationError):
+        SchemaDiscoveryV2Request(
+            connection="conn-1",
+            discovery_config=DiscoveryConfigId(DISCOVERY_CONFIG_ID),
+            schemas=["public"],
+        )
+
+
+def test_file_data_discovery_v2_request_rejects_legacy_detection_fields():
+    """The v2 FDD request rejects detection options owned by the config, while keeping file-handling params."""
+    with pytest.raises(ValidationError):
+        FileDataDiscoveryV2Request(
+            connection="conn-1",
+            discovery_config=DiscoveryConfigId(DISCOVERY_CONFIG_ID),
+            custom_keywords=["foo"],
+        )
+
+
+def test_start_schema_discovery_run_v2_sends_discovery_config(client):
+    """`start_schema_discovery_run_v2` posts the discovery_config id to the v2 endpoint."""
+    req = SchemaDiscoveryV2Request(connection="conn-1", discovery_config=DiscoveryConfigId(DISCOVERY_CONFIG_ID))
+    with requests_mock.Mocker() as m:
+        m.post("http://test-server/api/schema-discovery/v2/", json={"id": 11}, status_code=201)
+        assert client.start_schema_discovery_run_v2(req) == 11
+
+    assert m.last_request.json() == {"connection": "conn-1", "discovery_config": DISCOVERY_CONFIG_ID}
+
+
+def test_start_file_data_discovery_run_v2_full(client):
+    """The v2 FDD request carries the config plus the file-handling params the config does not own."""
+    config = DiscoveryConfig(name="my_cfg", id=DiscoveryConfigId(DISCOVERY_CONFIG_ID))
+    req = FileDataDiscoveryV2Request(
+        connection="conn-1",
+        discovery_config=config,
+        options=FileDataDiscoveryOptions(dry_run=True, diagnostic_logging=True),
+        recurse=True,
+        include=[{"glob": "*.csv"}],
+        skip=[{"regex": r".*/tmp/.*", "match_against": "path"}],
+        encoding="utf-8",
+        workers=4,
+    )
+    with requests_mock.Mocker() as m:
+        m.post("http://test-server/api/run-file-data-discovery/v2/", json={"id": 99}, status_code=201)
+        assert client.start_file_data_discovery_run_v2(req) == 99
+
+    assert m.last_request.json() == {
+        "connection": "conn-1",
+        "discovery_config": DISCOVERY_CONFIG_ID,
+        "options": {"dry_run": True, "diagnostic_logging": True},
+        "recurse": True,
+        "include": [{"glob": "*.csv"}],
+        "skip": [{"regex": r".*/tmp/.*", "match_against": "path"}],
+        "encoding": "utf-8",
+        "workers": 4,
+    }
+
+
+def test_start_schema_discovery_run_v2_raises_invalid_discovery_config_when_not_valid(client):
     """A 400 with a `discovery_config` validation message raises the specific subclass."""
     with requests_mock.Mocker() as m:
         m.post(
-            "http://test-server/api/schema-discovery/",
+            "http://test-server/api/schema-discovery/v2/",
             json={
                 "discovery_config": ['Discovery config "my_cfg" cannot be used: validation status is `invalid`.'],
             },
@@ -906,29 +964,29 @@ def test_start_schema_discovery_raises_invalid_discovery_config_when_not_valid(c
             InvalidDiscoveryConfigError,
             match=r'Schema discovery run failed to start due to discovery config error: .*"my_cfg".*invalid',
         ):
-            client.start_schema_discovery_run(
-                SchemaDiscoveryRequest(connection="conn-1", discovery_config=DiscoveryConfigId(DISCOVERY_CONFIG_ID)),
+            client.start_schema_discovery_run_v2(
+                SchemaDiscoveryV2Request(connection="conn-1", discovery_config=DiscoveryConfigId(DISCOVERY_CONFIG_ID)),
             )
 
 
-def test_start_schema_discovery_raises_invalid_discovery_config_when_missing(client):
+def test_start_schema_discovery_run_v2_raises_invalid_discovery_config_when_missing(client):
     """A 400 from DRF's PrimaryKeyRelatedField (config not found / archived) is also classified."""
     with requests_mock.Mocker() as m:
         m.post(
-            "http://test-server/api/schema-discovery/",
+            "http://test-server/api/schema-discovery/v2/",
             json={"discovery_config": [f'Invalid pk "{DISCOVERY_CONFIG_ID}" - object does not exist.']},
             status_code=400,
         )
         with pytest.raises(InvalidDiscoveryConfigError, match="object does not exist"):
-            client.start_schema_discovery_run(
-                SchemaDiscoveryRequest(connection="conn-1", discovery_config=DiscoveryConfigId(DISCOVERY_CONFIG_ID)),
+            client.start_schema_discovery_run_v2(
+                SchemaDiscoveryV2Request(connection="conn-1", discovery_config=DiscoveryConfigId(DISCOVERY_CONFIG_ID)),
             )
 
 
-def test_start_file_data_discovery_raises_invalid_discovery_config(client):
+def test_start_file_data_discovery_run_v2_raises_invalid_discovery_config(client):
     with requests_mock.Mocker() as m:
         m.post(
-            "http://test-server/api/run-file-data-discovery/",
+            "http://test-server/api/run-file-data-discovery/v2/",
             json={
                 "discovery_config": ['Discovery config "my_cfg" cannot be used: validation status is `in_progress`.'],
             },
@@ -938,22 +996,24 @@ def test_start_file_data_discovery_raises_invalid_discovery_config(client):
             InvalidDiscoveryConfigError,
             match=r"File data discovery run failed to start due to discovery config error: .*in_progress",
         ):
-            client.start_file_data_discovery_run(
-                FileDataDiscoveryRequest(
+            client.start_file_data_discovery_run_v2(
+                FileDataDiscoveryV2Request(
                     connection="conn-1",
                     discovery_config=DiscoveryConfigId(DISCOVERY_CONFIG_ID),
                 ),
             )
 
 
-def test_start_schema_discovery_non_discovery_config_400_still_raises_generic_error(client):
-    """Other 400s (e.g. unknown connection) keep raising the base FailedToStartError."""
+def test_start_schema_discovery_run_v2_non_discovery_config_400_still_raises_generic_error(client):
+    """Other 400s (e.g. unknown connection) keep raising the base FailedToStartError, not the config subclass."""
     with requests_mock.Mocker() as m:
         m.post(
-            "http://test-server/api/schema-discovery/",
+            "http://test-server/api/schema-discovery/v2/",
             json={"connection": ['Invalid pk "nope" - object does not exist.']},
             status_code=400,
         )
         with pytest.raises(FailedToStartError) as exc_info:
-            client.start_schema_discovery_run(SchemaDiscoveryRequest(connection="nope"))
+            client.start_schema_discovery_run_v2(
+                SchemaDiscoveryV2Request(connection="nope", discovery_config=DiscoveryConfigId(DISCOVERY_CONFIG_ID)),
+            )
         assert not isinstance(exc_info.value, InvalidDiscoveryConfigError)

From 567e6721011856a84659b7589ec0c3dd8303eb5d Mon Sep 17 00:00:00 2001
From: Andrew Chester <andrew.chester@datamasque.com>
Date: Wed, 10 Jun 2026 13:49:20 +1200
Subject: [PATCH 09/26] feat: Reject discovery_config on v1 discovery requests
 with a v2 redirect

---
 datamasque/client/models/discovery.py | 34 +++++++++++++++++++++++----
 tests/test_discovery.py               |  8 +++----
 2 files changed, 34 insertions(+), 8 deletions(-)

diff --git a/datamasque/client/models/discovery.py b/datamasque/client/models/discovery.py
index 267f145..37393de 100644
--- a/datamasque/client/models/discovery.py
+++ b/datamasque/client/models/discovery.py
@@ -35,10 +35,12 @@ class InDataDiscoveryConfig(BaseModel):
 
 class SchemaDiscoveryRequest(BaseModel):
     """
-    Request body for `POST /api/schema-discovery/`.
+    Request body for `POST /api/schema-discovery/` (the keyword-driven schema-discovery trigger).
 
     `connection` accepts either a `ConnectionId` or a full `ConnectionConfig` returned by an earlier client call.
-    Every other field uses the server's default value when omitted.
+    This request does not accept a `discovery_config`; to run from a saved discovery config use
+    `start_schema_discovery_run_v2` with a `SchemaDiscoveryV2Request`. Every other field uses the
+    server's default value when omitted.
     """
 
     model_config = ConfigDict(extra="forbid")
@@ -57,6 +59,17 @@ class SchemaDiscoveryRequest(BaseModel):
     def _unwrap_connection(cls, value: Any) -> Any:
         return unwrap_connection_id(value)
 
+    @model_validator(mode="before")
+    @classmethod
+    def _reject_discovery_config(cls, data: Any) -> Any:
+        if isinstance(data, dict) and "discovery_config" in data:
+            raise ValueError(
+                "`discovery_config` is not accepted by the keyword-driven schema-discovery request; "
+                "use `start_schema_discovery_run_v2` with a `SchemaDiscoveryV2Request` to run from a saved "
+                "discovery config."
+            )
+        return data
+
 
 class SchemaDiscoveryV2Request(BaseModel):
     """
@@ -146,10 +159,12 @@ class FileDataDiscoveryOptions(BaseModel):
 
 class FileDataDiscoveryRequest(BaseModel):
     """
-    Request body for `POST /api/run-file-data-discovery/`.
+    Request body for `POST /api/run-file-data-discovery/` (the keyword-driven file-data-discovery trigger).
 
     `connection` accepts either a `ConnectionId` or a full `ConnectionConfig` returned by an earlier client call.
-    All other fields use the server's default value when omitted.
+    This request does not accept a `discovery_config`; to run from a saved discovery config use
+    `start_file_data_discovery_run_v2` with a `FileDataDiscoveryV2Request`. All other fields use the
+    server's default value when omitted.
     """
 
     model_config = ConfigDict(extra="forbid")
@@ -173,6 +188,17 @@ class FileDataDiscoveryRequest(BaseModel):
     def _unwrap_connection(cls, value: Any) -> Any:
         return unwrap_connection_id(value)
 
+    @model_validator(mode="before")
+    @classmethod
+    def _reject_discovery_config(cls, data: Any) -> Any:
+        if isinstance(data, dict) and "discovery_config" in data:
+            raise ValueError(
+                "`discovery_config` is not accepted by the keyword-driven file-data-discovery request; "
+                "use `start_file_data_discovery_run_v2` with a `FileDataDiscoveryV2Request` to run from a saved "
+                "discovery config."
+            )
+        return data
+
 
 class FileDataDiscoveryV2Request(BaseModel):
     """
diff --git a/tests/test_discovery.py b/tests/test_discovery.py
index 6eb2643..08b911b 100644
--- a/tests/test_discovery.py
+++ b/tests/test_discovery.py
@@ -746,14 +746,14 @@ def test_start_schema_discovery_run_raises_on_non_201(client):
 
 
 def test_schema_discovery_request_rejects_discovery_config():
-    """The v1 schema-discovery request no longer accepts `discovery_config` (it is v2-only)."""
-    with pytest.raises(ValidationError):
+    """The v1 schema-discovery request rejects `discovery_config` and points the user at the v2 method."""
+    with pytest.raises(ValidationError, match="start_schema_discovery_run_v2"):
         SchemaDiscoveryRequest(connection="conn-1", discovery_config=DiscoveryConfigId(DISCOVERY_CONFIG_ID))
 
 
 def test_file_data_discovery_request_rejects_discovery_config():
-    """The v1 file-data-discovery request no longer accepts `discovery_config` (it is v2-only)."""
-    with pytest.raises(ValidationError):
+    """The v1 file-data-discovery request rejects `discovery_config` and points the user at the v2 method."""
+    with pytest.raises(ValidationError, match="start_file_data_discovery_run_v2"):
         FileDataDiscoveryRequest(connection="conn-1", discovery_config=DiscoveryConfigId(DISCOVERY_CONFIG_ID))
 
 

From a6e1736735e664323374838ceba9eea4efde4b1e Mon Sep 17 00:00:00 2001
From: Andrew Chester <andrew.chester@datamasque.com>
Date: Wed, 10 Jun 2026 15:55:33 +1200
Subject: [PATCH 10/26] feat: Restrict v2 file discovery request to connection
 and discovery_config

---
 HISTORY.rst                           |  2 +-
 datamasque/client/discovery.py        |  8 ++---
 datamasque/client/models/discovery.py | 17 +++-------
 tests/test_discovery.py               | 45 ++++++++++++---------------
 4 files changed, 29 insertions(+), 43 deletions(-)

diff --git a/HISTORY.rst b/HISTORY.rst
index a0655fc..1a345ed 100644
--- a/HISTORY.rst
+++ b/HISTORY.rst
@@ -6,7 +6,7 @@ History
 ------------------
 
 * Added discovery-config management APIs on ``DataMasqueClient`` (``list_discovery_configs``, ``get_discovery_config``, ``create_discovery_config``, ``update_discovery_config``, and friends), backed by the ``DiscoveryConfig`` / ``DiscoveryConfigId`` models.
-* Added ``start_schema_discovery_run_v2`` and ``start_file_data_discovery_run_v2`` for starting discovery runs from a saved discovery config, via the ``/api/schema-discovery/v2/`` and ``/api/run-file-data-discovery/v2/`` endpoints. The discovery config is a full override of all detection options, so the v2 request models (``SchemaDiscoveryV2Request``, ``FileDataDiscoveryV2Request``) reject the legacy keyword/schema/in-data-discovery fields.
+* Added ``start_schema_discovery_run_v2`` and ``start_file_data_discovery_run_v2`` for starting discovery runs from a saved discovery config, via the ``/api/schema-discovery/v2/`` and ``/api/run-file-data-discovery/v2/`` endpoints. The discovery config is a full override, so the v2 request models (``SchemaDiscoveryV2Request``, ``FileDataDiscoveryV2Request``) accept only the connection and discovery config: they reject the legacy keyword/schema/in-data-discovery fields, and ``FileDataDiscoveryV2Request`` additionally rejects the file-handling parameters (``recurse``, ``include``, ``skip``, ``encoding``, ``workers``) and run ``options``, which now live in the config.
 * The v1 ``start_schema_discovery_run`` and ``start_file_data_discovery_run`` endpoints are unchanged and do not accept a discovery config; ``start_file_data_discovery_run`` (with ``FileDataDiscoveryRequest``, the ``FileFilter`` include/skip model, and ``FileDataDiscoveryOptions``) is newly exposed for ad-hoc file data discovery.
 * Added ``InvalidDiscoveryConfigError``, raised by the v2 run-start methods when the referenced config is missing, archived, or not in a ``valid`` validation state.
 
diff --git a/datamasque/client/discovery.py b/datamasque/client/discovery.py
index ca2a8ff..183ab43 100644
--- a/datamasque/client/discovery.py
+++ b/datamasque/client/discovery.py
@@ -315,13 +315,11 @@ def start_file_data_discovery_run_v2(self, request: FileDataDiscoveryV2Request)
         """
         Starts a file data discovery run from a saved discovery config (v2 API).
 
-        Detection options come from the saved discovery config referenced by `request.discovery_config`;
-        the file-handling parameters (`recurse`, `include`, `skip`, `encoding`, `workers`) and run
-        `options` are still accepted, since the discovery config does not own them.
+        All detection options, file-handling parameters, and run options come from the saved discovery
+        config referenced by `request.discovery_config`; no per-run overrides are accepted.
 
         Args:
-            request: A `FileDataDiscoveryV2Request` with the connection, saved discovery-config, and
-                optional file-handling parameters.
+            request: A `FileDataDiscoveryV2Request` with the connection and saved discovery-config.
 
         Returns:
             RunId: The ID of the started discovery run
diff --git a/datamasque/client/models/discovery.py b/datamasque/client/models/discovery.py
index 37393de..5619258 100644
--- a/datamasque/client/models/discovery.py
+++ b/datamasque/client/models/discovery.py
@@ -204,24 +204,17 @@ class FileDataDiscoveryV2Request(BaseModel):
     """
     Request body for `POST /api/run-file-data-discovery/v2/` (start a run from a saved discovery config).
 
-    `discovery_config` is a full override of the detection options, so the legacy keyword and
-    in-data-discovery fields accepted by `FileDataDiscoveryRequest` are rejected here.
-    The file-handling parameters (`recurse`, `include`, `skip`, `encoding`, `workers`) and run
-    `options` remain accepted, since the discovery config does not own them.
-    `connection` accepts either a `ConnectionId` or a full `ConnectionConfig`, and `discovery_config`
-    accepts either a `DiscoveryConfigId` or a full `DiscoveryConfig`.
+    `connection` accepts either a `ConnectionId` or a full `ConnectionConfig` returned by an earlier client call.
+    `discovery_config` accepts either a `DiscoveryConfigId` or a full `DiscoveryConfig`, and is a full
+    override: it supplies every detection option as well as the file-handling and run options, so the
+    legacy keyword/in-data-discovery fields and the file-handling parameters (`recurse`, `include`, `skip`,
+    `encoding`, `workers`) accepted by `FileDataDiscoveryRequest` are rejected here.
     """
 
     model_config = ConfigDict(extra="forbid")
 
     connection: Union[ConnectionId, ConnectionConfig]
     discovery_config: Union[DiscoveryConfigId, DiscoveryConfig]
-    options: Optional[FileDataDiscoveryOptions] = None
-    recurse: Optional[bool] = None
-    include: Optional[list[FileFilter]] = None
-    skip: Optional[list[FileFilter]] = None
-    encoding: Optional[str] = None
-    workers: Optional[int] = None
 
     @field_validator("connection", mode="before")
     @classmethod
diff --git a/tests/test_discovery.py b/tests/test_discovery.py
index 08b911b..32de2f7 100644
--- a/tests/test_discovery.py
+++ b/tests/test_discovery.py
@@ -901,13 +901,26 @@ def test_schema_discovery_v2_request_rejects_legacy_fields():
         )
 
 
-def test_file_data_discovery_v2_request_rejects_legacy_detection_fields():
-    """The v2 FDD request rejects detection options owned by the config, while keeping file-handling params."""
+@pytest.mark.parametrize(
+    "extra_field",
+    [
+        {"custom_keywords": ["foo"]},
+        {"in_data_discovery": {"enabled": True}},
+        {"options": {"dry_run": True}},
+        {"recurse": True},
+        {"include": [{"glob": "*.csv"}]},
+        {"skip": [{"glob": "*.tmp"}]},
+        {"encoding": "utf-8"},
+        {"workers": 4},
+    ],
+)
+def test_file_data_discovery_v2_request_rejects_non_config_fields(extra_field):
+    """The config is a full override: detection options, file-handling params, and run options are all rejected."""
     with pytest.raises(ValidationError):
         FileDataDiscoveryV2Request(
             connection="conn-1",
             discovery_config=DiscoveryConfigId(DISCOVERY_CONFIG_ID),
-            custom_keywords=["foo"],
+            **extra_field,
         )
 
 
@@ -921,33 +934,15 @@ def test_start_schema_discovery_run_v2_sends_discovery_config(client):
     assert m.last_request.json() == {"connection": "conn-1", "discovery_config": DISCOVERY_CONFIG_ID}
 
 
-def test_start_file_data_discovery_run_v2_full(client):
-    """The v2 FDD request carries the config plus the file-handling params the config does not own."""
+def test_start_file_data_discovery_run_v2_sends_discovery_config(client):
+    """`start_file_data_discovery_run_v2` posts only the connection and discovery_config id to the v2 endpoint."""
     config = DiscoveryConfig(name="my_cfg", id=DiscoveryConfigId(DISCOVERY_CONFIG_ID))
-    req = FileDataDiscoveryV2Request(
-        connection="conn-1",
-        discovery_config=config,
-        options=FileDataDiscoveryOptions(dry_run=True, diagnostic_logging=True),
-        recurse=True,
-        include=[{"glob": "*.csv"}],
-        skip=[{"regex": r".*/tmp/.*", "match_against": "path"}],
-        encoding="utf-8",
-        workers=4,
-    )
+    req = FileDataDiscoveryV2Request(connection="conn-1", discovery_config=config)
     with requests_mock.Mocker() as m:
         m.post("http://test-server/api/run-file-data-discovery/v2/", json={"id": 99}, status_code=201)
         assert client.start_file_data_discovery_run_v2(req) == 99
 
-    assert m.last_request.json() == {
-        "connection": "conn-1",
-        "discovery_config": DISCOVERY_CONFIG_ID,
-        "options": {"dry_run": True, "diagnostic_logging": True},
-        "recurse": True,
-        "include": [{"glob": "*.csv"}],
-        "skip": [{"regex": r".*/tmp/.*", "match_against": "path"}],
-        "encoding": "utf-8",
-        "workers": 4,
-    }
+    assert m.last_request.json() == {"connection": "conn-1", "discovery_config": DISCOVERY_CONFIG_ID}
 
 
 def test_start_schema_discovery_run_v2_raises_invalid_discovery_config_when_not_valid(client):

From f18a2e4a15b595da0fb1b8fcbcdee9da2cde81ac Mon Sep 17 00:00:00 2001
From: Andrew Chester <andrew.chester@datamasque.com>
Date: Thu, 11 Jun 2026 11:54:08 +1200
Subject: [PATCH 11/26] feat: Rename discovery v2 endpoints to from_config;
 allow optional config and run options

---
 HISTORY.rst                           |   4 +-
 datamasque/client/__init__.py         |   8 +-
 datamasque/client/discovery.py        |  24 ++---
 datamasque/client/models/discovery.py |  50 +++++-----
 tests/test_discovery.py               | 133 +++++++++++++++++---------
 5 files changed, 128 insertions(+), 91 deletions(-)

diff --git a/HISTORY.rst b/HISTORY.rst
index 1a345ed..2cbee8b 100644
--- a/HISTORY.rst
+++ b/HISTORY.rst
@@ -6,9 +6,9 @@ History
 ------------------
 
 * Added discovery-config management APIs on ``DataMasqueClient`` (``list_discovery_configs``, ``get_discovery_config``, ``create_discovery_config``, ``update_discovery_config``, and friends), backed by the ``DiscoveryConfig`` / ``DiscoveryConfigId`` models.
-* Added ``start_schema_discovery_run_v2`` and ``start_file_data_discovery_run_v2`` for starting discovery runs from a saved discovery config, via the ``/api/schema-discovery/v2/`` and ``/api/run-file-data-discovery/v2/`` endpoints. The discovery config is a full override, so the v2 request models (``SchemaDiscoveryV2Request``, ``FileDataDiscoveryV2Request``) accept only the connection and discovery config: they reject the legacy keyword/schema/in-data-discovery fields, and ``FileDataDiscoveryV2Request`` additionally rejects the file-handling parameters (``recurse``, ``include``, ``skip``, ``encoding``, ``workers``) and run ``options``, which now live in the config.
+* Added ``start_schema_discovery_run_from_config`` and ``start_file_data_discovery_run_from_config`` for starting discovery runs from a saved discovery config, via the ``/api/schema-discovery/v2/`` and ``/api/run-file-data-discovery/v2/`` endpoints. Their request models (``SchemaDiscoveryFromConfigRequest``, ``FileDataDiscoveryFromConfigRequest``) take a ``connection`` and an optional ``discovery_config`` (``None`` runs with the server's default discovery options); the legacy keyword/schema/in-data-discovery fields and the file-handling parameters (``recurse``, ``include``, ``skip``, ``encoding``, ``workers``) are rejected because the config owns them. ``FileDataDiscoveryFromConfigRequest`` also accepts run ``options`` (``dry_run``, ``diagnostic_logging``).
 * The v1 ``start_schema_discovery_run`` and ``start_file_data_discovery_run`` endpoints are unchanged and do not accept a discovery config; ``start_file_data_discovery_run`` (with ``FileDataDiscoveryRequest``, the ``FileFilter`` include/skip model, and ``FileDataDiscoveryOptions``) is newly exposed for ad-hoc file data discovery.
-* Added ``InvalidDiscoveryConfigError``, raised by the v2 run-start methods when the referenced config is missing, archived, or not in a ``valid`` validation state.
+* Added ``InvalidDiscoveryConfigError``, raised by the from-config run-start methods when the referenced config is missing, archived, or not in a ``valid`` validation state.
 
 1.0.5 (2026-06-18)
 ------------------
diff --git a/datamasque/client/__init__.py b/datamasque/client/__init__.py
index b9a4b85..304864f 100644
--- a/datamasque/client/__init__.py
+++ b/datamasque/client/__init__.py
@@ -55,9 +55,9 @@
 from datamasque.client.models.discovery import (
     ConstraintColumns,
     DiscoveryMatch,
+    FileDataDiscoveryFromConfigRequest,
     FileDataDiscoveryOptions,
     FileDataDiscoveryRequest,
-    FileDataDiscoveryV2Request,
     FileDiscoveryFile,
     FileDiscoveryLocatorResult,
     FileDiscoveryMatch,
@@ -71,10 +71,10 @@
     ReferencingForeignKey,
     RulesetGenerationRequest,
     SchemaDiscoveryColumn,
+    SchemaDiscoveryFromConfigRequest,
     SchemaDiscoveryPage,
     SchemaDiscoveryRequest,
     SchemaDiscoveryResult,
-    SchemaDiscoveryV2Request,
     TableConstraints,
 )
 from datamasque.client.models.discovery_config import DiscoveryConfig, DiscoveryConfigId
@@ -144,9 +144,9 @@
     "DynamoConnectionConfig",
     "FailedToStartError",
     "FileConnectionConfig",
+    "FileDataDiscoveryFromConfigRequest",
     "FileDataDiscoveryOptions",
     "FileDataDiscoveryRequest",
-    "FileDataDiscoveryV2Request",
     "FileDiscoveryFile",
     "FileDiscoveryLocatorResult",
     "FileDiscoveryMatch",
@@ -198,10 +198,10 @@
     "RunNotCancellableError",
     "S3ConnectionConfig",
     "SchemaDiscoveryColumn",
+    "SchemaDiscoveryFromConfigRequest",
     "SchemaDiscoveryPage",
     "SchemaDiscoveryRequest",
     "SchemaDiscoveryResult",
-    "SchemaDiscoveryV2Request",
     "SeedFile",
     "SelectedColumns",
     "SelectedData",
diff --git a/datamasque/client/discovery.py b/datamasque/client/discovery.py
index 183ab43..a1db3f5 100644
--- a/datamasque/client/discovery.py
+++ b/datamasque/client/discovery.py
@@ -20,15 +20,15 @@
     SelectedFileData,
 )
 from datamasque.client.models.discovery import (
+    FileDataDiscoveryFromConfigRequest,
     FileDataDiscoveryRequest,
-    FileDataDiscoveryV2Request,
     FileDiscoveryResult,
     FileRulesetGenerationRequest,
     RulesetGenerationRequest,
+    SchemaDiscoveryFromConfigRequest,
     SchemaDiscoveryPage,
     SchemaDiscoveryRequest,
     SchemaDiscoveryResult,
-    SchemaDiscoveryV2Request,
 )
 from datamasque.client.models.ruleset import Ruleset
 from datamasque.client.models.runs import RunId
@@ -270,16 +270,12 @@ def start_file_data_discovery_run(self, request: FileDataDiscoveryRequest) -> Ru
             response=response,
         )
 
-    def start_schema_discovery_run_v2(self, request: SchemaDiscoveryV2Request) -> RunId:
+    def start_schema_discovery_run_from_config(self, request: SchemaDiscoveryFromConfigRequest) -> RunId:
         """
-        Starts a schema discovery run from a saved discovery config (v2 API).
-
-        All detection options come from the saved discovery config referenced by `request.discovery_config`;
-        the legacy keyword/schema/in-data-discovery options accepted by `start_schema_discovery_run`
-        are not accepted here.
+        Starts a schema discovery run from a saved discovery config.
 
         Args:
-            request: A `SchemaDiscoveryV2Request` with the connection and saved discovery-config.
+            request: A `SchemaDiscoveryFromConfigRequest` with the connection and an optional saved discovery-config.
 
         Returns:
             RunId: The ID of the started discovery run
@@ -311,15 +307,13 @@ def start_schema_discovery_run_v2(self, request: SchemaDiscoveryV2Request) -> Ru
             response=response,
         )
 
-    def start_file_data_discovery_run_v2(self, request: FileDataDiscoveryV2Request) -> RunId:
+    def start_file_data_discovery_run_from_config(self, request: FileDataDiscoveryFromConfigRequest) -> RunId:
         """
-        Starts a file data discovery run from a saved discovery config (v2 API).
-
-        All detection options, file-handling parameters, and run options come from the saved discovery
-        config referenced by `request.discovery_config`; no per-run overrides are accepted.
+        Starts a file data discovery run from a saved discovery config.
 
         Args:
-            request: A `FileDataDiscoveryV2Request` with the connection and saved discovery-config.
+            request: A `FileDataDiscoveryFromConfigRequest` with the connection,
+                an optional saved discovery-config, and optional run `options`.
 
         Returns:
             RunId: The ID of the started discovery run
diff --git a/datamasque/client/models/discovery.py b/datamasque/client/models/discovery.py
index 5619258..eb75282 100644
--- a/datamasque/client/models/discovery.py
+++ b/datamasque/client/models/discovery.py
@@ -37,10 +37,9 @@ class SchemaDiscoveryRequest(BaseModel):
     """
     Request body for `POST /api/schema-discovery/` (the keyword-driven schema-discovery trigger).
 
-    `connection` accepts either a `ConnectionId` or a full `ConnectionConfig` returned by an earlier client call.
-    This request does not accept a `discovery_config`; to run from a saved discovery config use
-    `start_schema_discovery_run_v2` with a `SchemaDiscoveryV2Request`. Every other field uses the
-    server's default value when omitted.
+    `connection` accepts either a `ConnectionId` or a full `ConnectionConfig`
+    returned by an earlier client call.
+    This request does not accept a `discovery_config`
     """
 
     model_config = ConfigDict(extra="forbid")
@@ -65,26 +64,26 @@ def _reject_discovery_config(cls, data: Any) -> Any:
         if isinstance(data, dict) and "discovery_config" in data:
             raise ValueError(
                 "`discovery_config` is not accepted by the keyword-driven schema-discovery request; "
-                "use `start_schema_discovery_run_v2` with a `SchemaDiscoveryV2Request` to run from a saved "
-                "discovery config."
+                "use `start_schema_discovery_run_from_config` with a `SchemaDiscoveryFromConfigRequest` "
+                "to run from a saved discovery config."
             )
         return data
 
 
-class SchemaDiscoveryV2Request(BaseModel):
+class SchemaDiscoveryFromConfigRequest(BaseModel):
     """
     Request body for `POST /api/schema-discovery/v2/` (start a run from a saved discovery config).
 
-    `connection` accepts either a `ConnectionId` or a full `ConnectionConfig` returned by an earlier client call.
-    `discovery_config` accepts either a `DiscoveryConfigId` or a full `DiscoveryConfig`, and is a full
-    override: it supplies every detection option, so the legacy keyword/schema/in-data-discovery fields
-    accepted by `SchemaDiscoveryRequest` are rejected here.
+    `connection` accepts either a `ConnectionId` or a full `ConnectionConfig`
+    returned by an earlier client call.
+    `discovery_config` accepts either a `DiscoveryConfigId` or a full `DiscoveryConfig`,
+    or `None` to run with the server's default discovery options.
     """
 
     model_config = ConfigDict(extra="forbid")
 
     connection: Union[ConnectionId, ConnectionConfig]
-    discovery_config: Union[DiscoveryConfigId, DiscoveryConfig]
+    discovery_config: Optional[Union[DiscoveryConfigId, DiscoveryConfig]] = None
 
     @field_validator("connection", mode="before")
     @classmethod
@@ -161,10 +160,9 @@ class FileDataDiscoveryRequest(BaseModel):
     """
     Request body for `POST /api/run-file-data-discovery/` (the keyword-driven file-data-discovery trigger).
 
-    `connection` accepts either a `ConnectionId` or a full `ConnectionConfig` returned by an earlier client call.
-    This request does not accept a `discovery_config`; to run from a saved discovery config use
-    `start_file_data_discovery_run_v2` with a `FileDataDiscoveryV2Request`. All other fields use the
-    server's default value when omitted.
+    `connection` accepts either a `ConnectionId` or a full `ConnectionConfig`
+    returned by an earlier client call.
+    This request does not accept a `discovery_config`.
     """
 
     model_config = ConfigDict(extra="forbid")
@@ -194,27 +192,29 @@ def _reject_discovery_config(cls, data: Any) -> Any:
         if isinstance(data, dict) and "discovery_config" in data:
             raise ValueError(
                 "`discovery_config` is not accepted by the keyword-driven file-data-discovery request; "
-                "use `start_file_data_discovery_run_v2` with a `FileDataDiscoveryV2Request` to run from a saved "
-                "discovery config."
+                "use `start_file_data_discovery_run_from_config` with a `FileDataDiscoveryFromConfigRequest` "
+                "to run from a saved discovery config."
             )
         return data
 
 
-class FileDataDiscoveryV2Request(BaseModel):
+class FileDataDiscoveryFromConfigRequest(BaseModel):
     """
     Request body for `POST /api/run-file-data-discovery/v2/` (start a run from a saved discovery config).
 
-    `connection` accepts either a `ConnectionId` or a full `ConnectionConfig` returned by an earlier client call.
-    `discovery_config` accepts either a `DiscoveryConfigId` or a full `DiscoveryConfig`, and is a full
-    override: it supplies every detection option as well as the file-handling and run options, so the
-    legacy keyword/in-data-discovery fields and the file-handling parameters (`recurse`, `include`, `skip`,
-    `encoding`, `workers`) accepted by `FileDataDiscoveryRequest` are rejected here.
+    `connection` accepts either a `ConnectionId` or a full `ConnectionConfig`
+    returned by an earlier client call.
+    `discovery_config` accepts either a `DiscoveryConfigId` or a full `DiscoveryConfig`,
+    or `None` to run with the server's default discovery options.
+    `options` carries run-time toggles (`dry_run`, `diagnostic_logging`);
+    detection and file-handling settings come from the discovery config, not the request.
     """
 
     model_config = ConfigDict(extra="forbid")
 
     connection: Union[ConnectionId, ConnectionConfig]
-    discovery_config: Union[DiscoveryConfigId, DiscoveryConfig]
+    discovery_config: Optional[Union[DiscoveryConfigId, DiscoveryConfig]] = None
+    options: Optional[FileDataDiscoveryOptions] = None
 
     @field_validator("connection", mode="before")
     @classmethod
diff --git a/tests/test_discovery.py b/tests/test_discovery.py
index 32de2f7..c5f1fd8 100644
--- a/tests/test_discovery.py
+++ b/tests/test_discovery.py
@@ -12,19 +12,19 @@
     DataMasqueClient,
     DiscoveryConfig,
     DiscoveryConfigId,
+    FileDataDiscoveryFromConfigRequest,
     FileDataDiscoveryOptions,
     FileDataDiscoveryRequest,
-    FileDataDiscoveryV2Request,
     FileFilter,
     FileFilterMatchAgainst,
     FileRulesetGenerationRequest,
     InDataDiscoveryConfig,
     RulesetGenerationRequest,
     RunId,
+    SchemaDiscoveryFromConfigRequest,
     SchemaDiscoveryPage,
     SchemaDiscoveryRequest,
     SchemaDiscoveryResult,
-    SchemaDiscoveryV2Request,
 )
 from datamasque.client.exceptions import (
     AsyncRulesetGenerationInProgressError,
@@ -742,18 +742,15 @@ def test_start_schema_discovery_run_raises_on_non_201(client):
             client.start_schema_discovery_run(SchemaDiscoveryRequest(connection="nope"))
 
 
-# --- v1 (legacy) discovery: no discovery_config accepted ---
-
-
 def test_schema_discovery_request_rejects_discovery_config():
-    """The v1 schema-discovery request rejects `discovery_config` and points the user at the v2 method."""
-    with pytest.raises(ValidationError, match="start_schema_discovery_run_v2"):
+    """The v1 schema-discovery request rejects `discovery_config` and points the user at the from-config method."""
+    with pytest.raises(ValidationError, match="start_schema_discovery_run_from_config"):
         SchemaDiscoveryRequest(connection="conn-1", discovery_config=DiscoveryConfigId(DISCOVERY_CONFIG_ID))
 
 
 def test_file_data_discovery_request_rejects_discovery_config():
-    """The v1 file-data-discovery request rejects `discovery_config` and points the user at the v2 method."""
-    with pytest.raises(ValidationError, match="start_file_data_discovery_run_v2"):
+    """The v1 file-data-discovery request rejects `discovery_config` and points the user at the from-config method."""
+    with pytest.raises(ValidationError, match="start_file_data_discovery_run_from_config"):
         FileDataDiscoveryRequest(connection="conn-1", discovery_config=DiscoveryConfigId(DISCOVERY_CONFIG_ID))
 
 
@@ -865,36 +862,33 @@ def test_start_file_data_discovery_run_raises_on_non_201(client):
             client.start_file_data_discovery_run(FileDataDiscoveryRequest(connection="nope"))
 
 
-# --- v2 discovery: discovery_config only, posted to the /v2/ endpoints ---
-
-
-def test_schema_discovery_v2_request_accepts_discovery_config_id():
+def test_schema_discovery_from_config_request_accepts_discovery_config_id():
     """A `DiscoveryConfigId` string passes through unchanged in the v2 request body."""
-    req = SchemaDiscoveryV2Request(connection="conn-1", discovery_config=DiscoveryConfigId(DISCOVERY_CONFIG_ID))
+    req = SchemaDiscoveryFromConfigRequest(connection="conn-1", discovery_config=DiscoveryConfigId(DISCOVERY_CONFIG_ID))
     assert req.model_dump(exclude_none=True, mode="json") == {
         "connection": "conn-1",
         "discovery_config": DISCOVERY_CONFIG_ID,
     }
 
 
-def test_schema_discovery_v2_request_unwraps_discovery_config_model():
+def test_schema_discovery_from_config_request_unwraps_discovery_config_model():
     """Passing a full `DiscoveryConfig` object substitutes its `id` for the wire payload."""
     config = DiscoveryConfig(name="my_cfg", id=DiscoveryConfigId(DISCOVERY_CONFIG_ID))
-    req = SchemaDiscoveryV2Request(connection="conn-1", discovery_config=config)
+    req = SchemaDiscoveryFromConfigRequest(connection="conn-1", discovery_config=config)
     assert req.model_dump(exclude_none=True, mode="json")["discovery_config"] == DISCOVERY_CONFIG_ID
 
 
-def test_schema_discovery_v2_request_rejects_unsaved_discovery_config():
+def test_schema_discovery_from_config_request_rejects_unsaved_discovery_config():
     """A `DiscoveryConfig` without an `id` cannot be used yet — raises immediately."""
     config = DiscoveryConfig(name="my_cfg")
     with pytest.raises(ValueError, match="id is None"):
-        SchemaDiscoveryV2Request(connection="conn-1", discovery_config=config)
+        SchemaDiscoveryFromConfigRequest(connection="conn-1", discovery_config=config)
 
 
-def test_schema_discovery_v2_request_rejects_legacy_fields():
-    """`config_id` is a full override — the v2 request rejects any legacy detection options."""
+def test_schema_discovery_from_config_request_rejects_legacy_fields():
+    """The saved-config request rejects any legacy detection options — they live in the config."""
     with pytest.raises(ValidationError):
-        SchemaDiscoveryV2Request(
+        SchemaDiscoveryFromConfigRequest(
             connection="conn-1",
             discovery_config=DiscoveryConfigId(DISCOVERY_CONFIG_ID),
             schemas=["public"],
@@ -906,7 +900,6 @@ def test_schema_discovery_v2_request_rejects_legacy_fields():
     [
         {"custom_keywords": ["foo"]},
         {"in_data_discovery": {"enabled": True}},
-        {"options": {"dry_run": True}},
         {"recurse": True},
         {"include": [{"glob": "*.csv"}]},
         {"skip": [{"glob": "*.tmp"}]},
@@ -914,38 +907,82 @@ def test_schema_discovery_v2_request_rejects_legacy_fields():
         {"workers": 4},
     ],
 )
-def test_file_data_discovery_v2_request_rejects_non_config_fields(extra_field):
-    """The config is a full override: detection options, file-handling params, and run options are all rejected."""
+def test_file_data_discovery_from_config_request_rejects_non_config_fields(extra_field):
+    """Detection options and file-handling params are rejected — they live in the config, not the request."""
     with pytest.raises(ValidationError):
-        FileDataDiscoveryV2Request(
+        FileDataDiscoveryFromConfigRequest(
             connection="conn-1",
             discovery_config=DiscoveryConfigId(DISCOVERY_CONFIG_ID),
             **extra_field,
         )
 
 
-def test_start_schema_discovery_run_v2_sends_discovery_config(client):
-    """`start_schema_discovery_run_v2` posts the discovery_config id to the v2 endpoint."""
-    req = SchemaDiscoveryV2Request(connection="conn-1", discovery_config=DiscoveryConfigId(DISCOVERY_CONFIG_ID))
+def test_schema_discovery_from_config_request_allows_none_discovery_config():
+    """`discovery_config` may be omitted or None; it is dropped from the payload so the server uses its defaults."""
+    omitted = SchemaDiscoveryFromConfigRequest(connection="conn-1")
+    explicit_none = SchemaDiscoveryFromConfigRequest(connection="conn-1", discovery_config=None)
+    assert omitted.model_dump(exclude_none=True, mode="json") == {"connection": "conn-1"}
+    assert explicit_none.model_dump(exclude_none=True, mode="json") == {"connection": "conn-1"}
+
+
+def test_file_data_discovery_from_config_request_allows_none_discovery_config():
+    """`discovery_config` may be omitted or None; it is dropped from the payload so the server uses its defaults."""
+    omitted = FileDataDiscoveryFromConfigRequest(connection="conn-1")
+    explicit_none = FileDataDiscoveryFromConfigRequest(connection="conn-1", discovery_config=None)
+    assert omitted.model_dump(exclude_none=True, mode="json") == {"connection": "conn-1"}
+    assert explicit_none.model_dump(exclude_none=True, mode="json") == {"connection": "conn-1"}
+
+
+def test_start_schema_discovery_run_from_config_sends_discovery_config(client):
+    """`start_schema_discovery_run_from_config` posts the discovery_config id to the saved-config endpoint."""
+    req = SchemaDiscoveryFromConfigRequest(connection="conn-1", discovery_config=DiscoveryConfigId(DISCOVERY_CONFIG_ID))
     with requests_mock.Mocker() as m:
         m.post("http://test-server/api/schema-discovery/v2/", json={"id": 11}, status_code=201)
-        assert client.start_schema_discovery_run_v2(req) == 11
+        assert client.start_schema_discovery_run_from_config(req) == 11
 
     assert m.last_request.json() == {"connection": "conn-1", "discovery_config": DISCOVERY_CONFIG_ID}
 
 
-def test_start_file_data_discovery_run_v2_sends_discovery_config(client):
-    """`start_file_data_discovery_run_v2` posts only the connection and discovery_config id to the v2 endpoint."""
+def test_start_file_data_discovery_run_from_config_sends_discovery_config(client):
+    """`start_file_data_discovery_run_from_config` posts only the connection and discovery_config id."""
     config = DiscoveryConfig(name="my_cfg", id=DiscoveryConfigId(DISCOVERY_CONFIG_ID))
-    req = FileDataDiscoveryV2Request(connection="conn-1", discovery_config=config)
+    req = FileDataDiscoveryFromConfigRequest(connection="conn-1", discovery_config=config)
     with requests_mock.Mocker() as m:
         m.post("http://test-server/api/run-file-data-discovery/v2/", json={"id": 99}, status_code=201)
-        assert client.start_file_data_discovery_run_v2(req) == 99
+        assert client.start_file_data_discovery_run_from_config(req) == 99
 
     assert m.last_request.json() == {"connection": "conn-1", "discovery_config": DISCOVERY_CONFIG_ID}
 
 
-def test_start_schema_discovery_run_v2_raises_invalid_discovery_config_when_not_valid(client):
+def test_start_file_data_discovery_run_from_config_sends_options(client):
+    """`options` (dry_run / diagnostic_logging) is posted alongside the config on the file from-config trigger."""
+    req = FileDataDiscoveryFromConfigRequest(
+        connection="conn-1",
+        discovery_config=DiscoveryConfigId(DISCOVERY_CONFIG_ID),
+        options=FileDataDiscoveryOptions(dry_run=True, diagnostic_logging=True),
+    )
+    with requests_mock.Mocker() as m:
+        m.post("http://test-server/api/run-file-data-discovery/v2/", json={"id": 77}, status_code=201)
+        assert client.start_file_data_discovery_run_from_config(req) == 77
+
+    assert m.last_request.json() == {
+        "connection": "conn-1",
+        "discovery_config": DISCOVERY_CONFIG_ID,
+        "options": {"dry_run": True, "diagnostic_logging": True},
+    }
+
+
+def test_start_schema_discovery_run_from_config_none_omits_discovery_config(client):
+    """With `discovery_config=None` the client posts only the connection; the server falls back to its defaults."""
+    req = SchemaDiscoveryFromConfigRequest(connection="conn-1", discovery_config=None)
+    with requests_mock.Mocker() as m:
+        m.post("http://test-server/api/schema-discovery/v2/", json={"id": 13}, status_code=201)
+        assert client.start_schema_discovery_run_from_config(req) == 13
+
+    assert m.last_request.json() == {"connection": "conn-1"}
+
+
+def test_start_schema_discovery_run_from_config_raises_invalid_discovery_config_when_not_valid(client):
     """A 400 with a `discovery_config` validation message raises the specific subclass."""
     with requests_mock.Mocker() as m:
         m.post(
@@ -959,12 +996,14 @@ def test_start_schema_discovery_run_v2_raises_invalid_discovery_config_when_not_
             InvalidDiscoveryConfigError,
             match=r'Schema discovery run failed to start due to discovery config error: .*"my_cfg".*invalid',
         ):
-            client.start_schema_discovery_run_v2(
-                SchemaDiscoveryV2Request(connection="conn-1", discovery_config=DiscoveryConfigId(DISCOVERY_CONFIG_ID)),
+            client.start_schema_discovery_run_from_config(
+                SchemaDiscoveryFromConfigRequest(
+                    connection="conn-1", discovery_config=DiscoveryConfigId(DISCOVERY_CONFIG_ID)
+                ),
             )
 
 
-def test_start_schema_discovery_run_v2_raises_invalid_discovery_config_when_missing(client):
+def test_start_schema_discovery_run_from_config_raises_invalid_discovery_config_when_missing(client):
     """A 400 from DRF's PrimaryKeyRelatedField (config not found / archived) is also classified."""
     with requests_mock.Mocker() as m:
         m.post(
@@ -973,12 +1012,14 @@ def test_start_schema_discovery_run_v2_raises_invalid_discovery_config_when_miss
             status_code=400,
         )
         with pytest.raises(InvalidDiscoveryConfigError, match="object does not exist"):
-            client.start_schema_discovery_run_v2(
-                SchemaDiscoveryV2Request(connection="conn-1", discovery_config=DiscoveryConfigId(DISCOVERY_CONFIG_ID)),
+            client.start_schema_discovery_run_from_config(
+                SchemaDiscoveryFromConfigRequest(
+                    connection="conn-1", discovery_config=DiscoveryConfigId(DISCOVERY_CONFIG_ID)
+                ),
             )
 
 
-def test_start_file_data_discovery_run_v2_raises_invalid_discovery_config(client):
+def test_start_file_data_discovery_run_from_config_raises_invalid_discovery_config(client):
     with requests_mock.Mocker() as m:
         m.post(
             "http://test-server/api/run-file-data-discovery/v2/",
@@ -991,15 +1032,15 @@ def test_start_file_data_discovery_run_v2_raises_invalid_discovery_config(client
             InvalidDiscoveryConfigError,
             match=r"File data discovery run failed to start due to discovery config error: .*in_progress",
         ):
-            client.start_file_data_discovery_run_v2(
-                FileDataDiscoveryV2Request(
+            client.start_file_data_discovery_run_from_config(
+                FileDataDiscoveryFromConfigRequest(
                     connection="conn-1",
                     discovery_config=DiscoveryConfigId(DISCOVERY_CONFIG_ID),
                 ),
             )
 
 
-def test_start_schema_discovery_run_v2_non_discovery_config_400_still_raises_generic_error(client):
+def test_start_schema_discovery_run_from_config_non_discovery_config_400_still_raises_generic_error(client):
     """Other 400s (e.g. unknown connection) keep raising the base FailedToStartError, not the config subclass."""
     with requests_mock.Mocker() as m:
         m.post(
@@ -1008,7 +1049,9 @@ def test_start_schema_discovery_run_v2_non_discovery_config_400_still_raises_gen
             status_code=400,
         )
         with pytest.raises(FailedToStartError) as exc_info:
-            client.start_schema_discovery_run_v2(
-                SchemaDiscoveryV2Request(connection="nope", discovery_config=DiscoveryConfigId(DISCOVERY_CONFIG_ID)),
+            client.start_schema_discovery_run_from_config(
+                SchemaDiscoveryFromConfigRequest(
+                    connection="nope", discovery_config=DiscoveryConfigId(DISCOVERY_CONFIG_ID)
+                ),
             )
         assert not isinstance(exc_info.value, InvalidDiscoveryConfigError)

From ee52e054b147bdea9d71f60ee407684978439b12 Mon Sep 17 00:00:00 2001
From: Andrew Chester <andrew.chester@datamasque.com>
Date: Thu, 11 Jun 2026 19:02:00 +1200
Subject: [PATCH 12/26] refactor: Drop unused dry_run from file discovery
 options and condense the changelog

---
 HISTORY.rst                           | 10 ++++++----
 datamasque/client/models/discovery.py |  5 ++---
 tests/test_discovery.py               | 10 +++++-----
 3 files changed, 13 insertions(+), 12 deletions(-)

diff --git a/HISTORY.rst b/HISTORY.rst
index 2cbee8b..584d294 100644
--- a/HISTORY.rst
+++ b/HISTORY.rst
@@ -5,10 +5,12 @@ History
 1.1.0 (unreleased)
 ------------------
 
-* Added discovery-config management APIs on ``DataMasqueClient`` (``list_discovery_configs``, ``get_discovery_config``, ``create_discovery_config``, ``update_discovery_config``, and friends), backed by the ``DiscoveryConfig`` / ``DiscoveryConfigId`` models.
-* Added ``start_schema_discovery_run_from_config`` and ``start_file_data_discovery_run_from_config`` for starting discovery runs from a saved discovery config, via the ``/api/schema-discovery/v2/`` and ``/api/run-file-data-discovery/v2/`` endpoints. Their request models (``SchemaDiscoveryFromConfigRequest``, ``FileDataDiscoveryFromConfigRequest``) take a ``connection`` and an optional ``discovery_config`` (``None`` runs with the server's default discovery options); the legacy keyword/schema/in-data-discovery fields and the file-handling parameters (``recurse``, ``include``, ``skip``, ``encoding``, ``workers``) are rejected because the config owns them. ``FileDataDiscoveryFromConfigRequest`` also accepts run ``options`` (``dry_run``, ``diagnostic_logging``).
-* The v1 ``start_schema_discovery_run`` and ``start_file_data_discovery_run`` endpoints are unchanged and do not accept a discovery config; ``start_file_data_discovery_run`` (with ``FileDataDiscoveryRequest``, the ``FileFilter`` include/skip model, and ``FileDataDiscoveryOptions``) is newly exposed for ad-hoc file data discovery.
-* Added ``InvalidDiscoveryConfigError``, raised by the from-config run-start methods when the referenced config is missing, archived, or not in a ``valid`` validation state.
+* Added discovery-config management APIs (``list_discovery_configs``, ``create_discovery_config``, and friends).
+* Added schema-discovery and file-data-discovery APIs that take a saved discovery configuration
+  (``start_schema_discovery_run_from_config`` / ``start_file_data_discovery_run_from_config``).
+  Adoption is recommended; the older APIs that take individual options will be deprecated in a future release.
+* Corrected the file-data-discovery ``include``/``skip`` filter syntax and added ``ignore_rules`` support.
+* Added ``InvalidDiscoveryConfigError``, raised when a discovery run can't start due to an invalid discovery config.
 
 1.0.5 (2026-06-18)
 ------------------
diff --git a/datamasque/client/models/discovery.py b/datamasque/client/models/discovery.py
index eb75282..86215ab 100644
--- a/datamasque/client/models/discovery.py
+++ b/datamasque/client/models/discovery.py
@@ -39,7 +39,7 @@ class SchemaDiscoveryRequest(BaseModel):
 
     `connection` accepts either a `ConnectionId` or a full `ConnectionConfig`
     returned by an earlier client call.
-    This request does not accept a `discovery_config`
+    This request does not accept a `discovery_config`.
     """
 
     model_config = ConfigDict(extra="forbid")
@@ -152,7 +152,6 @@ class FileDataDiscoveryOptions(BaseModel):
 
     model_config = ConfigDict(extra="forbid")
 
-    dry_run: Optional[bool] = None
     diagnostic_logging: Optional[bool] = None
 
 
@@ -206,7 +205,7 @@ class FileDataDiscoveryFromConfigRequest(BaseModel):
     returned by an earlier client call.
     `discovery_config` accepts either a `DiscoveryConfigId` or a full `DiscoveryConfig`,
     or `None` to run with the server's default discovery options.
-    `options` carries run-time toggles (`dry_run`, `diagnostic_logging`);
+    `options` carries the `diagnostic_logging` run-time toggle;
     detection and file-handling settings come from the discovery config, not the request.
     """
 
diff --git a/tests/test_discovery.py b/tests/test_discovery.py
index c5f1fd8..7305d26 100644
--- a/tests/test_discovery.py
+++ b/tests/test_discovery.py
@@ -775,7 +775,7 @@ def test_start_file_data_discovery_run_full(client):
     """All legacy FDD request fields populate the wire payload and pass through unwrap helpers."""
     req = FileDataDiscoveryRequest(
         connection="conn-1",
-        options=FileDataDiscoveryOptions(dry_run=True, diagnostic_logging=True),
+        options=FileDataDiscoveryOptions(diagnostic_logging=True),
         custom_keywords=["foo"],
         ignored_keywords=["bar"],
         disable_built_in_keywords=True,
@@ -799,7 +799,7 @@ def test_start_file_data_discovery_run_full(client):
     body = m.last_request.json()
     assert body == {
         "connection": "conn-1",
-        "options": {"dry_run": True, "diagnostic_logging": True},
+        "options": {"diagnostic_logging": True},
         "custom_keywords": ["foo"],
         "ignored_keywords": ["bar"],
         "disable_built_in_keywords": True,
@@ -955,11 +955,11 @@ def test_start_file_data_discovery_run_from_config_sends_discovery_config(client
 
 
 def test_start_file_data_discovery_run_from_config_sends_options(client):
-    """`options` (dry_run / diagnostic_logging) is posted alongside the config on the file from-config trigger."""
+    """`options` (diagnostic_logging) is posted alongside the config on the file from-config trigger."""
     req = FileDataDiscoveryFromConfigRequest(
         connection="conn-1",
         discovery_config=DiscoveryConfigId(DISCOVERY_CONFIG_ID),
-        options=FileDataDiscoveryOptions(dry_run=True, diagnostic_logging=True),
+        options=FileDataDiscoveryOptions(diagnostic_logging=True),
     )
     with requests_mock.Mocker() as m:
         m.post("http://test-server/api/run-file-data-discovery/v2/", json={"id": 77}, status_code=201)
@@ -968,7 +968,7 @@ def test_start_file_data_discovery_run_from_config_sends_options(client):
     assert m.last_request.json() == {
         "connection": "conn-1",
         "discovery_config": DISCOVERY_CONFIG_ID,
-        "options": {"dry_run": True, "diagnostic_logging": True},
+        "options": {"diagnostic_logging": True},
     }
 
 

From 1f2745805d5a2cb4c095d8465ff8f744ee06897f Mon Sep 17 00:00:00 2001
From: Andrew Chester <andrew.chester@datamasque.com>
Date: Fri, 12 Jun 2026 16:47:30 +1200
Subject: [PATCH 13/26] feat(discovery): DM-3562 send explicit null
 discovery_config from saved-config triggers

---
 datamasque/client/discovery.py        | 14 ++++++--
 datamasque/client/models/discovery.py |  8 ++---
 tests/test_discovery.py               | 48 ++++++++++++++++++---------
 3 files changed, 48 insertions(+), 22 deletions(-)

diff --git a/datamasque/client/discovery.py b/datamasque/client/discovery.py
index a1db3f5..ca27772 100644
--- a/datamasque/client/discovery.py
+++ b/datamasque/client/discovery.py
@@ -275,7 +275,8 @@ def start_schema_discovery_run_from_config(self, request: SchemaDiscoveryFromCon
         Starts a schema discovery run from a saved discovery config.
 
         Args:
-            request: A `SchemaDiscoveryFromConfigRequest` with the connection and an optional saved discovery-config.
+            request: A `SchemaDiscoveryFromConfigRequest` with the `connection` and a required `discovery_config`
+                (a saved config, or `None` for the server's defaults).
 
         Returns:
             RunId: The ID of the started discovery run
@@ -287,6 +288,9 @@ def start_schema_discovery_run_from_config(self, request: SchemaDiscoveryFromCon
         """
 
         data = request.model_dump(exclude_none=True, mode="json")
+        # The server requires `discovery_config` to be present; a null selects its built-in defaults,
+        # so send it explicitly rather than letting `exclude_none` drop a None.
+        data.setdefault("discovery_config", None)
         response = self.make_request(
             "POST",
             "/api/schema-discovery/v2/",
@@ -312,8 +316,9 @@ def start_file_data_discovery_run_from_config(self, request: FileDataDiscoveryFr
         Starts a file data discovery run from a saved discovery config.
 
         Args:
-            request: A `FileDataDiscoveryFromConfigRequest` with the connection,
-                an optional saved discovery-config, and optional run `options`.
+            request: A `FileDataDiscoveryFromConfigRequest` with the `connection`,
+                a required `discovery_config` (a saved config, or `None` for the server's defaults),
+                and optional run `options`.
 
         Returns:
             RunId: The ID of the started discovery run
@@ -325,6 +330,9 @@ def start_file_data_discovery_run_from_config(self, request: FileDataDiscoveryFr
         """
 
         data = request.model_dump(exclude_none=True, mode="json")
+        # The server requires `discovery_config` to be present; a null selects its built-in defaults,
+        # so send it explicitly rather than letting `exclude_none` drop a None.
+        data.setdefault("discovery_config", None)
         response = self.make_request(
             "POST",
             "/api/run-file-data-discovery/v2/",
diff --git a/datamasque/client/models/discovery.py b/datamasque/client/models/discovery.py
index 86215ab..0fce513 100644
--- a/datamasque/client/models/discovery.py
+++ b/datamasque/client/models/discovery.py
@@ -76,14 +76,14 @@ class SchemaDiscoveryFromConfigRequest(BaseModel):
 
     `connection` accepts either a `ConnectionId` or a full `ConnectionConfig`
     returned by an earlier client call.
-    `discovery_config` accepts either a `DiscoveryConfigId` or a full `DiscoveryConfig`,
+    `discovery_config` is required: pass a `DiscoveryConfigId`, a full `DiscoveryConfig`,
     or `None` to run with the server's default discovery options.
     """
 
     model_config = ConfigDict(extra="forbid")
 
     connection: Union[ConnectionId, ConnectionConfig]
-    discovery_config: Optional[Union[DiscoveryConfigId, DiscoveryConfig]] = None
+    discovery_config: Optional[Union[DiscoveryConfigId, DiscoveryConfig]]
 
     @field_validator("connection", mode="before")
     @classmethod
@@ -203,7 +203,7 @@ class FileDataDiscoveryFromConfigRequest(BaseModel):
 
     `connection` accepts either a `ConnectionId` or a full `ConnectionConfig`
     returned by an earlier client call.
-    `discovery_config` accepts either a `DiscoveryConfigId` or a full `DiscoveryConfig`,
+    `discovery_config` is required: pass a `DiscoveryConfigId`, a full `DiscoveryConfig`,
     or `None` to run with the server's default discovery options.
     `options` carries the `diagnostic_logging` run-time toggle;
     detection and file-handling settings come from the discovery config, not the request.
@@ -212,7 +212,7 @@ class FileDataDiscoveryFromConfigRequest(BaseModel):
     model_config = ConfigDict(extra="forbid")
 
     connection: Union[ConnectionId, ConnectionConfig]
-    discovery_config: Optional[Union[DiscoveryConfigId, DiscoveryConfig]] = None
+    discovery_config: Optional[Union[DiscoveryConfigId, DiscoveryConfig]]
     options: Optional[FileDataDiscoveryOptions] = None
 
     @field_validator("connection", mode="before")
diff --git a/tests/test_discovery.py b/tests/test_discovery.py
index 7305d26..dcf736a 100644
--- a/tests/test_discovery.py
+++ b/tests/test_discovery.py
@@ -917,20 +917,28 @@ def test_file_data_discovery_from_config_request_rejects_non_config_fields(extra
         )
 
 
-def test_schema_discovery_from_config_request_allows_none_discovery_config():
-    """`discovery_config` may be omitted or None; it is dropped from the payload so the server uses its defaults."""
-    omitted = SchemaDiscoveryFromConfigRequest(connection="conn-1")
-    explicit_none = SchemaDiscoveryFromConfigRequest(connection="conn-1", discovery_config=None)
-    assert omitted.model_dump(exclude_none=True, mode="json") == {"connection": "conn-1"}
-    assert explicit_none.model_dump(exclude_none=True, mode="json") == {"connection": "conn-1"}
+def test_schema_discovery_from_config_request_requires_discovery_config():
+    """`discovery_config` must be set explicitly; omitting it is a validation error (it may be None, but not absent)."""
+    with pytest.raises(ValidationError, match="discovery_config"):
+        SchemaDiscoveryFromConfigRequest(connection="conn-1")
 
 
-def test_file_data_discovery_from_config_request_allows_none_discovery_config():
-    """`discovery_config` may be omitted or None; it is dropped from the payload so the server uses its defaults."""
-    omitted = FileDataDiscoveryFromConfigRequest(connection="conn-1")
-    explicit_none = FileDataDiscoveryFromConfigRequest(connection="conn-1", discovery_config=None)
-    assert omitted.model_dump(exclude_none=True, mode="json") == {"connection": "conn-1"}
-    assert explicit_none.model_dump(exclude_none=True, mode="json") == {"connection": "conn-1"}
+def test_schema_discovery_from_config_request_accepts_none_discovery_config():
+    """An explicit None is accepted and means the server uses its built-in defaults."""
+    req = SchemaDiscoveryFromConfigRequest(connection="conn-1", discovery_config=None)
+    assert req.discovery_config is None
+
+
+def test_file_data_discovery_from_config_request_requires_discovery_config():
+    """`discovery_config` must be set explicitly; omitting it is a validation error (it may be None, but not absent)."""
+    with pytest.raises(ValidationError, match="discovery_config"):
+        FileDataDiscoveryFromConfigRequest(connection="conn-1")
+
+
+def test_file_data_discovery_from_config_request_accepts_none_discovery_config():
+    """An explicit None is accepted and means the server uses its built-in defaults."""
+    req = FileDataDiscoveryFromConfigRequest(connection="conn-1", discovery_config=None)
+    assert req.discovery_config is None
 
 
 def test_start_schema_discovery_run_from_config_sends_discovery_config(client):
@@ -972,14 +980,24 @@ def test_start_file_data_discovery_run_from_config_sends_options(client):
     }
 
 
-def test_start_schema_discovery_run_from_config_none_omits_discovery_config(client):
-    """With `discovery_config=None` the client posts only the connection; the server falls back to its defaults."""
+def test_start_schema_discovery_run_from_config_none_sends_null_discovery_config(client):
+    """With `discovery_config=None` the client posts an explicit null so the server applies its defaults."""
     req = SchemaDiscoveryFromConfigRequest(connection="conn-1", discovery_config=None)
     with requests_mock.Mocker() as m:
         m.post("http://test-server/api/schema-discovery/v2/", json={"id": 13}, status_code=201)
         assert client.start_schema_discovery_run_from_config(req) == 13
 
-    assert m.last_request.json() == {"connection": "conn-1"}
+    assert m.last_request.json() == {"connection": "conn-1", "discovery_config": None}
+
+
+def test_start_file_data_discovery_run_from_config_none_sends_null_discovery_config(client):
+    """With `discovery_config=None` the file-data trigger posts an explicit null so the server applies its defaults."""
+    req = FileDataDiscoveryFromConfigRequest(connection="conn-1", discovery_config=None)
+    with requests_mock.Mocker() as m:
+        m.post("http://test-server/api/run-file-data-discovery/v2/", json={"id": 21}, status_code=201)
+        assert client.start_file_data_discovery_run_from_config(req) == 21
+
+    assert m.last_request.json() == {"connection": "conn-1", "discovery_config": None}
 
 
 def test_start_schema_discovery_run_from_config_raises_invalid_discovery_config_when_not_valid(client):

From 76a3921f9b06a68e3b04b1965234df075f3237a1 Mon Sep 17 00:00:00 2001
From: Andrew Chester <andrew.chester@datamasque.com>
Date: Mon, 15 Jun 2026 10:19:39 +1200
Subject: [PATCH 14/26] feat(discovery): DM-3562 accept schemas on saved-config
 schema discovery

---
 datamasque/client/models/discovery.py |  4 +++-
 tests/test_discovery.py               | 34 +++++++++++++++++++++++++--
 2 files changed, 35 insertions(+), 3 deletions(-)

diff --git a/datamasque/client/models/discovery.py b/datamasque/client/models/discovery.py
index 0fce513..6a41b1c 100644
--- a/datamasque/client/models/discovery.py
+++ b/datamasque/client/models/discovery.py
@@ -77,13 +77,15 @@ class SchemaDiscoveryFromConfigRequest(BaseModel):
     `connection` accepts either a `ConnectionId` or a full `ConnectionConfig`
     returned by an earlier client call.
     `discovery_config` is required: pass a `DiscoveryConfigId`, a full `DiscoveryConfig`,
-    or `None` to run with the server's default discovery options.
+    or `None` to run with the default discovery options.
+    `schemas` optionally scopes the run to specific schemas; omit it to scan the connection's default schema.
     """
 
     model_config = ConfigDict(extra="forbid")
 
     connection: Union[ConnectionId, ConnectionConfig]
     discovery_config: Optional[Union[DiscoveryConfigId, DiscoveryConfig]]
+    schemas: Optional[list[str]] = None
 
     @field_validator("connection", mode="before")
     @classmethod
diff --git a/tests/test_discovery.py b/tests/test_discovery.py
index dcf736a..0e14e57 100644
--- a/tests/test_discovery.py
+++ b/tests/test_discovery.py
@@ -886,15 +886,29 @@ def test_schema_discovery_from_config_request_rejects_unsaved_discovery_config()
 
 
 def test_schema_discovery_from_config_request_rejects_legacy_fields():
-    """The saved-config request rejects any legacy detection options — they live in the config."""
+    """The saved-config request rejects legacy detection options — they live in the config."""
     with pytest.raises(ValidationError):
         SchemaDiscoveryFromConfigRequest(
             connection="conn-1",
             discovery_config=DiscoveryConfigId(DISCOVERY_CONFIG_ID),
-            schemas=["public"],
+            custom_keywords=["foo"],
         )
 
 
+def test_schema_discovery_from_config_request_accepts_schemas():
+    """`schemas` scopes the saved-config schema run to specific schemas and is forwarded as-is."""
+    req = SchemaDiscoveryFromConfigRequest(
+        connection="conn-1",
+        discovery_config=DiscoveryConfigId(DISCOVERY_CONFIG_ID),
+        schemas=["public", "sales"],
+    )
+    assert req.model_dump(exclude_none=True, mode="json") == {
+        "connection": "conn-1",
+        "discovery_config": DISCOVERY_CONFIG_ID,
+        "schemas": ["public", "sales"],
+    }
+
+
 @pytest.mark.parametrize(
     "extra_field",
     [
@@ -951,6 +965,22 @@ def test_start_schema_discovery_run_from_config_sends_discovery_config(client):
     assert m.last_request.json() == {"connection": "conn-1", "discovery_config": DISCOVERY_CONFIG_ID}
 
 
+def test_start_schema_discovery_run_from_config_sends_schemas(client):
+    """`start_schema_discovery_run_from_config` forwards a `schemas` scope to the saved-config endpoint."""
+    req = SchemaDiscoveryFromConfigRequest(
+        connection="conn-1", discovery_config=DiscoveryConfigId(DISCOVERY_CONFIG_ID), schemas=["public"]
+    )
+    with requests_mock.Mocker() as m:
+        m.post("http://test-server/api/schema-discovery/v2/", json={"id": 12}, status_code=201)
+        assert client.start_schema_discovery_run_from_config(req) == 12
+
+    assert m.last_request.json() == {
+        "connection": "conn-1",
+        "discovery_config": DISCOVERY_CONFIG_ID,
+        "schemas": ["public"],
+    }
+
+
 def test_start_file_data_discovery_run_from_config_sends_discovery_config(client):
     """`start_file_data_discovery_run_from_config` posts only the connection and discovery_config id."""
     config = DiscoveryConfig(name="my_cfg", id=DiscoveryConfigId(DISCOVERY_CONFIG_ID))

From 11b75e4a734a1eba1d2a4fb7eabb3eacce4886c5 Mon Sep 17 00:00:00 2001
From: Colin Haywood <colin.haywood@datamasque.com>
Date: Tue, 16 Jun 2026 10:49:04 +1200
Subject: [PATCH 15/26] feat: DM-3400: add missing client features for the
 release-testing migration

- Add `is_user_subscribed` to `MaskingRunRequest` (subscribe the requesting user to run notifications).
- Add `auto_pull` / `auto_pull_branch` to `MaskingRunOptions` (refresh the ruleset from git before a run).
- Surface `validation_error` (and `validation_error_type` for rulesets) on `Ruleset` and `RulesetLibrary`.
- Expose git provenance as a nested `git` (`GitSnapshot`) field on both, collapsed from the flat `git_*` fields.
- Exclude read-only fields (`id`, `is_valid`, `validation_error`, `git`, ...) from create/update request bodies.
- Fix `SslZipFile` uploads to send the required `database_type=mysql` form field.
---
 HISTORY.rst                                 |  10 ++
 datamasque/client/__init__.py               |  10 +-
 datamasque/client/files.py                  |   2 +-
 datamasque/client/models/files.py           |  11 ++
 datamasque/client/models/git.py             |  60 ++++++++
 datamasque/client/models/ruleset.py         |  19 ++-
 datamasque/client/models/ruleset_library.py |  19 +--
 datamasque/client/models/runs.py            |   9 ++
 datamasque/client/models/status.py          |   9 ++
 datamasque/client/ruleset_libraries.py      |   7 +-
 datamasque/client/rulesets.py               |  16 +--
 tests/test_files.py                         |  39 +++++
 tests/test_ruleset_library.py               | 120 ++++++++++++++++
 tests/test_rulesets.py                      | 151 +++++++++++++++++++-
 tests/test_runs.py                          |  47 ++++++
 15 files changed, 500 insertions(+), 29 deletions(-)
 create mode 100644 datamasque/client/models/git.py

diff --git a/HISTORY.rst b/HISTORY.rst
index 584d294..67055e4 100644
--- a/HISTORY.rst
+++ b/HISTORY.rst
@@ -11,6 +11,16 @@ History
   Adoption is recommended; the older APIs that take individual options will be deprecated in a future release.
 * Corrected the file-data-discovery ``include``/``skip`` filter syntax and added ``ignore_rules`` support.
 * Added ``InvalidDiscoveryConfigError``, raised when a discovery run can't start due to an invalid discovery config.
+* Added ``is_user_subscribed`` to ``MaskingRunRequest``
+  to subscribe the requesting user to a run's email notifications.
+* Added ``auto_pull`` / ``auto_pull_branch`` to ``MaskingRunOptions``
+  to refresh the run's ruleset from git before starting.
+* Added ``validation_error`` (and ``validation_error_type`` for rulesets)
+  to ``Ruleset`` and ``RulesetLibrary``.
+* Exposed git provenance on ``Ruleset`` and ``RulesetLibrary`` as a nested ``git`` field (``GitSnapshot``).
+* Read-only fields (``id``, ``is_valid``, ``validation_error``, etc.)
+  are no longer echoed back in ``Ruleset`` / ``RulesetLibrary`` create/update request bodies.
+* Fixed ``SslZipFile`` uploads to send the required ``database_type=mysql`` form field.
 
 1.0.5 (2026-06-18)
 ------------------
diff --git a/datamasque/client/__init__.py b/datamasque/client/__init__.py
index 304864f..7908151 100644
--- a/datamasque/client/__init__.py
+++ b/datamasque/client/__init__.py
@@ -87,6 +87,7 @@
     SnowflakeKeyFile,
     SslZipFile,
 )
+from datamasque.client.models.git import GitSnapshot
 from datamasque.client.models.ifm import (
     DataMasqueIfmInstanceConfig,
     IfmLog,
@@ -112,7 +113,12 @@
     RunInfo,
     UnfinishedRun,
 )
-from datamasque.client.models.status import AsyncRulesetGenerationTaskStatus, MaskingRunStatus, ValidationStatus
+from datamasque.client.models.status import (
+    AsyncRulesetGenerationTaskStatus,
+    MaskingRunStatus,
+    ValidationErrorType,
+    ValidationStatus,
+)
 from datamasque.client.models.user import User, UserId, UserRole
 
 __version__ = version("datamasque-python")
@@ -157,6 +163,7 @@
     "FileOrContent",
     "FileRulesetGenerationRequest",
     "ForeignKeyRef",
+    "GitSnapshot",
     "HashColumnsTableConfig",
     "IfmAuthError",
     "IfmLog",
@@ -219,5 +226,6 @@
     "UserId",
     "UserRole",
     "UserSelection",
+    "ValidationErrorType",
     "ValidationStatus",
 ]
diff --git a/datamasque/client/files.py b/datamasque/client/files.py
index 2f476e6..89d1a67 100644
--- a/datamasque/client/files.py
+++ b/datamasque/client/files.py
@@ -30,7 +30,7 @@ def upload_file(
         response = self.make_request(
             "POST",
             file_type.get_url(),
-            data={"name": file_name},
+            data={"name": file_name, **file_type.get_extra_form_data()},
             files=[
                 UploadFile(
                     field_name=file_type.get_content_param_name(),
diff --git a/datamasque/client/models/files.py b/datamasque/client/models/files.py
index 59bfb7f..13c9846 100644
--- a/datamasque/client/models/files.py
+++ b/datamasque/client/models/files.py
@@ -40,6 +40,12 @@ def get_content_param_name(cls) -> str:
 
         raise NotImplementedError  # pragma: no cover
 
+    @classmethod
+    def get_extra_form_data(cls) -> dict[str, str]:
+        """Extra multipart form fields to send alongside the file on upload. Empty by default."""
+
+        return {}
+
 
 class SeedFile(DataMasqueFile):
     """Represents a seed file (CSV file)."""
@@ -76,6 +82,11 @@ def get_url(cls) -> str:
     def get_content_param_name(cls) -> str:
         return "zip_archive"
 
+    @classmethod
+    def get_extra_form_data(cls) -> dict[str, str]:
+        # The connection-filesets endpoint requires a database_type; SSL filesets are MySQL-only today.
+        return {"database_type": "mysql"}
+
 
 class SnowflakeKeyFile(DataMasqueFile):
     """Represents a private SSH key file for Snowflake connections."""
diff --git a/datamasque/client/models/git.py b/datamasque/client/models/git.py
new file mode 100644
index 0000000..df8e71a
--- /dev/null
+++ b/datamasque/client/models/git.py
@@ -0,0 +1,60 @@
+from collections.abc import Mapping
+from datetime import datetime
+from typing import Any, Optional
+
+from pydantic import BaseModel, ConfigDict, Field, model_validator
+
+
+class GitSnapshot(BaseModel):
+    """
+    Git provenance for a ruleset or ruleset library.
+
+    Identifies the commit the entity's contents came from —
+    `commit_sha` on `branch` in `repo_url`, as of `synced_at`.
+    """
+
+    branch: str
+    commit_sha: str
+    repo_url: str
+    synced_at: datetime
+
+
+GIT_RESPONSE_FIELDS = ("git_branch", "git_commit_sha", "git_repo_url", "git_synced_at")
+
+
+def git_snapshot_from_response(data: Mapping[str, Any]) -> Optional[GitSnapshot]:
+    """Build a `GitSnapshot` from the server's flat `git_*` fields, or `None` when not git-synced."""
+    if data.get("git_branch") is None:
+        return None
+
+    return GitSnapshot(
+        branch=data["git_branch"],
+        commit_sha=data["git_commit_sha"],
+        repo_url=data["git_repo_url"],
+        synced_at=data["git_synced_at"],
+    )
+
+
+class GitTrackedEntity(BaseModel):
+    """Base class for rulesets and ruleset libraries that carry git provenance."""
+
+    model_config = ConfigDict(extra="allow", populate_by_name=True)
+
+    git: Optional[GitSnapshot] = Field(default=None, exclude=True)
+    """Git provenance, or `None` when the entity is not currently in sync with a git commit."""
+
+    @model_validator(mode="before")
+    @classmethod
+    def _collapse_git_fields(cls, data: Any) -> Any:
+        if not isinstance(data, dict) or "git" in data:
+            return data
+
+        data = dict(data)
+        snapshot = git_snapshot_from_response(data)
+        for field in GIT_RESPONSE_FIELDS:
+            data.pop(field, None)
+
+        if snapshot is not None:
+            data["git"] = snapshot
+
+        return data
diff --git a/datamasque/client/models/ruleset.py b/datamasque/client/models/ruleset.py
index 564b7c1..362fc8b 100644
--- a/datamasque/client/models/ruleset.py
+++ b/datamasque/client/models/ruleset.py
@@ -1,9 +1,10 @@
 import enum
 from typing import Any, NewType, Optional
 
-from pydantic import BaseModel, ConfigDict, Field
+from pydantic import Field
 
-from datamasque.client.models.status import ValidationStatus
+from datamasque.client.models.git import GitTrackedEntity
+from datamasque.client.models.status import ValidationErrorType, ValidationStatus
 
 RulesetId = NewType("RulesetId", str)
 
@@ -33,13 +34,17 @@ class RulesetType(enum.Enum):
     database = "database"
 
 
-class Ruleset(BaseModel):
+class Ruleset(GitTrackedEntity):
     """Represents a ruleset."""
 
-    model_config = ConfigDict(extra="allow", populate_by_name=True)
-
     name: str
     yaml: str = Field(default="", alias="config_yaml")
     ruleset_type: RulesetType = Field(default=RulesetType.database, alias="mask_type")
-    id: Optional[RulesetId] = None
-    is_valid: Optional[ValidationStatus] = None
+
+    # Server-populated read-only fields, excluded from request bodies.
+    id: Optional[RulesetId] = Field(default=None, exclude=True)
+    is_valid: Optional[ValidationStatus] = Field(default=None, exclude=True)
+    validation_error: Optional[str] = Field(default=None, exclude=True)
+    """Human-readable validation error, or `None` when valid."""
+    validation_error_type: Optional[ValidationErrorType] = Field(default=None, exclude=True)
+    """Category of the validation failure, or `None` when valid."""
diff --git a/datamasque/client/models/ruleset_library.py b/datamasque/client/models/ruleset_library.py
index 1ebe789..d1e1d3b 100644
--- a/datamasque/client/models/ruleset_library.py
+++ b/datamasque/client/models/ruleset_library.py
@@ -1,22 +1,25 @@
 from datetime import datetime
 from typing import NewType, Optional
 
-from pydantic import BaseModel, ConfigDict, Field
+from pydantic import Field
 
+from datamasque.client.models.git import GitTrackedEntity
 from datamasque.client.models.status import ValidationStatus
 
 RulesetLibraryId = NewType("RulesetLibraryId", str)
 
 
-class RulesetLibrary(BaseModel):
+class RulesetLibrary(GitTrackedEntity):
     """Represents a ruleset library."""
 
-    model_config = ConfigDict(extra="allow", populate_by_name=True)
-
     name: str
     namespace: str = ""
     yaml: Optional[str] = Field(default=None, alias="config_yaml")
-    id: Optional[RulesetLibraryId] = None
-    is_valid: Optional[ValidationStatus] = None
-    created: Optional[datetime] = None
-    modified: Optional[datetime] = None
+
+    # Server-populated read-only fields, excluded from request bodies.
+    id: Optional[RulesetLibraryId] = Field(default=None, exclude=True)
+    is_valid: Optional[ValidationStatus] = Field(default=None, exclude=True)
+    validation_error: Optional[str] = Field(default=None, exclude=True)
+    """Human-readable validation error, or `None` when valid."""
+    created: Optional[datetime] = Field(default=None, exclude=True)
+    modified: Optional[datetime] = Field(default=None, exclude=True)
diff --git a/datamasque/client/models/runs.py b/datamasque/client/models/runs.py
index 379f82a..dc15663 100644
--- a/datamasque/client/models/runs.py
+++ b/datamasque/client/models/runs.py
@@ -30,6 +30,7 @@ class MaskingRunOptions(BaseModel):
     if supplied,
     must be 16–256 characters and is used as the per-run encryption key;
     the server auto-generates one when omitted.
+    Set `auto_pull` to refresh the run's ruleset from git before the run starts.
     """
 
     model_config = ConfigDict(extra="forbid")
@@ -41,6 +42,10 @@ class MaskingRunOptions(BaseModel):
     diagnostic_logging: Optional[bool] = None
     run_secret: Optional[str] = Field(default=None, min_length=16, max_length=256)
     disable_instance_secret: Optional[bool] = None
+    auto_pull: Optional[bool] = None
+    """When `True`, pull the run's ruleset from git before starting."""
+    auto_pull_branch: Optional[str] = None
+    """Branch to auto-pull from; omitted or empty uses the instance's configured branch."""
 
 
 class MaskingRunRequest(BaseModel):
@@ -50,6 +55,9 @@ class MaskingRunRequest(BaseModel):
     `connection`, `destination_connection`, and `ruleset` accept either the server-assigned ID
     or the corresponding object returned by an earlier client call (e.g. a `ConnectionConfig`
     or `Ruleset`); the object's `id` is extracted at construction time.
+
+    Set `is_user_subscribed=True` to subscribe the requesting user to the run's email notifications;
+    when omitted the server applies its default (no subscription).
     """
 
     model_config = ConfigDict(extra="forbid")
@@ -60,6 +68,7 @@ class MaskingRunRequest(BaseModel):
     destination_connection: Optional[Union[ConnectionId, ConnectionConfig]] = None
     options: MaskingRunOptions = Field(default_factory=MaskingRunOptions)
     name: Optional[str] = None
+    is_user_subscribed: Optional[bool] = None
 
     @field_validator("connection", "destination_connection", mode="before")
     @classmethod
diff --git a/datamasque/client/models/status.py b/datamasque/client/models/status.py
index b09e18f..f470fd0 100644
--- a/datamasque/client/models/status.py
+++ b/datamasque/client/models/status.py
@@ -10,6 +10,15 @@ class ValidationStatus(enum.Enum):
     unknown = "unknown"
 
 
+class ValidationErrorType(enum.Enum):
+    """Categorises why a ruleset failed validation (see `Ruleset.validation_error_type`)."""
+
+    ruleset = "ruleset"
+    library_missing = "library_missing"
+    library_invalid = "library_invalid"
+    expansion = "expansion"  # The ruleset is not valid once its library references are expanded.
+
+
 class MaskingRunStatus(enum.Enum):
     """List of valid masking run statuses."""
 
diff --git a/datamasque/client/ruleset_libraries.py b/datamasque/client/ruleset_libraries.py
index 81acb8f..8943553 100644
--- a/datamasque/client/ruleset_libraries.py
+++ b/datamasque/client/ruleset_libraries.py
@@ -63,7 +63,8 @@ def create_ruleset_library(self, library: RulesetLibrary) -> RulesetLibrary:
         """
         Creates a new ruleset library on the server.
 
-        Sets the library's server-assigned fields (`id`, `is_valid`, `created`, `modified`) and returns the library.
+        Sets the library's server-assigned fields
+        (`id`, `is_valid`, `validation_error`, `git`, `created`, `modified`) and returns the library.
         """
 
         data = library.model_dump(exclude_none=True, by_alias=True, mode="json")
@@ -71,6 +72,8 @@ def create_ruleset_library(self, library: RulesetLibrary) -> RulesetLibrary:
         created_library = RulesetLibrary.model_validate(response.json())
         library.id = created_library.id
         library.is_valid = created_library.is_valid
+        library.validation_error = created_library.validation_error
+        library.git = created_library.git
         library.created = created_library.created
         library.modified = created_library.modified
         logger.info('Creation of ruleset library "%s" successful', library.name)
@@ -90,6 +93,8 @@ def update_ruleset_library(self, library: RulesetLibrary) -> RulesetLibrary:
         response = self.make_request("PUT", f"/api/ruleset-libraries/{library.id}/", data=data)
         updated_library = RulesetLibrary.model_validate(response.json())
         library.is_valid = updated_library.is_valid
+        library.validation_error = updated_library.validation_error
+        library.git = updated_library.git
         library.modified = updated_library.modified
         logger.debug('Update of ruleset library "%s" successful', library.name)
         return library
diff --git a/datamasque/client/rulesets.py b/datamasque/client/rulesets.py
index 4b9e038..77ab54d 100644
--- a/datamasque/client/rulesets.py
+++ b/datamasque/client/rulesets.py
@@ -3,7 +3,6 @@
 from datamasque.client.base import BaseClient
 from datamasque.client.exceptions import DataMasqueException
 from datamasque.client.models.ruleset import Ruleset, RulesetId
-from datamasque.client.models.status import ValidationStatus
 
 logger = logging.getLogger(__name__)
 
@@ -21,17 +20,18 @@ def create_or_update_ruleset(self, ruleset: Ruleset) -> Ruleset:
         """
         Creates or updates a ruleset.
 
-        Populates the given ruleset's `id` and `is_valid` fields from the server response,
-        and returns the same ruleset instance for convenience.
+        Populates the given ruleset's `id`, `is_valid`, `validation_error`, `validation_error_type`,
+        and `git` fields from the server response, and returns the same ruleset instance for convenience.
         """
 
         data = ruleset.model_dump(exclude_none=True, by_alias=True, mode="json")
         response = self.make_request("POST", "/api/rulesets/", data=data, params={"upsert": "true"})
-        response_data = response.json()
-        ruleset.id = RulesetId(response_data["id"])
-        is_valid = response_data.get("is_valid")
-        if is_valid is not None:
-            ruleset.is_valid = ValidationStatus(is_valid)
+        created = Ruleset.model_validate(response.json())
+        ruleset.id = created.id
+        ruleset.is_valid = created.is_valid
+        ruleset.validation_error = created.validation_error
+        ruleset.validation_error_type = created.validation_error_type
+        ruleset.git = created.git
 
         if response.status_code == 201:
             logger.info('Creation of ruleset "%s" successful', ruleset.name)
diff --git a/tests/test_files.py b/tests/test_files.py
index 5f5574c..3b8f99b 100644
--- a/tests/test_files.py
+++ b/tests/test_files.py
@@ -271,3 +271,42 @@ def test_upload_file_retries_on_401(config):
             second_form = parse_multipart_form(m_request.request_history[1])
             assert first_form["seed_file"]["content"] == b"seed content"
             assert second_form["seed_file"]["content"] == b"seed content"
+
+
+def test_upload_ssl_zip_file_sends_mysql_database_type(client):
+    """`SslZipFile` uploads must include `database_type=mysql`; the connection-filesets endpoint requires it."""
+    with requests_mock.Mocker() as m_request:
+        m_request.post(
+            f"http://test-server/{SslZipFile.get_url()}",
+            status_code=201,
+            json={
+                "id": str(uuid.uuid4()),
+                "name": "certs.zip",
+                "created_date": "2024-01-01T00:00:00.000000Z",
+                "modified_date": "2024-01-01T00:00:00.000000Z",
+            },
+        )
+        client.upload_file(SslZipFile, "certs.zip", b"zip bytes")
+
+    form = parse_multipart_form(m_request.request_history[0])
+    assert form["database_type"] == "mysql"
+
+
+@pytest.mark.parametrize("file_type", [SeedFile, OracleWalletFile, SnowflakeKeyFile])
+def test_upload_non_ssl_zip_file_omits_database_type(client, file_type):
+    """Only `SslZipFile` carries the `database_type` form field; other file types must not send it."""
+    with requests_mock.Mocker() as m_request:
+        m_request.post(
+            f"http://test-server/{file_type.get_url()}",
+            status_code=201,
+            json={
+                "id": str(uuid.uuid4()),
+                "name": "file.bin",
+                "created_date": "2024-01-01T00:00:00.000000Z",
+                "modified_date": "2024-01-01T00:00:00.000000Z",
+            },
+        )
+        client.upload_file(file_type, "file.bin", b"content")
+
+    form = parse_multipart_form(m_request.request_history[0])
+    assert "database_type" not in form
diff --git a/tests/test_ruleset_library.py b/tests/test_ruleset_library.py
index 1ff939b..6698531 100644
--- a/tests/test_ruleset_library.py
+++ b/tests/test_ruleset_library.py
@@ -616,3 +616,123 @@ def test_list_rulesets_using_library_pagination(client: DataMasqueClient) -> Non
     assert rulesets[0].name == "r1"
     assert rulesets[1].name == "r2"
     assert rulesets[2].name == "r3"
+
+
+def test_create_ruleset_library_populates_validation_error(
+    client: DataMasqueClient, ruleset_library: RulesetLibrary
+) -> None:
+    """The server's `validation_error` string is surfaced on the returned library."""
+    create_response = {
+        "id": LIBRARY_ID_1,
+        "name": "test_library",
+        "namespace": "test_ns",
+        "config_yaml": "version: '1.0'\nfunctions: []",
+        "is_valid": "invalid",
+        "validation_error": "Invalid function definition",
+        "created": "2025-06-01T10:00:00Z",
+        "modified": "2025-06-01T10:00:00Z",
+    }
+
+    with requests_mock.Mocker() as m:
+        m.post(
+            "http://test-server/api/ruleset-libraries/",
+            json=create_response,
+            status_code=201,
+        )
+        result = client.create_ruleset_library(ruleset_library)
+
+    assert result.validation_error == "Invalid function definition"
+
+
+def test_update_ruleset_library_populates_validation_error(
+    client: DataMasqueClient, ruleset_library: RulesetLibrary
+) -> None:
+    """A full update also surfaces the server's `validation_error` string."""
+    ruleset_library.id = RulesetLibraryId(LIBRARY_ID_1)
+    update_response = {
+        "id": LIBRARY_ID_1,
+        "name": "test_library",
+        "namespace": "test_ns",
+        "config_yaml": "version: '1.0'\nfunctions: []",
+        "is_valid": "invalid",
+        "validation_error": "Invalid function definition",
+        "created": "2025-06-01T10:00:00Z",
+        "modified": "2025-06-02T10:00:00Z",
+    }
+
+    with requests_mock.Mocker() as m:
+        m.put(
+            f"http://test-server/api/ruleset-libraries/{LIBRARY_ID_1}/",
+            json=update_response,
+            status_code=200,
+        )
+        result = client.update_ruleset_library(ruleset_library)
+
+    assert result.validation_error == "Invalid function definition"
+
+
+def test_create_ruleset_library_does_not_send_read_only_fields(
+    client: DataMasqueClient, ruleset_library: RulesetLibrary
+) -> None:
+    """Read-only server fields must never appear in the create request body."""
+    ruleset_library.id = RulesetLibraryId(LIBRARY_ID_1)
+    ruleset_library.is_valid = ValidationStatus.invalid
+    ruleset_library.validation_error = "stale error"
+    create_response = {
+        "id": LIBRARY_ID_1,
+        "name": "test_library",
+        "namespace": "test_ns",
+        "config_yaml": "version: '1.0'\nfunctions: []",
+        "is_valid": "invalid",
+        "validation_error": "Invalid function definition",
+        "created": "2025-06-01T10:00:00Z",
+        "modified": "2025-06-01T10:00:00Z",
+    }
+
+    with requests_mock.Mocker() as m:
+        m.post(
+            "http://test-server/api/ruleset-libraries/",
+            json=create_response,
+            status_code=201,
+        )
+        client.create_ruleset_library(ruleset_library)
+
+    body = m.last_request.json()
+    for read_only_field in ("id", "is_valid", "validation_error", "created", "modified"):
+        assert read_only_field not in body
+    # Input fields are still present.
+    assert body["name"] == "test_library"
+    assert body["config_yaml"] == "version: '1.0'\nfunctions: []"
+
+
+def test_create_ruleset_library_collapses_git_snapshot(
+    client: DataMasqueClient, ruleset_library: RulesetLibrary
+) -> None:
+    """The server's flat `git_*` fields are collapsed into a nested `git` snapshot on the library."""
+    create_response = {
+        "id": LIBRARY_ID_1,
+        "name": "test_library",
+        "namespace": "test_ns",
+        "config_yaml": "version: '1.0'\nfunctions: []",
+        "is_valid": "valid",
+        "git_branch": "main",
+        "git_commit_sha": "abc123",
+        "git_repo_url": "https://git.example.com/repo.git",
+        "git_synced_at": "2025-06-01T10:00:00Z",
+        "created": "2025-06-01T10:00:00Z",
+        "modified": "2025-06-01T10:00:00Z",
+    }
+
+    with requests_mock.Mocker() as m:
+        m.post(
+            "http://test-server/api/ruleset-libraries/",
+            json=create_response,
+            status_code=201,
+        )
+        result = client.create_ruleset_library(ruleset_library)
+
+    assert result.git is not None
+    assert result.git.branch == "main"
+    assert result.git.commit_sha == "abc123"
+    assert result.git.repo_url == "https://git.example.com/repo.git"
+    assert result.git.synced_at == datetime.fromisoformat("2025-06-01T10:00:00+00:00")
diff --git a/tests/test_rulesets.py b/tests/test_rulesets.py
index ae97b3a..aff5f32 100644
--- a/tests/test_rulesets.py
+++ b/tests/test_rulesets.py
@@ -1,11 +1,13 @@
 """Tests for `RulesetClient`."""
 
+from datetime import datetime
+
 import pytest
 import requests_mock
 
 from datamasque.client.exceptions import DataMasqueApiError
 from datamasque.client.models.ruleset import RulesetType
-from datamasque.client.models.status import ValidationStatus
+from datamasque.client.models.status import ValidationErrorType, ValidationStatus
 
 
 def test_list_rulesets(client, existing_rulesets_json):
@@ -32,7 +34,7 @@ def test_create_or_update_ruleset_create(client, ruleset):
         # Test creating a new ruleset with upsert
         m.post(
             "http://test-server/api/rulesets/?upsert=true",
-            json={"id": "2", "is_valid": "in_progress"},
+            json={"id": "2", "name": "test_ruleset", "is_valid": "in_progress"},
             status_code=201,
         )
 
@@ -62,7 +64,7 @@ def test_create_or_update_ruleset_update(client, ruleset):
         # Test updating an existing ruleset with upsert (returns 200 status for update)
         m.post(
             "http://test-server/api/rulesets/?upsert=true",
-            json={"id": "1", "is_valid": "valid"},
+            json={"id": "1", "name": "test_ruleset", "is_valid": "valid"},
             status_code=200,
         )
 
@@ -117,3 +119,146 @@ def test_delete_ruleset_that_does_not_exist(client, existing_rulesets_json):
 
     assert m.call_count == 1
     assert m.request_history[0].method == "GET"
+
+
+def test_create_or_update_ruleset_populates_validation_error(client, ruleset):
+    """The server's `validation_error` string and `validation_error_type` are surfaced on the returned ruleset."""
+    with requests_mock.Mocker() as m:
+        m.post(
+            "http://test-server/api/rulesets/?upsert=true",
+            json={
+                "id": "2",
+                "name": "test_ruleset",
+                "is_valid": "invalid",
+                "validation_error": "Ruleset error: Missing required field: table",
+                "validation_error_type": "ruleset",
+            },
+            status_code=201,
+        )
+        result = client.create_or_update_ruleset(ruleset)
+
+    assert result.validation_error == "Ruleset error: Missing required field: table"
+    assert result.validation_error_type is ValidationErrorType.ruleset
+
+
+def test_create_or_update_ruleset_validation_error_none_when_absent(client, ruleset):
+    """`validation_error` and `validation_error_type` are `None` when the server omits them (a valid ruleset)."""
+    with requests_mock.Mocker() as m:
+        m.post(
+            "http://test-server/api/rulesets/?upsert=true",
+            json={"id": "2", "name": "test_ruleset", "is_valid": "valid"},
+            status_code=201,
+        )
+        result = client.create_or_update_ruleset(ruleset)
+
+    assert result.validation_error is None
+    assert result.validation_error_type is None
+
+
+def test_create_or_update_ruleset_does_not_send_read_only_fields(client, ruleset):
+    """Read-only server fields must never be echoed back into a re-submit's request body."""
+    with requests_mock.Mocker() as m:
+        m.post(
+            "http://test-server/api/rulesets/?upsert=true",
+            json={
+                "id": "2",
+                "name": "test_ruleset",
+                "is_valid": "invalid",
+                "validation_error": "Ruleset error: bad",
+                "validation_error_type": "ruleset",
+            },
+            status_code=201,
+        )
+        client.create_or_update_ruleset(ruleset)
+
+        # Re-submit the same object (read-only fields now populated).
+        client.create_or_update_ruleset(ruleset)
+
+    body = m.last_request.json()
+    for read_only_field in ("id", "is_valid", "validation_error", "validation_error_type"):
+        assert read_only_field not in body
+    # Input fields are still present.
+    assert body["config_yaml"] == "version: '1.0'\ntasks: []"
+
+
+def test_create_or_update_ruleset_collapses_git_snapshot(client, ruleset):
+    """The server's flat `git_*` fields are collapsed into a nested `git` snapshot."""
+    with requests_mock.Mocker() as m:
+        m.post(
+            "http://test-server/api/rulesets/?upsert=true",
+            json={
+                "id": "2",
+                "name": "test_ruleset",
+                "is_valid": "valid",
+                "git_branch": "main",
+                "git_commit_sha": "abc123",
+                "git_repo_url": "https://git.example.com/repo.git",
+                "git_synced_at": "2025-06-01T10:00:00Z",
+            },
+            status_code=201,
+        )
+        result = client.create_or_update_ruleset(ruleset)
+
+    assert result.git is not None
+    assert result.git.branch == "main"
+    assert result.git.commit_sha == "abc123"
+    assert result.git.repo_url == "https://git.example.com/repo.git"
+    assert result.git.synced_at == datetime.fromisoformat("2025-06-01T10:00:00+00:00")
+
+
+def test_create_or_update_ruleset_git_none_when_not_synced(client, ruleset):
+    """`git` is `None` when the server reports no git provenance."""
+    with requests_mock.Mocker() as m:
+        m.post(
+            "http://test-server/api/rulesets/?upsert=true",
+            json={
+                "id": "2",
+                "name": "test_ruleset",
+                "is_valid": "valid",
+                "git_branch": None,
+                "git_commit_sha": None,
+                "git_repo_url": None,
+                "git_synced_at": None,
+            },
+            status_code=201,
+        )
+        result = client.create_or_update_ruleset(ruleset)
+
+    assert result.git is None
+
+
+def test_list_rulesets_collapses_git_snapshot(client):
+    """`git_*` fields are collapsed (and consumed, not left as extras) when listing rulesets."""
+    with requests_mock.Mocker() as m:
+        m.get(
+            "http://test-server/api/v2/rulesets/",
+            json=[
+                {
+                    "id": "1",
+                    "name": "synced_ruleset",
+                    "mask_type": "database",
+                    "config_yaml": "version: '1.0'",
+                    "is_valid": "valid",
+                    "git_branch": "main",
+                    "git_commit_sha": "abc123",
+                    "git_repo_url": "https://git.example.com/repo.git",
+                    "git_synced_at": "2025-06-01T10:00:00Z",
+                },
+                {
+                    "id": "2",
+                    "name": "local_ruleset",
+                    "mask_type": "database",
+                    "config_yaml": "version: '1.0'",
+                    "is_valid": "valid",
+                },
+            ],
+            status_code=200,
+        )
+        rulesets = client.list_rulesets()
+
+    assert rulesets[0].git is not None
+    assert rulesets[0].git.branch == "main"
+    assert rulesets[1].git is None
+    # The flat keys were consumed, not retained as extras that would leak back on re-submit.
+    assert "git_branch" not in rulesets[0].model_dump(by_alias=True)
+    assert "git" not in rulesets[0].model_dump(by_alias=True)
diff --git a/tests/test_runs.py b/tests/test_runs.py
index 3360093..24cbcd6 100644
--- a/tests/test_runs.py
+++ b/tests/test_runs.py
@@ -161,6 +161,53 @@ def test_start_masking_run(client):
         assert client.start_masking_run(MaskingRunRequest(connection="1", ruleset="rs-1", name=fake.word())) == "1"
 
 
+def test_start_masking_run_sends_is_user_subscribed_when_set(client):
+    """`is_user_subscribed=True` is sent top-level so the server subscribes the requesting user to the run."""
+    with requests_mock.Mocker() as m:
+        m.post("http://test-server/api/runs/", json={"id": "1", "name": "r"}, status_code=201)
+        client.start_masking_run(MaskingRunRequest(connection="1", ruleset="rs-1", name="r", is_user_subscribed=True))
+
+    assert m.last_request.json()["is_user_subscribed"] is True
+
+
+def test_start_masking_run_omits_is_user_subscribed_by_default(client):
+    """An unset `is_user_subscribed` is excluded from the request body rather than sent as null."""
+    with requests_mock.Mocker() as m:
+        m.post("http://test-server/api/runs/", json={"id": "1", "name": "r"}, status_code=201)
+        client.start_masking_run(MaskingRunRequest(connection="1", ruleset="rs-1", name="r"))
+
+    assert "is_user_subscribed" not in m.last_request.json()
+
+
+def test_start_masking_run_sends_auto_pull_options(client):
+    """`auto_pull` / `auto_pull_branch` are sent inside `options` so the server refreshes the ruleset from git."""
+    with requests_mock.Mocker() as m:
+        m.post("http://test-server/api/runs/", json={"id": "1", "name": "r"}, status_code=201)
+        client.start_masking_run(
+            MaskingRunRequest(
+                connection="1",
+                ruleset="rs-1",
+                name="r",
+                options=MaskingRunOptions(auto_pull=True, auto_pull_branch="main"),
+            )
+        )
+
+    options = m.last_request.json()["options"]
+    assert options["auto_pull"] is True
+    assert options["auto_pull_branch"] == "main"
+
+
+def test_start_masking_run_omits_auto_pull_options_by_default(client):
+    """Unset auto-pull options are excluded from the request body rather than sent as null."""
+    with requests_mock.Mocker() as m:
+        m.post("http://test-server/api/runs/", json={"id": "1", "name": "r"}, status_code=201)
+        client.start_masking_run(MaskingRunRequest(connection="1", ruleset="rs-1", name="r"))
+
+    options = m.last_request.json()["options"]
+    assert "auto_pull" not in options
+    assert "auto_pull_branch" not in options
+
+
 def test_start_masking_run_invalid_ruleset(client):
     with requests_mock.Mocker() as m:
         m.post(

From 2e07560a3f2d11aaef4217405c8dada2bc56ec7f Mon Sep 17 00:00:00 2001
From: Colin Haywood <colin.haywood@datamasque.com>
Date: Wed, 17 Jun 2026 15:09:28 +1200
Subject: [PATCH 16/26] fix: Handle dict-shaped config validation errors on the
 discovery run API

---
 datamasque/client/discovery.py | 43 ++++++++++++++++++++++++----------
 tests/test_discovery.py        | 28 ++++++++++++++++++++++
 2 files changed, 59 insertions(+), 12 deletions(-)

diff --git a/datamasque/client/discovery.py b/datamasque/client/discovery.py
index ca27772..0180036 100644
--- a/datamasque/client/discovery.py
+++ b/datamasque/client/discovery.py
@@ -283,7 +283,8 @@ def start_schema_discovery_run_from_config(self, request: SchemaDiscoveryFromCon
 
         Raises:
             InvalidDiscoveryConfigError: the run failed to start because the referenced
-                discovery config is missing, archived, or not in a `valid` validation state.
+                discovery config is missing, archived, not in a `valid` validation state,
+                or carries YAML the server rejects at trigger time.
             FailedToStartError: the run failed to start for any other reason.
         """
 
@@ -325,7 +326,8 @@ def start_file_data_discovery_run_from_config(self, request: FileDataDiscoveryFr
 
         Raises:
             InvalidDiscoveryConfigError: the run failed to start because the referenced
-                discovery config is missing, archived, or not in a `valid` validation state.
+                discovery config is missing, archived, not in a `valid` validation state,
+                or carries YAML the server rejects at trigger time.
             FailedToStartError: the run failed to start for any other reason.
         """
 
@@ -353,18 +355,35 @@ def start_file_data_discovery_run_from_config(self, request: FileDataDiscoveryFr
             response=response,
         )
 
-    @staticmethod
-    def _maybe_raise_discovery_config_error(run_data: object, response: Response, run_kind: str) -> None:
-        """Raise `InvalidDiscoveryConfigError` if the server's 400 body cites `discovery_config`."""
-        if not isinstance(run_data, dict) or "discovery_config" not in run_data:
+    # Server key for a 400 that means the discovery config itself is unusable.
+    # `discovery_config` covers a missing/archived config or a non-`valid` validation state
+    # (string messages),
+    # and re-validation of broken saved-config YAML at trigger time
+    # (a `{"message", "line_number", "column_number"}` dict per error).
+    DISCOVERY_CONFIG_ERROR_FIELDS = ("discovery_config",)
+
+    @classmethod
+    def _maybe_raise_discovery_config_error(cls, run_data: object, response: Response, run_kind: str) -> None:
+        """Raise `InvalidDiscoveryConfigError` if the server's 400 body cites the discovery config."""
+        if not isinstance(run_data, dict):
             return
 
-        errors = run_data["discovery_config"]
-        detail = errors[0] if isinstance(errors, list) and errors else str(errors)
-        raise InvalidDiscoveryConfigError(
-            f"{run_kind} run failed to start due to discovery config error: {detail}",
-            response=response,
-        )
+        for field in cls.DISCOVERY_CONFIG_ERROR_FIELDS:
+            if field in run_data:
+                detail = cls._format_discovery_config_error(run_data[field])
+                raise InvalidDiscoveryConfigError(
+                    f"{run_kind} run failed to start due to discovery config error: {detail}",
+                    response=response,
+                )
+
+    @staticmethod
+    def _format_discovery_config_error(errors: object) -> str:
+        """Render the first server error, handling both string and `{message, ...}` dict items."""
+        first = errors[0] if isinstance(errors, list) and errors else errors
+        if isinstance(first, dict) and "message" in first:
+            return str(first["message"])
+
+        return str(first)
 
     def iter_schema_discovery_results(self, run_id: RunId) -> Iterator[SchemaDiscoveryResult]:
         """Lazily iterate all schema discovery results for a run via the paginated v2 endpoint."""
diff --git a/tests/test_discovery.py b/tests/test_discovery.py
index 0e14e57..7a2fec8 100644
--- a/tests/test_discovery.py
+++ b/tests/test_discovery.py
@@ -1067,6 +1067,34 @@ def test_start_schema_discovery_run_from_config_raises_invalid_discovery_config_
             )
 
 
+def test_start_schema_discovery_run_from_config_raises_invalid_discovery_config_on_broken_yaml(client):
+    """
+    A 400 carrying trigger-time re-validation of broken saved YAML is classified.
+
+    The server reports these under `discovery_config` as `{"message", "line_number", "column_number"}` dicts,
+    not strings.
+    """
+    with requests_mock.Mocker() as m:
+        m.post(
+            "http://test-server/api/schema-discovery/v2/",
+            json={
+                "discovery_config": [
+                    {"message": "Unknown mask 'no_such_mask'.", "line_number": 4, "column_number": 7},
+                ],
+            },
+            status_code=400,
+        )
+        with pytest.raises(
+            InvalidDiscoveryConfigError,
+            match=r"Schema discovery run failed to start due to discovery config error: Unknown mask 'no_such_mask'\.",
+        ):
+            client.start_schema_discovery_run_from_config(
+                SchemaDiscoveryFromConfigRequest(
+                    connection="conn-1", discovery_config=DiscoveryConfigId(DISCOVERY_CONFIG_ID)
+                ),
+            )
+
+
 def test_start_file_data_discovery_run_from_config_raises_invalid_discovery_config(client):
     with requests_mock.Mocker() as m:
         m.post(

From bf1cb054d13fec85551a77e2a2fa3d94d09a3120 Mon Sep 17 00:00:00 2001
From: Colin Haywood <colin.haywood@datamasque.com>
Date: Sun, 21 Jun 2026 11:01:20 +1200
Subject: [PATCH 17/26] feat: Add DiscoveryRunType and
 DiscoveryConfigNotFoundError

---
 datamasque/client/__init__.py                |  5 +-
 datamasque/client/discovery.py               | 52 ++++++++++++--------
 datamasque/client/exceptions.py              | 13 ++++-
 datamasque/client/models/discovery_config.py | 26 +++++++---
 tests/test_discovery.py                      | 37 +++++++++++---
 tests/test_discovery_configs.py              | 23 ++++++++-
 6 files changed, 117 insertions(+), 39 deletions(-)

diff --git a/datamasque/client/__init__.py b/datamasque/client/__init__.py
index 7908151..0872d95 100644
--- a/datamasque/client/__init__.py
+++ b/datamasque/client/__init__.py
@@ -17,6 +17,7 @@
     DataMasqueNotReadyError,
     DataMasqueTransportError,
     DataMasqueUserError,
+    DiscoveryConfigNotFoundError,
     FailedToStartError,
     IfmAuthError,
     InvalidDiscoveryConfigError,
@@ -77,7 +78,7 @@
     SchemaDiscoveryResult,
     TableConstraints,
 )
-from datamasque.client.models.discovery_config import DiscoveryConfig, DiscoveryConfigId
+from datamasque.client.models.discovery_config import DiscoveryConfig, DiscoveryConfigId, DiscoveryConfigType
 from datamasque.client.models.dm_instance import DataMasqueInstanceConfig
 from datamasque.client.models.files import (
     DataMasqueFile,
@@ -146,6 +147,8 @@
     "DatabricksConnectionConfig",
     "DiscoveryConfig",
     "DiscoveryConfigId",
+    "DiscoveryConfigNotFoundError",
+    "DiscoveryConfigType",
     "DiscoveryMatch",
     "DynamoConnectionConfig",
     "FailedToStartError",
diff --git a/datamasque/client/discovery.py b/datamasque/client/discovery.py
index 0180036..392c32e 100644
--- a/datamasque/client/discovery.py
+++ b/datamasque/client/discovery.py
@@ -10,6 +10,7 @@
 from datamasque.client.exceptions import (
     AsyncRulesetGenerationInProgressError,
     DataMasqueException,
+    DiscoveryConfigNotFoundError,
     FailedToStartError,
     InvalidDiscoveryConfigError,
 )
@@ -282,9 +283,10 @@ def start_schema_discovery_run_from_config(self, request: SchemaDiscoveryFromCon
             RunId: The ID of the started discovery run
 
         Raises:
-            InvalidDiscoveryConfigError: the run failed to start because the referenced
-                discovery config is missing, archived, not in a `valid` validation state,
-                or carries YAML the server rejects at trigger time.
+            DiscoveryConfigNotFoundError: the referenced discovery config cannot be found
+                (it does not exist or is the wrong type for the run).
+            InvalidDiscoveryConfigError: the config is present but not in a `valid` validation state,
+                or its YAML is rejected when the run starts.
             FailedToStartError: the run failed to start for any other reason.
         """
 
@@ -325,9 +327,10 @@ def start_file_data_discovery_run_from_config(self, request: FileDataDiscoveryFr
             RunId: The ID of the started discovery run
 
         Raises:
-            InvalidDiscoveryConfigError: the run failed to start because the referenced
-                discovery config is missing, archived, not in a `valid` validation state,
-                or carries YAML the server rejects at trigger time.
+            DiscoveryConfigNotFoundError: the referenced discovery config cannot be found
+                (it does not exist or is the wrong type for the run).
+            InvalidDiscoveryConfigError: the config is present but not in a `valid` validation state,
+                or its YAML is rejected when the run starts.
             FailedToStartError: the run failed to start for any other reason.
         """
 
@@ -355,26 +358,35 @@ def start_file_data_discovery_run_from_config(self, request: FileDataDiscoveryFr
             response=response,
         )
 
-    # Server key for a 400 that means the discovery config itself is unusable.
-    # `discovery_config` covers a missing/archived config or a non-`valid` validation state
-    # (string messages),
-    # and re-validation of broken saved-config YAML at trigger time
-    # (a `{"message", "line_number", "column_number"}` dict per error).
-    DISCOVERY_CONFIG_ERROR_FIELDS = ("discovery_config",)
+    # Server key for a 400 that means the discovery config itself is unusable:
+    # a missing or wrong-type config, or one not in a `valid` validation state (string messages),
+    # or re-validation of broken saved-config YAML when the run starts
+    # (a `{"message", "line_number", "column_number"}` dict).
+    DISCOVERY_CONFIG_ERROR_FIELD = "discovery_config"
+
+    # The phrase the server uses when the config id cannot be resolved (a missing or wrong-type config).
+    MISSING_DISCOVERY_CONFIG_SIGNATURE = "object does not exist"
 
     @classmethod
     def _maybe_raise_discovery_config_error(cls, run_data: object, response: Response, run_kind: str) -> None:
-        """Raise `InvalidDiscoveryConfigError` if the server's 400 body cites the discovery config."""
+        """Raise a discovery-config error if the server's 400 body cites the discovery config."""
         if not isinstance(run_data, dict):
             return
 
-        for field in cls.DISCOVERY_CONFIG_ERROR_FIELDS:
-            if field in run_data:
-                detail = cls._format_discovery_config_error(run_data[field])
-                raise InvalidDiscoveryConfigError(
-                    f"{run_kind} run failed to start due to discovery config error: {detail}",
-                    response=response,
-                )
+        if not (errors := run_data.get(cls.DISCOVERY_CONFIG_ERROR_FIELD)):
+            return
+
+        detail = cls._format_discovery_config_error(errors)
+        if cls.MISSING_DISCOVERY_CONFIG_SIGNATURE in detail:
+            raise DiscoveryConfigNotFoundError(
+                f"{run_kind} run failed to start: the referenced discovery config could not be found: {detail}",
+                response=response,
+            )
+
+        raise InvalidDiscoveryConfigError(
+            f"{run_kind} run failed to start due to discovery config error: {detail}",
+            response=response,
+        )
 
     @staticmethod
     def _format_discovery_config_error(errors: object) -> str:
diff --git a/datamasque/client/exceptions.py b/datamasque/client/exceptions.py
index f034c4d..39c90fa 100644
--- a/datamasque/client/exceptions.py
+++ b/datamasque/client/exceptions.py
@@ -43,9 +43,18 @@ class InvalidLibraryError(FailedToStartError):
 
 class InvalidDiscoveryConfigError(FailedToStartError):
     """
-    Raised when a discovery run fails to start due to an unusable discovery config.
+    Raised when a discovery run fails to start because the referenced config is present but unusable.
 
-    The referenced config is missing, archived, or not in a `valid` validation state.
+    The config exists but is not in a `valid` validation state, or its YAML is rejected when the run starts.
+    A config that cannot be found raises `DiscoveryConfigNotFoundError` instead.
+    """
+
+
+class DiscoveryConfigNotFoundError(FailedToStartError):
+    """
+    Raised when a discovery run references a discovery config that cannot be found.
+
+    The config does not exist or is the wrong type for the run.
     """
 
 
diff --git a/datamasque/client/models/discovery_config.py b/datamasque/client/models/discovery_config.py
index f0a6b3c..09b123f 100644
--- a/datamasque/client/models/discovery_config.py
+++ b/datamasque/client/models/discovery_config.py
@@ -1,3 +1,4 @@
+import enum
 from datetime import datetime
 from typing import Any, NewType, Optional
 
@@ -8,6 +9,13 @@
 DiscoveryConfigId = NewType("DiscoveryConfigId", str)
 
 
+class DiscoveryConfigType(enum.Enum):
+    """Which discovery config variant a config targets: database (qualified columns) or file (locators)."""
+
+    database = "database"
+    file = "file"
+
+
 def unwrap_discovery_config_id(value: Any) -> Any:
     """
     Coerce a `DiscoveryConfig` to its `id`; pass other values through unchanged.
@@ -33,11 +41,13 @@ class DiscoveryConfig(BaseModel):
 
     name: str
     yaml: Optional[str] = Field(default=None, alias="config_yaml")
-    id: Optional[DiscoveryConfigId] = None
-    # Server-managed validation surface, populated by the DataMasque server.
-    # `is_valid` may be `in_progress` immediately after creating a large config,
-    # transitioning to `valid` or `invalid` once the server finishes validating.
-    is_valid: Optional[ValidationStatus] = None
-    validation_error: Optional[str] = None
-    created: Optional[datetime] = None
-    modified: Optional[datetime] = None
+    config_type: DiscoveryConfigType
+
+    # Server-populated read-only fields, excluded from request bodies.
+    id: Optional[DiscoveryConfigId] = Field(default=None, exclude=True)
+    is_valid: Optional[ValidationStatus] = Field(default=None, exclude=True)
+    """Validation status; may be `in_progress` briefly after creating a large config."""
+    validation_error: Optional[str] = Field(default=None, exclude=True)
+    """Human-readable validation error, or `None` when valid."""
+    created: Optional[datetime] = Field(default=None, exclude=True)
+    modified: Optional[datetime] = Field(default=None, exclude=True)
diff --git a/tests/test_discovery.py b/tests/test_discovery.py
index 7a2fec8..8605474 100644
--- a/tests/test_discovery.py
+++ b/tests/test_discovery.py
@@ -30,6 +30,7 @@
     AsyncRulesetGenerationInProgressError,
     DataMasqueApiError,
     DataMasqueException,
+    DiscoveryConfigNotFoundError,
     FailedToStartError,
     InvalidDiscoveryConfigError,
 )
@@ -873,14 +874,14 @@ def test_schema_discovery_from_config_request_accepts_discovery_config_id():
 
 def test_schema_discovery_from_config_request_unwraps_discovery_config_model():
     """Passing a full `DiscoveryConfig` object substitutes its `id` for the wire payload."""
-    config = DiscoveryConfig(name="my_cfg", id=DiscoveryConfigId(DISCOVERY_CONFIG_ID))
+    config = DiscoveryConfig(name="my_cfg", config_type="database", id=DiscoveryConfigId(DISCOVERY_CONFIG_ID))
     req = SchemaDiscoveryFromConfigRequest(connection="conn-1", discovery_config=config)
     assert req.model_dump(exclude_none=True, mode="json")["discovery_config"] == DISCOVERY_CONFIG_ID
 
 
 def test_schema_discovery_from_config_request_rejects_unsaved_discovery_config():
     """A `DiscoveryConfig` without an `id` cannot be used yet — raises immediately."""
-    config = DiscoveryConfig(name="my_cfg")
+    config = DiscoveryConfig(name="my_cfg", config_type="database")
     with pytest.raises(ValueError, match="id is None"):
         SchemaDiscoveryFromConfigRequest(connection="conn-1", discovery_config=config)
 
@@ -983,7 +984,7 @@ def test_start_schema_discovery_run_from_config_sends_schemas(client):
 
 def test_start_file_data_discovery_run_from_config_sends_discovery_config(client):
     """`start_file_data_discovery_run_from_config` posts only the connection and discovery_config id."""
-    config = DiscoveryConfig(name="my_cfg", id=DiscoveryConfigId(DISCOVERY_CONFIG_ID))
+    config = DiscoveryConfig(name="my_cfg", config_type="file", id=DiscoveryConfigId(DISCOVERY_CONFIG_ID))
     req = FileDataDiscoveryFromConfigRequest(connection="conn-1", discovery_config=config)
     with requests_mock.Mocker() as m:
         m.post("http://test-server/api/run-file-data-discovery/v2/", json={"id": 99}, status_code=201)
@@ -1051,21 +1052,45 @@ def test_start_schema_discovery_run_from_config_raises_invalid_discovery_config_
             )
 
 
-def test_start_schema_discovery_run_from_config_raises_invalid_discovery_config_when_missing(client):
-    """A 400 from DRF's PrimaryKeyRelatedField (config not found / archived) is also classified."""
+def test_start_schema_discovery_run_from_config_raises_not_found_when_missing(client):
+    """
+    A 400 for a config that cannot be found raises `DiscoveryConfigNotFoundError`.
+
+    This is a bad reference rather than an unusable-but-present config,
+    so it must not be conflated with `InvalidDiscoveryConfigError`.
+    The not-found subclass still inherits `FailedToStartError` for callers that catch the base.
+    """
     with requests_mock.Mocker() as m:
         m.post(
             "http://test-server/api/schema-discovery/v2/",
             json={"discovery_config": [f'Invalid pk "{DISCOVERY_CONFIG_ID}" - object does not exist.']},
             status_code=400,
         )
-        with pytest.raises(InvalidDiscoveryConfigError, match="object does not exist"):
+        with pytest.raises(DiscoveryConfigNotFoundError, match="object does not exist") as exc_info:
             client.start_schema_discovery_run_from_config(
                 SchemaDiscoveryFromConfigRequest(
                     connection="conn-1", discovery_config=DiscoveryConfigId(DISCOVERY_CONFIG_ID)
                 ),
             )
 
+    assert isinstance(exc_info.value, FailedToStartError)
+
+
+def test_start_file_data_discovery_run_from_config_raises_not_found_when_missing(client):
+    """A not-found saved config on the file-data trigger also raises `DiscoveryConfigNotFoundError`."""
+    with requests_mock.Mocker() as m:
+        m.post(
+            "http://test-server/api/run-file-data-discovery/v2/",
+            json={"discovery_config": [f'Invalid pk "{DISCOVERY_CONFIG_ID}" - object does not exist.']},
+            status_code=400,
+        )
+        with pytest.raises(DiscoveryConfigNotFoundError, match="object does not exist"):
+            client.start_file_data_discovery_run_from_config(
+                FileDataDiscoveryFromConfigRequest(
+                    connection="conn-1", discovery_config=DiscoveryConfigId(DISCOVERY_CONFIG_ID)
+                ),
+            )
+
 
 def test_start_schema_discovery_run_from_config_raises_invalid_discovery_config_on_broken_yaml(client):
     """
diff --git a/tests/test_discovery_configs.py b/tests/test_discovery_configs.py
index f98893e..7ea2b67 100644
--- a/tests/test_discovery_configs.py
+++ b/tests/test_discovery_configs.py
@@ -29,6 +29,7 @@ def sample_config_list_response() -> dict[str, Any]:
             {
                 "id": CONFIG_ID_1,
                 "name": "my_config",
+                "config_type": "database",
                 "archived": False,
                 "created": "2025-01-01T12:00:00Z",
                 "modified": "2025-01-02T12:00:00Z",
@@ -36,6 +37,7 @@ def sample_config_list_response() -> dict[str, Any]:
             {
                 "id": CONFIG_ID_2,
                 "name": "another_config",
+                "config_type": "database",
                 "archived": False,
                 "created": "2025-02-01T12:00:00Z",
                 "modified": "2025-02-02T12:00:00Z",
@@ -50,6 +52,7 @@ def sample_config_detail_response() -> dict[str, Any]:
         "id": CONFIG_ID_1,
         "name": "my_config",
         "config_yaml": "labels: []\nmetadata_rules: []\nidd_rules: []\n",
+        "config_type": "database",
         "archived": False,
         "created": "2025-01-01T12:00:00Z",
         "modified": "2025-01-02T12:00:00Z",
@@ -61,6 +64,7 @@ def discovery_config() -> DiscoveryConfig:
     return DiscoveryConfig(
         name="test_config",
         yaml="labels: []\nmetadata_rules: []\nidd_rules: []\n",
+        config_type="database",
     )
 
 
@@ -89,6 +93,7 @@ def test_list_discovery_configs_pagination(client: DataMasqueClient) -> None:
             {
                 "id": CONFIG_ID_1,
                 "name": "c1",
+                "config_type": "database",
                 "archived": False,
                 "created": "2025-01-01T12:00:00Z",
                 "modified": "2025-01-01T12:00:00Z",
@@ -96,6 +101,7 @@ def test_list_discovery_configs_pagination(client: DataMasqueClient) -> None:
             {
                 "id": CONFIG_ID_2,
                 "name": "c2",
+                "config_type": "database",
                 "archived": False,
                 "created": "2025-01-01T12:00:00Z",
                 "modified": "2025-01-01T12:00:00Z",
@@ -110,6 +116,7 @@ def test_list_discovery_configs_pagination(client: DataMasqueClient) -> None:
             {
                 "id": "cccccccc-1111-2222-3333-444444444444",
                 "name": "c3",
+                "config_type": "database",
                 "archived": False,
                 "created": "2025-01-01T12:00:00Z",
                 "modified": "2025-01-01T12:00:00Z",
@@ -162,6 +169,7 @@ def test_get_discovery_config_by_name_found(
             {
                 "id": CONFIG_ID_1,
                 "name": "my_config",
+                "config_type": "database",
                 "archived": False,
                 "created": "2025-01-01T12:00:00Z",
                 "modified": "2025-01-02T12:00:00Z",
@@ -209,6 +217,7 @@ def test_get_discovery_config_by_name_raises_when_server_omits_id(client: DataMa
         "results": [
             {
                 "name": "my_config",
+                "config_type": "database",
                 "archived": False,
                 "created": "2025-01-01T12:00:00Z",
                 "modified": "2025-01-02T12:00:00Z",
@@ -231,6 +240,7 @@ def test_create_discovery_config(client: DataMasqueClient, discovery_config: Dis
         "id": CONFIG_ID_1,
         "name": "test_config",
         "config_yaml": "labels: []\nmetadata_rules: []\nidd_rules: []\n",
+        "config_type": "database",
         "archived": False,
         "created": "2025-06-01T10:00:00Z",
         "modified": "2025-06-01T10:00:00Z",
@@ -252,6 +262,7 @@ def test_create_discovery_config(client: DataMasqueClient, discovery_config: Dis
     request_body = m.last_request.json()
     assert request_body["name"] == "test_config"
     assert request_body["config_yaml"] == "labels: []\nmetadata_rules: []\nidd_rules: []\n"
+    assert request_body["config_type"] == "database"
 
 
 def test_update_discovery_config(client: DataMasqueClient, discovery_config: DiscoveryConfig) -> None:
@@ -261,6 +272,7 @@ def test_update_discovery_config(client: DataMasqueClient, discovery_config: Dis
         "id": CONFIG_ID_1,
         "name": "test_config",
         "config_yaml": "labels: []\nmetadata_rules: []\nidd_rules: []\n",
+        "config_type": "database",
         "archived": False,
         "created": "2025-06-01T10:00:00Z",
         "modified": "2025-06-02T10:00:00Z",
@@ -293,6 +305,7 @@ def test_create_or_update_discovery_config_create(client: DataMasqueClient, disc
         "id": CONFIG_ID_1,
         "name": "test_config",
         "config_yaml": "labels: []\nmetadata_rules: []\nidd_rules: []\n",
+        "config_type": "database",
         "archived": False,
         "created": "2025-06-01T10:00:00Z",
         "modified": "2025-06-01T10:00:00Z",
@@ -317,6 +330,7 @@ def test_create_or_update_discovery_config_update(client: DataMasqueClient, disc
             {
                 "id": CONFIG_ID_1,
                 "name": "test_config",
+                "config_type": "database",
                 "archived": False,
                 "created": "2025-06-01T10:00:00Z",
                 "modified": "2025-06-01T10:00:00Z",
@@ -327,6 +341,7 @@ def test_create_or_update_discovery_config_update(client: DataMasqueClient, disc
         "id": CONFIG_ID_1,
         "name": "test_config",
         "config_yaml": "labels: []",
+        "config_type": "database",
         "archived": False,
         "created": "2025-06-01T10:00:00Z",
         "modified": "2025-06-01T10:00:00Z",
@@ -335,6 +350,7 @@ def test_create_or_update_discovery_config_update(client: DataMasqueClient, disc
         "id": CONFIG_ID_1,
         "name": "test_config",
         "config_yaml": "labels: []\nmetadata_rules: []\nidd_rules: []\n",
+        "config_type": "database",
         "archived": False,
         "created": "2025-06-01T10:00:00Z",
         "modified": "2025-06-02T10:00:00Z",
@@ -403,6 +419,7 @@ def test_delete_discovery_config_by_name_raises_when_server_omits_id(client: Dat
         "results": [
             {
                 "name": "my_config",
+                "config_type": "database",
                 "archived": False,
                 "created": "2025-01-01T12:00:00Z",
                 "modified": "2025-01-02T12:00:00Z",
@@ -441,6 +458,7 @@ def test_discovery_config_parses_validation_fields() -> None:
         {
             "id": CONFIG_ID_1,
             "name": "my_config",
+            "config_type": "database",
             "config_yaml": "labels: []",
             "is_valid": "invalid",
             "validation_error": "bad shape on line 3",
@@ -459,6 +477,7 @@ def test_discovery_config_validation_fields_optional() -> None:
         {
             "id": CONFIG_ID_1,
             "name": "my_config",
+            "config_type": "database",
             "created": "2025-01-01T12:00:00Z",
             "modified": "2025-01-02T12:00:00Z",
         }
@@ -474,11 +493,11 @@ def test_unwrap_discovery_config_id_passes_through_strings() -> None:
 
 
 def test_unwrap_discovery_config_id_extracts_id_from_model() -> None:
-    config = DiscoveryConfig(name="x", id=DiscoveryConfigId(CONFIG_ID_1))
+    config = DiscoveryConfig(name="x", config_type="database", id=DiscoveryConfigId(CONFIG_ID_1))
     assert unwrap_discovery_config_id(config) == CONFIG_ID_1
 
 
 def test_unwrap_discovery_config_id_raises_without_id() -> None:
-    config = DiscoveryConfig(name="x")
+    config = DiscoveryConfig(name="x", config_type="database")
     with pytest.raises(ValueError, match="id is None"):
         unwrap_discovery_config_id(config)

From 39fa29f574451caf25a618e93c6fb9397921d6a6 Mon Sep 17 00:00:00 2001
From: Colin Haywood <colin.haywood@datamasque.com>
Date: Wed, 24 Jun 2026 10:26:33 +1200
Subject: [PATCH 18/26] feat!: require type for discovery-config and ruleset
 lookup/delete

Names are unique only per type, so the by-name lookups need the type to identify a single config.
---
 HISTORY.rst                            |  2 ++
 datamasque/client/discovery_configs.py | 25 ++++++++++--------
 datamasque/client/rulesets.py          | 20 ++++++++++-----
 tests/test_discovery_configs.py        | 35 +++++++++++++++++++++-----
 tests/test_rulesets.py                 | 17 +++++++++++--
 5 files changed, 75 insertions(+), 24 deletions(-)

diff --git a/HISTORY.rst b/HISTORY.rst
index 67055e4..e1803c0 100644
--- a/HISTORY.rst
+++ b/HISTORY.rst
@@ -21,6 +21,8 @@ History
 * Read-only fields (``id``, ``is_valid``, ``validation_error``, etc.)
   are no longer echoed back in ``Ruleset`` / ``RulesetLibrary`` create/update request bodies.
 * Fixed ``SslZipFile`` uploads to send the required ``database_type=mysql`` form field.
+* **Breaking:** ``delete_ruleset_by_name_if_exists`` now requires a ``ruleset_type`` argument,
+  since ruleset names are unique only per type.
 
 1.0.5 (2026-06-18)
 ------------------
diff --git a/datamasque/client/discovery_configs.py b/datamasque/client/discovery_configs.py
index 617f367..d5e1a32 100644
--- a/datamasque/client/discovery_configs.py
+++ b/datamasque/client/discovery_configs.py
@@ -3,7 +3,7 @@
 
 from datamasque.client.base import BaseClient
 from datamasque.client.exceptions import DataMasqueApiError, DataMasqueException
-from datamasque.client.models.discovery_config import DiscoveryConfig, DiscoveryConfigId
+from datamasque.client.models.discovery_config import DiscoveryConfig, DiscoveryConfigId, DiscoveryConfigType
 from datamasque.client.models.pagination import Page
 
 logger = logging.getLogger(__name__)
@@ -33,17 +33,18 @@ def get_discovery_config(self, config_id: DiscoveryConfigId) -> DiscoveryConfig:
         response = self.make_request("GET", f"/api/discovery/configs/{config_id}/")
         return DiscoveryConfig.model_validate(response.json())
 
-    def get_discovery_config_by_name(self, name: str) -> Optional[DiscoveryConfig]:
+    def get_discovery_config_by_name(self, name: str, config_type: DiscoveryConfigType) -> Optional[DiscoveryConfig]:
         """
-        Looks for a discovery config matching the given name (case-sensitive, exact match).
+        Looks for a discovery config matching the given name and type (case-sensitive, exact match).
 
+        Config names are unique per type, so a type is required to identify a single config.
         Returns it if found, otherwise `None`.
         """
 
         response = self.make_request(
             "GET",
             "/api/discovery/configs/",
-            params={"name_exact": name, "limit": 1},
+            params={"name_exact": name, "config_type": config_type.value, "limit": 1},
         )
         page = Page[DiscoveryConfig].model_validate(response.json())
         if not page.results:
@@ -99,7 +100,7 @@ def create_or_update_discovery_config(self, config: DiscoveryConfig) -> Discover
         Sets the config's `id` property.
         """
 
-        existing = self.get_discovery_config_by_name(config.name)
+        existing = self.get_discovery_config_by_name(config.name, config.config_type)
         if existing is not None:
             config.id = existing.id
             return self.update_discovery_config(config)
@@ -115,15 +116,19 @@ def delete_discovery_config_by_id_if_exists(self, config_id: DiscoveryConfigId)
 
         self._delete_if_exists(f"/api/discovery/configs/{config_id}/")
 
-    def delete_discovery_config_by_name_if_exists(self, name: str) -> None:
+    def delete_discovery_config_by_name_if_exists(self, name: str, config_type: DiscoveryConfigType) -> None:
         """
-        Archives the discovery config with the given name.
+        Archives the discovery config with the given name and type.
 
-        No-op if the config does not exist.
+        Config names are unique per type, so a type is required to identify a single config.
+        No-op if no such config exists.
         """
 
-        all_configs = self.list_discovery_configs()
-        matching = [config for config in all_configs if config.name == name]
+        matching = [
+            config
+            for config in self.list_discovery_configs()
+            if config.name == name and config.config_type is config_type
+        ]
         for config in matching:
             if config.id is None:
                 raise DataMasqueException(f'Server returned a discovery config named "{config.name}" without an `id`.')
diff --git a/datamasque/client/rulesets.py b/datamasque/client/rulesets.py
index 77ab54d..5ea0ce9 100644
--- a/datamasque/client/rulesets.py
+++ b/datamasque/client/rulesets.py
@@ -2,7 +2,7 @@
 
 from datamasque.client.base import BaseClient
 from datamasque.client.exceptions import DataMasqueException
-from datamasque.client.models.ruleset import Ruleset, RulesetId
+from datamasque.client.models.ruleset import Ruleset, RulesetId, RulesetType
 
 logger = logging.getLogger(__name__)
 
@@ -45,12 +45,20 @@ def delete_ruleset_by_id_if_exists(self, ruleset_id: RulesetId) -> None:
 
         self._delete_if_exists(f"/api/rulesets/{ruleset_id}/")
 
-    def delete_ruleset_by_name_if_exists(self, ruleset_name: str) -> None:
-        """Deletes the ruleset with the given name. No-op if the ruleset does not exist."""
+    def delete_ruleset_by_name_if_exists(self, ruleset_name: str, ruleset_type: RulesetType) -> None:
+        """
+        Deletes the ruleset with the given name and type.
+
+        Ruleset names are unique per type, so a type is required to identify a single ruleset.
+        No-op if no such ruleset exists.
+        """
 
-        all_rulesets = self.list_rulesets()
-        rulesets_matching_name = [ruleset for ruleset in all_rulesets if ruleset.name == ruleset_name]
-        for ruleset in rulesets_matching_name:
+        matching = [
+            ruleset
+            for ruleset in self.list_rulesets()
+            if ruleset.name == ruleset_name and ruleset.ruleset_type is ruleset_type
+        ]
+        for ruleset in matching:
             if ruleset.id is None:
                 raise DataMasqueException(f'Server returned a ruleset named "{ruleset.name}" without an `id`.')
 
diff --git a/tests/test_discovery_configs.py b/tests/test_discovery_configs.py
index 7ea2b67..d8bdf68 100644
--- a/tests/test_discovery_configs.py
+++ b/tests/test_discovery_configs.py
@@ -11,6 +11,7 @@
 from datamasque.client.models.discovery_config import (
     DiscoveryConfig,
     DiscoveryConfigId,
+    DiscoveryConfigType,
     unwrap_discovery_config_id,
 )
 from datamasque.client.models.status import ValidationStatus
@@ -188,11 +189,12 @@ def test_get_discovery_config_by_name_found(
             json=sample_config_detail_response,
             status_code=200,
         )
-        config = client.get_discovery_config_by_name("my_config")
+        config = client.get_discovery_config_by_name("my_config", DiscoveryConfigType.database)
 
     assert config is not None
     assert config.name == "my_config"
     assert "name_exact=my_config" in m.request_history[0].url
+    assert "config_type=database" in m.request_history[0].url
 
 
 def test_get_discovery_config_by_name_not_found(client: DataMasqueClient) -> None:
@@ -204,7 +206,7 @@ def test_get_discovery_config_by_name_not_found(client: DataMasqueClient) -> Non
             json=empty_response,
             status_code=200,
         )
-        config = client.get_discovery_config_by_name("nonexistent")
+        config = client.get_discovery_config_by_name("nonexistent", DiscoveryConfigType.database)
 
     assert config is None
 
@@ -232,7 +234,7 @@ def test_get_discovery_config_by_name_raises_when_server_omits_id(client: DataMa
             status_code=200,
         )
         with pytest.raises(DataMasqueApiError, match="without an `id`"):
-            client.get_discovery_config_by_name("my_config")
+            client.get_discovery_config_by_name("my_config", DiscoveryConfigType.database)
 
 
 def test_create_discovery_config(client: DataMasqueClient, discovery_config: DiscoveryConfig) -> None:
@@ -394,7 +396,7 @@ def test_delete_discovery_config_by_name(client: DataMasqueClient, sample_config
     with requests_mock.Mocker() as m:
         m.get("http://test-server/api/discovery/configs/", json=sample_config_list_response, status_code=200)
         m.delete(f"http://test-server/api/discovery/configs/{CONFIG_ID_1}/", status_code=204)
-        client.delete_discovery_config_by_name_if_exists("my_config")
+        client.delete_discovery_config_by_name_if_exists("my_config", DiscoveryConfigType.database)
 
     assert m.request_history[0].method == "GET"
     assert m.request_history[1].method == "DELETE"
@@ -405,7 +407,7 @@ def test_delete_discovery_config_by_name_not_found(
 ) -> None:
     with requests_mock.Mocker() as m:
         m.get("http://test-server/api/discovery/configs/", json=sample_config_list_response, status_code=200)
-        client.delete_discovery_config_by_name_if_exists("nonexistent")
+        client.delete_discovery_config_by_name_if_exists("nonexistent", DiscoveryConfigType.database)
 
     assert m.call_count == 1
     assert m.request_history[0].method == "GET"
@@ -434,7 +436,28 @@ def test_delete_discovery_config_by_name_raises_when_server_omits_id(client: Dat
             status_code=200,
         )
         with pytest.raises(DataMasqueException, match="without an `id`"):
-            client.delete_discovery_config_by_name_if_exists("my_config")
+            client.delete_discovery_config_by_name_if_exists("my_config", DiscoveryConfigType.database)
+
+
+def test_delete_discovery_config_by_name_only_deletes_matching_type(client: DataMasqueClient) -> None:
+    shared_name_response = {
+        "count": 2,
+        "next": None,
+        "previous": None,
+        "results": [
+            {"id": CONFIG_ID_1, "name": "shared", "config_type": "database", "archived": False},
+            {"id": CONFIG_ID_2, "name": "shared", "config_type": "file", "archived": False},
+        ],
+    }
+
+    with requests_mock.Mocker() as m:
+        m.get("http://test-server/api/discovery/configs/", json=shared_name_response, status_code=200)
+        m.delete(f"http://test-server/api/discovery/configs/{CONFIG_ID_2}/", status_code=204)
+        client.delete_discovery_config_by_name_if_exists("shared", DiscoveryConfigType.file)
+
+    assert m.request_history[-1].method == "DELETE"
+    assert CONFIG_ID_2 in m.request_history[-1].url
+    assert CONFIG_ID_1 not in m.request_history[-1].url
 
 
 def test_get_default_discovery_config_yaml(client: DataMasqueClient) -> None:
diff --git a/tests/test_rulesets.py b/tests/test_rulesets.py
index aff5f32..1200a68 100644
--- a/tests/test_rulesets.py
+++ b/tests/test_rulesets.py
@@ -101,13 +101,26 @@ def test_delete_ruleset_by_name(client, existing_rulesets_json):
             status_code=200,
         )
         m.delete("http://test-server/api/rulesets/2/", status_code=204)
-        client.delete_ruleset_by_name_if_exists("file_masking_ruleset")
+        client.delete_ruleset_by_name_if_exists("file_masking_ruleset", RulesetType.file)
 
     assert m.call_count == 2
     assert m.request_history[0].method == "GET"
     assert m.request_history[1].method == "DELETE"
 
 
+def test_delete_ruleset_by_name_skips_other_type(client, existing_rulesets_json):
+    with requests_mock.Mocker() as m:
+        m.get(
+            "http://test-server/api/v2/rulesets/",
+            json=existing_rulesets_json,
+            status_code=200,
+        )
+        client.delete_ruleset_by_name_if_exists("file_masking_ruleset", RulesetType.database)
+
+    assert m.call_count == 1
+    assert m.request_history[0].method == "GET"
+
+
 def test_delete_ruleset_that_does_not_exist(client, existing_rulesets_json):
     with requests_mock.Mocker() as m:
         m.get(
@@ -115,7 +128,7 @@ def test_delete_ruleset_that_does_not_exist(client, existing_rulesets_json):
             json=existing_rulesets_json,
             status_code=200,
         )
-        client.delete_ruleset_by_name_if_exists("not_a_ruleset")
+        client.delete_ruleset_by_name_if_exists("not_a_ruleset", RulesetType.database)
 
     assert m.call_count == 1
     assert m.request_history[0].method == "GET"

From db41755a75c18342296ea292b12e578c8641a9e0 Mon Sep 17 00:00:00 2001
From: Colin Haywood <colin.haywood@datamasque.com>
Date: Wed, 24 Jun 2026 13:02:19 +1200
Subject: [PATCH 19/26] fix: populate validation status on discovery-config
 create/update

Copy `is_valid`/`validation_error` back from the server response, matching the ruleset-library client.
---
 datamasque/client/discovery_configs.py | 38 ++++++++++++++++++--------
 tests/test_discovery_configs.py        | 28 ++++++++-----------
 2 files changed, 38 insertions(+), 28 deletions(-)

diff --git a/datamasque/client/discovery_configs.py b/datamasque/client/discovery_configs.py
index d5e1a32..b8cb5d5 100644
--- a/datamasque/client/discovery_configs.py
+++ b/datamasque/client/discovery_configs.py
@@ -33,13 +33,10 @@ def get_discovery_config(self, config_id: DiscoveryConfigId) -> DiscoveryConfig:
         response = self.make_request("GET", f"/api/discovery/configs/{config_id}/")
         return DiscoveryConfig.model_validate(response.json())
 
-    def get_discovery_config_by_name(self, name: str, config_type: DiscoveryConfigType) -> Optional[DiscoveryConfig]:
-        """
-        Looks for a discovery config matching the given name and type (case-sensitive, exact match).
-
-        Config names are unique per type, so a type is required to identify a single config.
-        Returns it if found, otherwise `None`.
-        """
+    def _get_discovery_config_id_by_name(
+        self, name: str, config_type: DiscoveryConfigType
+    ) -> Optional[DiscoveryConfigId]:
+        """Return the id of the config matching name and type via a single list request, or `None`."""
 
         response = self.make_request(
             "GET",
@@ -57,19 +54,36 @@ def get_discovery_config_by_name(self, name: str, config_type: DiscoveryConfigTy
                 response=response,
             )
 
+        return config_id
+
+    def get_discovery_config_by_name(self, name: str, config_type: DiscoveryConfigType) -> Optional[DiscoveryConfig]:
+        """
+        Looks for a discovery config matching the given name and type (case-sensitive, exact match).
+
+        Config names are unique per type, so a type is required to identify a single config.
+        Returns it if found, otherwise `None`.
+        """
+
+        config_id = self._get_discovery_config_id_by_name(name, config_type)
+        if config_id is None:
+            return None
+
         return self.get_discovery_config(config_id)
 
     def create_discovery_config(self, config: DiscoveryConfig) -> DiscoveryConfig:
         """
         Creates a new discovery config on the server.
 
-        Sets the config's server-assigned fields (`id`, `created`, `modified`) and returns the config.
+        Sets the config's server-assigned fields
+        (`id`, `is_valid`, `validation_error`, `created`, `modified`) and returns the config.
         """
 
         data = config.model_dump(exclude_none=True, by_alias=True, mode="json")
         response = self.make_request("POST", "/api/discovery/configs/", data=data)
         created = DiscoveryConfig.model_validate(response.json())
         config.id = created.id
+        config.is_valid = created.is_valid
+        config.validation_error = created.validation_error
         config.created = created.created
         config.modified = created.modified
         logger.info('Creation of discovery config "%s" successful', config.name)
@@ -89,6 +103,8 @@ def update_discovery_config(self, config: DiscoveryConfig) -> DiscoveryConfig:
         data = config.model_dump(exclude_none=True, by_alias=True, mode="json")
         response = self.make_request("PUT", f"/api/discovery/configs/{config.id}/", data=data)
         updated = DiscoveryConfig.model_validate(response.json())
+        config.is_valid = updated.is_valid
+        config.validation_error = updated.validation_error
         config.modified = updated.modified
         logger.debug('Update of discovery config "%s" successful', config.name)
         return config
@@ -100,9 +116,9 @@ def create_or_update_discovery_config(self, config: DiscoveryConfig) -> Discover
         Sets the config's `id` property.
         """
 
-        existing = self.get_discovery_config_by_name(config.name, config.config_type)
-        if existing is not None:
-            config.id = existing.id
+        existing_id = self._get_discovery_config_id_by_name(config.name, config.config_type)
+        if existing_id is not None:
+            config.id = existing_id
             return self.update_discovery_config(config)
 
         return self.create_discovery_config(config)
diff --git a/tests/test_discovery_configs.py b/tests/test_discovery_configs.py
index d8bdf68..caa4b31 100644
--- a/tests/test_discovery_configs.py
+++ b/tests/test_discovery_configs.py
@@ -243,6 +243,7 @@ def test_create_discovery_config(client: DataMasqueClient, discovery_config: Dis
         "name": "test_config",
         "config_yaml": "labels: []\nmetadata_rules: []\nidd_rules: []\n",
         "config_type": "database",
+        "is_valid": "in_progress",
         "archived": False,
         "created": "2025-06-01T10:00:00Z",
         "modified": "2025-06-01T10:00:00Z",
@@ -258,6 +259,7 @@ def test_create_discovery_config(client: DataMasqueClient, discovery_config: Dis
 
     assert result is discovery_config
     assert result.id == DiscoveryConfigId(CONFIG_ID_1)
+    assert result.is_valid is ValidationStatus.in_progress
     assert result.created == datetime.fromisoformat("2025-06-01T10:00:00+00:00")
     assert result.modified == datetime.fromisoformat("2025-06-01T10:00:00+00:00")
 
@@ -265,6 +267,11 @@ def test_create_discovery_config(client: DataMasqueClient, discovery_config: Dis
     assert request_body["name"] == "test_config"
     assert request_body["config_yaml"] == "labels: []\nmetadata_rules: []\nidd_rules: []\n"
     assert request_body["config_type"] == "database"
+    # Server-managed read-only fields must not be echoed back in the request body.
+    assert "id" not in request_body
+    assert "is_valid" not in request_body
+    assert "created" not in request_body
+    assert "modified" not in request_body
 
 
 def test_update_discovery_config(client: DataMasqueClient, discovery_config: DiscoveryConfig) -> None:
@@ -294,6 +301,8 @@ def test_update_discovery_config(client: DataMasqueClient, discovery_config: Dis
     request_body = m.last_request.json()
     assert request_body["name"] == "test_config"
     assert request_body["config_yaml"] == "labels: []\nmetadata_rules: []\nidd_rules: []\n"
+    # The id identifies the config in the URL, not the request body.
+    assert "id" not in request_body
 
 
 def test_update_discovery_config_no_id_raises(client: DataMasqueClient, discovery_config: DiscoveryConfig) -> None:
@@ -339,15 +348,6 @@ def test_create_or_update_discovery_config_update(client: DataMasqueClient, disc
             },
         ],
     }
-    detail_response = {
-        "id": CONFIG_ID_1,
-        "name": "test_config",
-        "config_yaml": "labels: []",
-        "config_type": "database",
-        "archived": False,
-        "created": "2025-06-01T10:00:00Z",
-        "modified": "2025-06-01T10:00:00Z",
-    }
     update_response = {
         "id": CONFIG_ID_1,
         "name": "test_config",
@@ -360,11 +360,6 @@ def test_create_or_update_discovery_config_update(client: DataMasqueClient, disc
 
     with requests_mock.Mocker() as m:
         m.get("http://test-server/api/discovery/configs/", json=list_response, status_code=200)
-        m.get(
-            f"http://test-server/api/discovery/configs/{CONFIG_ID_1}/",
-            json=detail_response,
-            status_code=200,
-        )
         m.put(
             f"http://test-server/api/discovery/configs/{CONFIG_ID_1}/",
             json=update_response,
@@ -373,9 +368,8 @@ def test_create_or_update_discovery_config_update(client: DataMasqueClient, disc
         result = client.create_or_update_discovery_config(discovery_config)
 
     assert result.id == DiscoveryConfigId(CONFIG_ID_1)
-    assert m.request_history[0].method == "GET"
-    assert m.request_history[1].method == "GET"
-    assert m.request_history[2].method == "PUT"
+    # The upsert resolves the id from the list response alone — no detail GET before the PUT.
+    assert [r.method for r in m.request_history] == ["GET", "PUT"]
 
 
 def test_delete_discovery_config_by_id(client: DataMasqueClient) -> None:

From f1464c722f377ea38e25a02459b86a92c6effc90 Mon Sep 17 00:00:00 2001
From: Colin Haywood <colin.haywood@datamasque.com>
Date: Wed, 24 Jun 2026 13:02:19 +1200
Subject: [PATCH 20/26] fix: tighten file-discovery result models

Require the fields the server always returns; type `connection` as `RunConnectionRef`.
---
 datamasque/client/models/discovery.py | 31 ++++++++++++++-------------
 tests/test_discovery.py               | 30 ++++++++++++++++++++++++++
 tests/test_runs.py                    | 11 +++++++++-
 3 files changed, 56 insertions(+), 16 deletions(-)

diff --git a/datamasque/client/models/discovery.py b/datamasque/client/models/discovery.py
index 6a41b1c..7bccefa 100644
--- a/datamasque/client/models/discovery.py
+++ b/datamasque/client/models/discovery.py
@@ -9,6 +9,7 @@
 from datamasque.client.models.data_selection import HashColumnsTableConfig, Locator, UserSelection
 from datamasque.client.models.discovery_config import DiscoveryConfig, DiscoveryConfigId, unwrap_discovery_config_id
 from datamasque.client.models.pagination import Page
+from datamasque.client.models.runs import RunConnectionRef
 
 
 class InDataDiscoveryRule(BaseModel):
@@ -340,11 +341,11 @@ class FileDiscoveryMatch(BaseModel):
 
     model_config = ConfigDict(extra="allow")
 
-    categories: Optional[list[str]] = None
-    flagged_by: Optional[str] = None
-    description: Optional[str] = None
-    label: Optional[str] = None
-    hit_ratio: Optional[int] = None
+    flagged_by: str
+    description: str
+    label: Optional[str] = None  # Omitted for non-sensitive and ignored matches.
+    categories: Optional[list[str]] = None  # Omitted for ignored matches.
+    hit_ratio: Optional[int] = None  # None for metadata matches, percentage 0-100 for IDD matches.
 
 
 class FileDiscoveryLocatorResult(BaseModel):
@@ -352,9 +353,9 @@ class FileDiscoveryLocatorResult(BaseModel):
 
     model_config = ConfigDict(extra="allow")
 
-    locator: Optional[Locator] = None
-    matches: Optional[list[FileDiscoveryMatch]] = None
-    data_types: Optional[list[str]] = None
+    locator: Locator
+    matches: list[FileDiscoveryMatch]
+    data_types: list[str]
 
 
 class FileDiscoveryFile(BaseModel):
@@ -362,8 +363,8 @@ class FileDiscoveryFile(BaseModel):
 
     model_config = ConfigDict(extra="allow")
 
-    path: Optional[str] = None
-    file_type: Optional[str] = None
+    path: str
+    file_type: str
     delimiter: Optional[str] = None
     encoding: Optional[str] = None
 
@@ -373,8 +374,8 @@ class FileDiscoveryResult(BaseModel):
 
     model_config = ConfigDict(extra="allow")
 
-    id: Optional[int] = None
-    connection: Optional[Any] = None
-    file_type: Optional[str] = None
-    files: Optional[list[FileDiscoveryFile]] = None
-    results: Optional[list[FileDiscoveryLocatorResult]] = None
+    id: int
+    connection: RunConnectionRef
+    file_type: str
+    files: list[FileDiscoveryFile]
+    results: list[FileDiscoveryLocatorResult]
diff --git a/tests/test_discovery.py b/tests/test_discovery.py
index 8605474..a5ede9b 100644
--- a/tests/test_discovery.py
+++ b/tests/test_discovery.py
@@ -15,6 +15,7 @@
     FileDataDiscoveryFromConfigRequest,
     FileDataDiscoveryOptions,
     FileDataDiscoveryRequest,
+    FileDiscoveryResult,
     FileFilter,
     FileFilterMatchAgainst,
     FileRulesetGenerationRequest,
@@ -837,6 +838,35 @@ def test_file_filter_rejects_empty_pattern():
         FileFilter(regex="", glob="*.csv")
 
 
+def test_file_discovery_result_parses_server_response():
+    """A file-discovery-results record parses; match label/categories stay optional for non-sensitive matches."""
+    result = FileDiscoveryResult.model_validate(
+        {
+            "id": 7,
+            "connection": {"id": "conn-1", "name": "my files"},
+            "file_type": "csv",
+            "files": [{"path": "data/people.csv", "file_type": "csv", "delimiter": ",", "encoding": "utf-8"}],
+            "results": [
+                {
+                    "locator": "email",
+                    "data_types": ["string"],
+                    "matches": [
+                        {"flagged_by": "MDD", "description": "Email", "label": "email", "categories": ["PII"]},
+                        {"flagged_by": "MDD", "description": "Not sensitive"},
+                    ],
+                }
+            ],
+        }
+    )
+
+    assert result.id == 7
+    assert result.connection.name == "my files"
+    assert result.files[0].path == "data/people.csv"
+    non_sensitive_match = result.results[0].matches[1]
+    assert non_sensitive_match.label is None
+    assert non_sensitive_match.categories is None
+
+
 def test_file_data_discovery_ignore_rules_serialize():
     """`in_data_discovery.ignore_rules` round-trips into the wire payload."""
     req = FileDataDiscoveryRequest(
diff --git a/tests/test_runs.py b/tests/test_runs.py
index 24cbcd6..d4be2e8 100644
--- a/tests/test_runs.py
+++ b/tests/test_runs.py
@@ -48,12 +48,21 @@ def test_get_file_data_discovery_report(client):
     with requests_mock.Mocker() as m:
         m.get(
             "http://test-server/api/runs/1/file-discovery-results/",
-            json=[{"id": 1, "file_type": "csv", "files": [], "results": []}],
+            json=[
+                {
+                    "id": 1,
+                    "connection": {"id": "conn-1", "name": "files"},
+                    "file_type": "csv",
+                    "files": [],
+                    "results": [],
+                }
+            ],
             status_code=200,
         )
         results = client.get_file_data_discovery_report(RunId(1))
         assert len(results) == 1
         assert results[0].id == 1
+        assert results[0].connection.name == "files"
 
 
 def test_unfinished_run_str_with_destination():

From eb5e26ef81cf6015f029624ccdccfd19840df485 Mon Sep 17 00:00:00 2001
From: Colin Haywood <colin.haywood@datamasque.com>
Date: Wed, 24 Jun 2026 13:02:19 +1200
Subject: [PATCH 21/26] fix: raise when a generated-ruleset archive contains no
 rulesets

---
 datamasque/client/discovery.py |  7 +++++++
 tests/test_discovery.py        | 27 +++++++++++++++++++++++++++
 2 files changed, 34 insertions(+)

diff --git a/datamasque/client/discovery.py b/datamasque/client/discovery.py
index 392c32e..94de3cb 100644
--- a/datamasque/client/discovery.py
+++ b/datamasque/client/discovery.py
@@ -192,6 +192,13 @@ def get_generated_rulesets(self, connection_id: ConnectionId) -> list[Ruleset]:
                         with zip_file.open(file_info) as file:
                             yaml_content = file.read().decode("utf-8")
                             rulesets.append(Ruleset(name=Path(file_info.filename).stem, yaml=yaml_content))
+
+            if not rulesets:
+                raise DataMasqueException(
+                    f"Ruleset generation for connection {connection_id} reported `finished` "
+                    f"but the downloaded archive contained no rulesets."
+                )
+
             return rulesets
 
         generated = response.json().get("generated_ruleset")
diff --git a/tests/test_discovery.py b/tests/test_discovery.py
index a5ede9b..298f160 100644
--- a/tests/test_discovery.py
+++ b/tests/test_discovery.py
@@ -193,6 +193,33 @@ def test_get_generated_rulesets_success(client):
         assert rulesets[1].yaml == yaml_content_2.decode("utf-8")
 
 
+def test_get_generated_rulesets_empty_archive_raises(client):
+    """A finished task whose download archive contains no ruleset files raises a clear error."""
+    connection_id = ConnectionId("1")
+
+    with requests_mock.Mocker() as m:
+        m.get(
+            f"http://test-server/api/async-generate-ruleset/{connection_id}/",
+            json={"status": "finished"},
+            status_code=200,
+        )
+
+        zip_buffer = BytesIO()
+        with zipfile.ZipFile(zip_buffer, "w") as zip_file:
+            zip_file.writestr("README.txt", "no rulesets here")
+        zip_buffer.seek(0)
+
+        m.get(
+            f"http://test-server/api/async-generate-ruleset/{connection_id}/download-rulesets/",
+            content=zip_buffer.getvalue(),
+            headers={"Content-Disposition": 'attachment; filename="rulesets.zip"'},
+            status_code=200,
+        )
+
+        with pytest.raises(DataMasqueException, match="contained no rulesets"):
+            client.get_generated_rulesets(connection_id)
+
+
 def test_get_generated_rulesets_from_selection_success(client):
     """Non-CSV async RG: server 303s to the task-status endpoint, whose JSON body carries `generated_ruleset`."""
     connection_id = ConnectionId("1")

From 45877a7c8d4382552f92584a335f088da2606958 Mon Sep 17 00:00:00 2001
From: Colin Haywood <colin.haywood@datamasque.com>
Date: Wed, 24 Jun 2026 13:02:19 +1200
Subject: [PATCH 22/26] docs: tidy the unreleased changelog

---
 HISTORY.rst | 11 +++++------
 1 file changed, 5 insertions(+), 6 deletions(-)

diff --git a/HISTORY.rst b/HISTORY.rst
index e1803c0..7f43206 100644
--- a/HISTORY.rst
+++ b/HISTORY.rst
@@ -5,18 +5,17 @@ History
 1.1.0 (unreleased)
 ------------------
 
-* Added discovery-config management APIs (``list_discovery_configs``, ``create_discovery_config``, and friends).
+* Added discovery configuration models and management APIs.
 * Added schema-discovery and file-data-discovery APIs that take a saved discovery configuration
   (``start_schema_discovery_run_from_config`` / ``start_file_data_discovery_run_from_config``).
   Adoption is recommended; the older APIs that take individual options will be deprecated in a future release.
 * Corrected the file-data-discovery ``include``/``skip`` filter syntax and added ``ignore_rules`` support.
-* Added ``InvalidDiscoveryConfigError``, raised when a discovery run can't start due to an invalid discovery config.
-* Added ``is_user_subscribed`` to ``MaskingRunRequest``
-  to subscribe the requesting user to a run's email notifications.
+* Added ``InvalidDiscoveryConfigError`` and ``DiscoveryConfigNotFoundError``,
+  raised when a discovery run can't start due to an unusable or missing discovery config.
+* Added ``is_user_subscribed`` to ``MaskingRunRequest`` to subscribe the requesting user to a run's email notifications.
 * Added ``auto_pull`` / ``auto_pull_branch`` to ``MaskingRunOptions``
   to refresh the run's ruleset from git before starting.
-* Added ``validation_error`` (and ``validation_error_type`` for rulesets)
-  to ``Ruleset`` and ``RulesetLibrary``.
+* Added ``validation_error`` (and ``validation_error_type`` for rulesets) to ``Ruleset`` and ``RulesetLibrary``.
 * Exposed git provenance on ``Ruleset`` and ``RulesetLibrary`` as a nested ``git`` field (``GitSnapshot``).
 * Read-only fields (``id``, ``is_valid``, ``validation_error``, etc.)
   are no longer echoed back in ``Ruleset`` / ``RulesetLibrary`` create/update request bodies.

From cb865d16d4c1f31b92f6c9dfe9663b6968f001ec Mon Sep 17 00:00:00 2001
From: Colin Haywood <colin.haywood@datamasque.com>
Date: Wed, 24 Jun 2026 13:49:43 +1200
Subject: [PATCH 23/26] docs: drop references to "archive" from docstrings

---
 datamasque/client/discovery_configs.py | 6 +++---
 datamasque/client/ruleset_libraries.py | 6 +++---
 2 files changed, 6 insertions(+), 6 deletions(-)

diff --git a/datamasque/client/discovery_configs.py b/datamasque/client/discovery_configs.py
index b8cb5d5..60f448d 100644
--- a/datamasque/client/discovery_configs.py
+++ b/datamasque/client/discovery_configs.py
@@ -19,7 +19,7 @@ def iter_discovery_configs(self) -> Iterator[DiscoveryConfig]:
 
     def list_discovery_configs(self) -> list[DiscoveryConfig]:
         """
-        Lists all (non-archived) discovery configs.
+        Lists all discovery configs.
 
         Note: the YAML content is not included in the list response for performance.
         Use `get_discovery_config` to retrieve the full config with its YAML body.
@@ -125,7 +125,7 @@ def create_or_update_discovery_config(self, config: DiscoveryConfig) -> Discover
 
     def delete_discovery_config_by_id_if_exists(self, config_id: DiscoveryConfigId) -> None:
         """
-        Archives the discovery config with the given ID.
+        Deletes the discovery config with the given ID.
 
         No-op if the config does not exist.
         """
@@ -134,7 +134,7 @@ def delete_discovery_config_by_id_if_exists(self, config_id: DiscoveryConfigId)
 
     def delete_discovery_config_by_name_if_exists(self, name: str, config_type: DiscoveryConfigType) -> None:
         """
-        Archives the discovery config with the given name and type.
+        Deletes the discovery config with the given name and type.
 
         Config names are unique per type, so a type is required to identify a single config.
         No-op if no such config exists.
diff --git a/datamasque/client/ruleset_libraries.py b/datamasque/client/ruleset_libraries.py
index 8943553..e734bc2 100644
--- a/datamasque/client/ruleset_libraries.py
+++ b/datamasque/client/ruleset_libraries.py
@@ -115,7 +115,7 @@ def create_or_update_ruleset_library(self, library: RulesetLibrary) -> RulesetLi
 
     def delete_ruleset_library_by_id_if_exists(self, library_id: RulesetLibraryId, *, force: bool = False) -> None:
         """
-        Deletes (archives) the ruleset library with the given ID.
+        Deletes the ruleset library with the given ID.
 
         No-op if the library does not exist.
 
@@ -144,13 +144,13 @@ def delete_ruleset_library_by_name_if_exists(
             self.delete_ruleset_library_by_id_if_exists(lib.id, force=force)
 
     def iter_rulesets_using_library(self, library_id: RulesetLibraryId) -> Iterator[Ruleset]:
-        """Lazily iterate non-archived rulesets that import the given library."""
+        """Lazily iterate rulesets that import the given library."""
 
         return self._iter_paginated(f"/api/ruleset-libraries/{library_id}/rulesets/", model=Ruleset)
 
     def list_rulesets_using_library(self, library_id: RulesetLibraryId) -> list[Ruleset]:
         """
-        Lists non-archived rulesets that import the given library.
+        Lists rulesets that import the given library.
 
         Note: The YAML content is not included in the response for performance.
         Each returned Ruleset will have an empty string for `yaml`.

From edfa6e049779813346963b05e2bc68300283e5f6 Mon Sep 17 00:00:00 2001
From: Colin Haywood <colin.haywood@datamasque.com>
Date: Wed, 24 Jun 2026 14:20:36 +1200
Subject: [PATCH 24/26] refactor: collapse duplicated saved-config discovery
 start methods

---
 datamasque/client/discovery.py | 50 +++++++++++-----------------------
 1 file changed, 16 insertions(+), 34 deletions(-)

diff --git a/datamasque/client/discovery.py b/datamasque/client/discovery.py
index 94de3cb..0d2155b 100644
--- a/datamasque/client/discovery.py
+++ b/datamasque/client/discovery.py
@@ -297,29 +297,7 @@ def start_schema_discovery_run_from_config(self, request: SchemaDiscoveryFromCon
             FailedToStartError: the run failed to start for any other reason.
         """
 
-        data = request.model_dump(exclude_none=True, mode="json")
-        # The server requires `discovery_config` to be present; a null selects its built-in defaults,
-        # so send it explicitly rather than letting `exclude_none` drop a None.
-        data.setdefault("discovery_config", None)
-        response = self.make_request(
-            "POST",
-            "/api/schema-discovery/v2/",
-            data=data,
-            require_status_check=False,
-        )
-        run_data = response.json() if response.content else {}
-
-        if response.status_code == 201:
-            logger.info("Schema discovery run %s started successfully", run_data["id"])
-            return RunId(run_data["id"])
-
-        logger.error("Schema discovery run failed to start: %s", run_data)
-        self._maybe_raise_discovery_config_error(run_data, response, "Schema discovery")
-        raise FailedToStartError(
-            f"Schema discovery run failed to start "
-            f"(server responded with status {response.status_code}: {response.text}).",
-            response=response,
-        )
+        return self._start_discovery_run_from_config(request, "/api/schema-discovery/v2/", "Schema discovery")
 
     def start_file_data_discovery_run_from_config(self, request: FileDataDiscoveryFromConfigRequest) -> RunId:
         """
@@ -341,27 +319,31 @@ def start_file_data_discovery_run_from_config(self, request: FileDataDiscoveryFr
             FailedToStartError: the run failed to start for any other reason.
         """
 
+        return self._start_discovery_run_from_config(request, "/api/run-file-data-discovery/v2/", "File data discovery")
+
+    def _start_discovery_run_from_config(
+        self,
+        request: Union[SchemaDiscoveryFromConfigRequest, FileDataDiscoveryFromConfigRequest],
+        path: str,
+        run_kind: str,
+    ) -> RunId:
+        """Post a saved-config discovery request and return its run id, classifying config errors on failure."""
+
         data = request.model_dump(exclude_none=True, mode="json")
         # The server requires `discovery_config` to be present; a null selects its built-in defaults,
         # so send it explicitly rather than letting `exclude_none` drop a None.
         data.setdefault("discovery_config", None)
-        response = self.make_request(
-            "POST",
-            "/api/run-file-data-discovery/v2/",
-            data=data,
-            require_status_check=False,
-        )
+        response = self.make_request("POST", path, data=data, require_status_check=False)
         run_data = response.json() if response.content else {}
 
         if response.status_code == 201:
-            logger.info("File data discovery run %s started successfully", run_data["id"])
+            logger.info("%s run %s started successfully", run_kind, run_data["id"])
             return RunId(run_data["id"])
 
-        logger.error("File data discovery run failed to start: %s", run_data)
-        self._maybe_raise_discovery_config_error(run_data, response, "File data discovery")
+        logger.error("%s run failed to start: %s", run_kind, run_data)
+        self._maybe_raise_discovery_config_error(run_data, response, run_kind)
         raise FailedToStartError(
-            f"File data discovery run failed to start "
-            f"(server responded with status {response.status_code}: {response.text}).",
+            f"{run_kind} run failed to start (server responded with status {response.status_code}: {response.text}).",
             response=response,
         )
 

From 55c1dc800c5bd02b7ce7755c6c5085689f3f5393 Mon Sep 17 00:00:00 2001
From: Colin Haywood <colin.haywood@datamasque.com>
Date: Wed, 24 Jun 2026 14:46:18 +1200
Subject: [PATCH 25/26] feat: add get_discovery_run_config_snapshot_yaml

---
 HISTORY.rst                    |  2 ++
 datamasque/client/discovery.py | 19 +++++++++++++
 tests/test_discovery.py        | 50 ++++++++++++++++++++++++++++++++++
 3 files changed, 71 insertions(+)

diff --git a/HISTORY.rst b/HISTORY.rst
index 7f43206..e239d5e 100644
--- a/HISTORY.rst
+++ b/HISTORY.rst
@@ -12,6 +12,8 @@ History
 * Corrected the file-data-discovery ``include``/``skip`` filter syntax and added ``ignore_rules`` support.
 * Added ``InvalidDiscoveryConfigError`` and ``DiscoveryConfigNotFoundError``,
   raised when a discovery run can't start due to an unusable or missing discovery config.
+* Added ``get_discovery_run_config_snapshot_yaml`` to retrieve the discovery-config YAML
+  that was effective at the start of a given discovery run.
 * Added ``is_user_subscribed`` to ``MaskingRunRequest`` to subscribe the requesting user to a run's email notifications.
 * Added ``auto_pull`` / ``auto_pull_branch`` to ``MaskingRunOptions``
   to refresh the run's ruleset from git before starting.
diff --git a/datamasque/client/discovery.py b/datamasque/client/discovery.py
index 0d2155b..f6f3850 100644
--- a/datamasque/client/discovery.py
+++ b/datamasque/client/discovery.py
@@ -440,3 +440,22 @@ def get_file_data_discovery_report(self, run_id: RunId) -> list[FileDiscoveryRes
 
         response = self.make_request("GET", f"api/runs/{run_id}/file-discovery-results/")
         return [FileDiscoveryResult.model_validate(d) for d in response.json()]
+
+    def get_discovery_run_config_snapshot_yaml(self, run_id: RunId, *, timezone: Optional[str] = None) -> str:
+        """
+        Returns the discovery-config YAML that was effective at the start of the given discovery run.
+
+        The YAML is prefixed with a commented provenance header naming the saved config
+        (or the built-in defaults) the run used, and whether it has since been modified or deleted.
+        `timezone`, a `±HH:MM` UTC offset, sets the timezone of the header timestamp; the server defaults to UTC.
+        """
+
+        params = {"timezone": timezone} if timezone is not None else None
+        response = self.make_request("GET", f"/api/discovery/runs/{run_id}/config-snapshot/", params=params)
+        with zipfile.ZipFile(BytesIO(response.content)) as zip_file:
+            names = zip_file.namelist()
+            if not names:
+                raise DataMasqueException(f"Discovery run {run_id} config snapshot archive contained no files.")
+
+            with zip_file.open(names[0]) as snapshot_file:
+                return snapshot_file.read().decode("utf-8")
diff --git a/tests/test_discovery.py b/tests/test_discovery.py
index 298f160..42debfd 100644
--- a/tests/test_discovery.py
+++ b/tests/test_discovery.py
@@ -1213,3 +1213,53 @@ def test_start_schema_discovery_run_from_config_non_discovery_config_400_still_r
                 ),
             )
         assert not isinstance(exc_info.value, InvalidDiscoveryConfigError)
+
+
+def _zip_bytes(*members: tuple[str, str]) -> bytes:
+    buffer = BytesIO()
+    with zipfile.ZipFile(buffer, "w") as zip_file:
+        for name, content in members:
+            zip_file.writestr(name, content)
+
+    return buffer.getvalue()
+
+
+def test_get_discovery_run_config_snapshot_yaml_returns_yaml(client):
+    """The single zipped snapshot member is unzipped and returned as a YAML string."""
+    snapshot = "# Discovery configuration: my_cfg.\nlabels: []\nmetadata_rules: []\n"
+    with requests_mock.Mocker() as m:
+        m.get(
+            "http://test-server/api/discovery/runs/7/config-snapshot/",
+            content=_zip_bytes(("config-7.yaml", snapshot)),
+            headers={"Content-Disposition": 'attachment; filename="config-7.yaml.zip"'},
+            status_code=200,
+        )
+        result = client.get_discovery_run_config_snapshot_yaml(RunId(7))
+
+    assert result == snapshot
+    assert "timezone" not in (m.last_request.qs or {})
+
+
+def test_get_discovery_run_config_snapshot_yaml_forwards_timezone(client):
+    """An explicit `timezone` is forwarded as a query param for the provenance-header timestamp."""
+    with requests_mock.Mocker() as m:
+        m.get(
+            "http://test-server/api/discovery/runs/7/config-snapshot/",
+            content=_zip_bytes(("config-7.yaml", "labels: []\n")),
+            status_code=200,
+        )
+        client.get_discovery_run_config_snapshot_yaml(RunId(7), timezone="+12:00")
+
+    assert m.last_request.qs["timezone"] == ["+12:00"]
+
+
+def test_get_discovery_run_config_snapshot_yaml_empty_archive_raises(client):
+    """An archive with no members raises a clear error rather than an IndexError."""
+    with requests_mock.Mocker() as m:
+        m.get(
+            "http://test-server/api/discovery/runs/7/config-snapshot/",
+            content=_zip_bytes(),
+            status_code=200,
+        )
+        with pytest.raises(DataMasqueException, match="contained no files"):
+            client.get_discovery_run_config_snapshot_yaml(RunId(7))

From d0c59d5508775b6a206f2f220466bc0c37bb20ea Mon Sep 17 00:00:00 2001
From: Colin Haywood <colin.haywood@datamasque.com>
Date: Wed, 24 Jun 2026 15:02:32 +1200
Subject: [PATCH 26/26] chore: release 1.1.0

---
 HISTORY.rst    | 2 +-
 pyproject.toml | 2 +-
 setup.cfg      | 2 +-
 3 files changed, 3 insertions(+), 3 deletions(-)

diff --git a/HISTORY.rst b/HISTORY.rst
index e239d5e..dc72f9a 100644
--- a/HISTORY.rst
+++ b/HISTORY.rst
@@ -2,7 +2,7 @@
 History
 =======
 
-1.1.0 (unreleased)
+1.1.0 (2026-06-24)
 ------------------
 
 * Added discovery configuration models and management APIs.
diff --git a/pyproject.toml b/pyproject.toml
index 7ba57f5..1d226e4 100644
--- a/pyproject.toml
+++ b/pyproject.toml
@@ -1,6 +1,6 @@
 [project]
 name = "datamasque-python"
-version = "1.1.0.dev0"
+version = "1.1.0"
 description = "Official Python client for the DataMasque data-masking API."
 authors = [
     { name = "DataMasque Ltd" },
diff --git a/setup.cfg b/setup.cfg
index 24d750a..26b71a7 100644
--- a/setup.cfg
+++ b/setup.cfg
@@ -1,5 +1,5 @@
 [bumpversion]
-current_version = 1.1.0.dev0
+current_version = 1.1.0
 commit = True
 tag = True