Skip to content

Commit 3b65b2b

Browse files
Merge pull request #20 from datamasque/discovery-run-type
feat: Add DiscoveryConfigType and DiscoveryConfigNotFoundError
2 parents 1fd7e6f + a071a09 commit 3b65b2b

6 files changed

Lines changed: 98 additions & 12 deletions

File tree

datamasque/client/__init__.py

Lines changed: 4 additions & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -17,6 +17,7 @@
1717
DataMasqueNotReadyError,
1818
DataMasqueTransportError,
1919
DataMasqueUserError,
20+
DiscoveryConfigNotFoundError,
2021
FailedToStartError,
2122
IfmAuthError,
2223
InvalidDiscoveryConfigError,
@@ -77,7 +78,7 @@
7778
SchemaDiscoveryResult,
7879
TableConstraints,
7980
)
80-
from datamasque.client.models.discovery_config import DiscoveryConfig, DiscoveryConfigId
81+
from datamasque.client.models.discovery_config import DiscoveryConfig, DiscoveryConfigId, DiscoveryConfigType
8182
from datamasque.client.models.dm_instance import DataMasqueInstanceConfig
8283
from datamasque.client.models.files import (
8384
DataMasqueFile,
@@ -146,6 +147,8 @@
146147
"DatabricksConnectionConfig",
147148
"DiscoveryConfig",
148149
"DiscoveryConfigId",
150+
"DiscoveryConfigNotFoundError",
151+
"DiscoveryConfigType",
149152
"DiscoveryMatch",
150153
"DynamoConnectionConfig",
151154
"FailedToStartError",

datamasque/client/discovery.py

Lines changed: 16 additions & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -10,6 +10,7 @@
1010
from datamasque.client.exceptions import (
1111
AsyncRulesetGenerationInProgressError,
1212
DataMasqueException,
13+
DiscoveryConfigNotFoundError,
1314
FailedToStartError,
1415
InvalidDiscoveryConfigError,
1516
)
@@ -362,15 +363,29 @@ def start_file_data_discovery_run_from_config(self, request: FileDataDiscoveryFr
362363
# (a `{"message", "line_number", "column_number"}` dict per error).
363364
DISCOVERY_CONFIG_ERROR_FIELDS = ("discovery_config", "config_yaml")
364365

366+
# The server emits this phrase when the referenced config id cannot be found
367+
# (it was never created, or it has since been deleted).
368+
# There is no structured error code in the body,
369+
# so this stable message is the only signal
370+
# that separates a bad reference from a present-but-unusable config
371+
# (e.g. a non-`valid` validation state, which is also a string under `discovery_config`).
372+
MISSING_DISCOVERY_CONFIG_SIGNATURE = "object does not exist"
373+
365374
@classmethod
366375
def _maybe_raise_discovery_config_error(cls, run_data: object, response: Response, run_kind: str) -> None:
367-
"""Raise `InvalidDiscoveryConfigError` if the server's 400 body cites the discovery config."""
376+
"""Raise a discovery-config error if the server's 400 body cites the discovery config."""
368377
if not isinstance(run_data, dict):
369378
return
370379

371380
for field in cls.DISCOVERY_CONFIG_ERROR_FIELDS:
372381
if field in run_data:
373382
detail = cls._format_discovery_config_error(run_data[field])
383+
if cls.MISSING_DISCOVERY_CONFIG_SIGNATURE in detail:
384+
raise DiscoveryConfigNotFoundError(
385+
f"{run_kind} run failed to start: the referenced discovery config does not exist: {detail}",
386+
response=response,
387+
)
388+
374389
raise InvalidDiscoveryConfigError(
375390
f"{run_kind} run failed to start due to discovery config error: {detail}",
376391
response=response,

datamasque/client/exceptions.py

Lines changed: 17 additions & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -43,9 +43,24 @@ class InvalidLibraryError(FailedToStartError):
4343

4444
class InvalidDiscoveryConfigError(FailedToStartError):
4545
"""
46-
Raised when a discovery run fails to start due to an unusable discovery config.
46+
Raised when a discovery run fails to start because the referenced config exists but is unusable.
4747
48-
The referenced config is missing, archived, or not in a `valid` validation state.
48+
The config is present but not in a `valid` validation state,
49+
is the wrong type for the connection,
50+
or carries YAML the server rejects at trigger time.
51+
A config that cannot be found at all raises `DiscoveryConfigNotFoundError` instead.
52+
"""
53+
54+
55+
class DiscoveryConfigNotFoundError(FailedToStartError):
    """
    Raised when a discovery run cannot start because the server cannot find the referenced discovery config.

    The reference points at nothing: the config was never created, or it has since been deleted.
    This is distinct from `InvalidDiscoveryConfigError`,
    which covers a config that is present but unusable.
    Because the fault lies in the reference itself (a deleted or mistyped id),
    callers can treat this as a caller/setup error.
    """
5065

5166

datamasque/client/models/discovery_config.py

Lines changed: 9 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -1,3 +1,4 @@
1+
import enum
12
from datetime import datetime
23
from typing import Any, NewType, Optional
34

@@ -8,6 +9,13 @@
89
DiscoveryConfigId = NewType("DiscoveryConfigId", str)
910

1011

12+
class DiscoveryConfigType(enum.Enum):
    """
    Variant of discovery config that a config targets.

    A config is either a database config (qualified columns) or a file config (locators).
    """

    # Database variant (qualified columns).
    database = "database"
    # File variant (locators).
    file = "file"
17+
18+
1119
def unwrap_discovery_config_id(value: Any) -> Any:
1220
"""
1321
Coerce a `DiscoveryConfig` to its `id`; pass other values through unchanged.
@@ -33,6 +41,7 @@ class DiscoveryConfig(BaseModel):
3341

3442
name: str
3543
yaml: Optional[str] = Field(default=None, alias="config_yaml")
44+
config_type: DiscoveryConfigType
3645
id: Optional[DiscoveryConfigId] = None
3746
# Server-managed validation surface, populated by the DataMasque server.
3847
# `is_valid` may be `in_progress` immediately after creating a large config,

tests/test_discovery.py

Lines changed: 31 additions & 6 deletions
Original file line number | Diff line number | Diff line change
@@ -30,6 +30,7 @@
3030
AsyncRulesetGenerationInProgressError,
3131
DataMasqueApiError,
3232
DataMasqueException,
33+
DiscoveryConfigNotFoundError,
3334
FailedToStartError,
3435
InvalidDiscoveryConfigError,
3536
)
@@ -864,14 +865,14 @@ def test_schema_discovery_from_config_request_accepts_discovery_config_id():
864865

865866
def test_schema_discovery_from_config_request_unwraps_discovery_config_model():
866867
"""Passing a full `DiscoveryConfig` object substitutes its `id` for the wire payload."""
867-
config = DiscoveryConfig(name="my_cfg", id=DiscoveryConfigId(DISCOVERY_CONFIG_ID))
868+
config = DiscoveryConfig(name="my_cfg", config_type="database", id=DiscoveryConfigId(DISCOVERY_CONFIG_ID))
868869
req = SchemaDiscoveryFromConfigRequest(connection="conn-1", discovery_config=config)
869870
assert req.model_dump(exclude_none=True, mode="json")["discovery_config"] == DISCOVERY_CONFIG_ID
870871

871872

872873
def test_schema_discovery_from_config_request_rejects_unsaved_discovery_config():
873874
"""A `DiscoveryConfig` without an `id` cannot be used yet — raises immediately."""
874-
config = DiscoveryConfig(name="my_cfg")
875+
config = DiscoveryConfig(name="my_cfg", config_type="database")
875876
with pytest.raises(ValueError, match="id is None"):
876877
SchemaDiscoveryFromConfigRequest(connection="conn-1", discovery_config=config)
877878

@@ -974,7 +975,7 @@ def test_start_schema_discovery_run_from_config_sends_schemas(client):
974975

975976
def test_start_file_data_discovery_run_from_config_sends_discovery_config(client):
976977
"""`start_file_data_discovery_run_from_config` posts only the connection and discovery_config id."""
977-
config = DiscoveryConfig(name="my_cfg", id=DiscoveryConfigId(DISCOVERY_CONFIG_ID))
978+
config = DiscoveryConfig(name="my_cfg", config_type="file", id=DiscoveryConfigId(DISCOVERY_CONFIG_ID))
978979
req = FileDataDiscoveryFromConfigRequest(connection="conn-1", discovery_config=config)
979980
with requests_mock.Mocker() as m:
980981
m.post("http://test-server/api/run-file-data-discovery/v2/", json={"id": 99}, status_code=201)
@@ -1042,21 +1043,45 @@ def test_start_schema_discovery_run_from_config_raises_invalid_discovery_config_
10421043
)
10431044

10441045

1045-
def test_start_schema_discovery_run_from_config_raises_invalid_discovery_config_when_missing(client):
1046-
"""A 400 from DRF's PrimaryKeyRelatedField (config not found / archived) is also classified."""
1046+
def test_start_schema_discovery_run_from_config_raises_not_found_when_missing(client):
1047+
"""
1048+
A 400 for a config that cannot be found raises `DiscoveryConfigNotFoundError`.
1049+
1050+
This is a bad reference rather than an unusable-but-present config,
1051+
so it must not be conflated with `InvalidDiscoveryConfigError`.
1052+
The not-found subclass still inherits `FailedToStartError` for callers that catch the base.
1053+
"""
10471054
with requests_mock.Mocker() as m:
10481055
m.post(
10491056
"http://test-server/api/schema-discovery/v2/",
10501057
json={"discovery_config": [f'Invalid pk "{DISCOVERY_CONFIG_ID}" - object does not exist.']},
10511058
status_code=400,
10521059
)
1053-
with pytest.raises(InvalidDiscoveryConfigError, match="object does not exist"):
1060+
with pytest.raises(DiscoveryConfigNotFoundError, match="object does not exist") as exc_info:
10541061
client.start_schema_discovery_run_from_config(
10551062
SchemaDiscoveryFromConfigRequest(
10561063
connection="conn-1", discovery_config=DiscoveryConfigId(DISCOVERY_CONFIG_ID)
10571064
),
10581065
)
10591066

1067+
assert isinstance(exc_info.value, FailedToStartError)
1068+
1069+
1070+
def test_start_file_data_discovery_run_from_config_raises_not_found_when_missing(client):
    """File-data trigger: a saved config the server cannot find raises `DiscoveryConfigNotFoundError`."""
    # Mocked 400 body: the `discovery_config` field carries the "object does not exist" message.
    missing_config_body = {"discovery_config": [f'Invalid pk "{DISCOVERY_CONFIG_ID}" - object does not exist.']}

    with requests_mock.Mocker() as m:
        m.post(
            "http://test-server/api/run-file-data-discovery/v2/",
            json=missing_config_body,
            status_code=400,
        )
        with pytest.raises(DiscoveryConfigNotFoundError, match="object does not exist"):
            client.start_file_data_discovery_run_from_config(
                FileDataDiscoveryFromConfigRequest(
                    connection="conn-1",
                    discovery_config=DiscoveryConfigId(DISCOVERY_CONFIG_ID),
                ),
            )
1084+
10601085

10611086
def test_start_schema_discovery_run_from_config_raises_invalid_discovery_config_on_broken_yaml(client):
10621087
"""

tests/test_discovery_configs.py

Lines changed: 21 additions & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -29,13 +29,15 @@ def sample_config_list_response() -> dict[str, Any]:
2929
{
3030
"id": CONFIG_ID_1,
3131
"name": "my_config",
32+
"config_type": "database",
3233
"archived": False,
3334
"created": "2025-01-01T12:00:00Z",
3435
"modified": "2025-01-02T12:00:00Z",
3536
},
3637
{
3738
"id": CONFIG_ID_2,
3839
"name": "another_config",
40+
"config_type": "database",
3941
"archived": False,
4042
"created": "2025-02-01T12:00:00Z",
4143
"modified": "2025-02-02T12:00:00Z",
@@ -50,6 +52,7 @@ def sample_config_detail_response() -> dict[str, Any]:
5052
"id": CONFIG_ID_1,
5153
"name": "my_config",
5254
"config_yaml": "labels: []\nmetadata_rules: []\nidd_rules: []\n",
55+
"config_type": "database",
5356
"archived": False,
5457
"created": "2025-01-01T12:00:00Z",
5558
"modified": "2025-01-02T12:00:00Z",
@@ -61,6 +64,7 @@ def discovery_config() -> DiscoveryConfig:
6164
return DiscoveryConfig(
6265
name="test_config",
6366
yaml="labels: []\nmetadata_rules: []\nidd_rules: []\n",
67+
config_type="database",
6468
)
6569

6670

@@ -89,13 +93,15 @@ def test_list_discovery_configs_pagination(client: DataMasqueClient) -> None:
8993
{
9094
"id": CONFIG_ID_1,
9195
"name": "c1",
96+
"config_type": "database",
9297
"archived": False,
9398
"created": "2025-01-01T12:00:00Z",
9499
"modified": "2025-01-01T12:00:00Z",
95100
},
96101
{
97102
"id": CONFIG_ID_2,
98103
"name": "c2",
104+
"config_type": "database",
99105
"archived": False,
100106
"created": "2025-01-01T12:00:00Z",
101107
"modified": "2025-01-01T12:00:00Z",
@@ -110,6 +116,7 @@ def test_list_discovery_configs_pagination(client: DataMasqueClient) -> None:
110116
{
111117
"id": "cccccccc-1111-2222-3333-444444444444",
112118
"name": "c3",
119+
"config_type": "database",
113120
"archived": False,
114121
"created": "2025-01-01T12:00:00Z",
115122
"modified": "2025-01-01T12:00:00Z",
@@ -162,6 +169,7 @@ def test_get_discovery_config_by_name_found(
162169
{
163170
"id": CONFIG_ID_1,
164171
"name": "my_config",
172+
"config_type": "database",
165173
"archived": False,
166174
"created": "2025-01-01T12:00:00Z",
167175
"modified": "2025-01-02T12:00:00Z",
@@ -209,6 +217,7 @@ def test_get_discovery_config_by_name_raises_when_server_omits_id(client: DataMa
209217
"results": [
210218
{
211219
"name": "my_config",
220+
"config_type": "database",
212221
"archived": False,
213222
"created": "2025-01-01T12:00:00Z",
214223
"modified": "2025-01-02T12:00:00Z",
@@ -231,6 +240,7 @@ def test_create_discovery_config(client: DataMasqueClient, discovery_config: Dis
231240
"id": CONFIG_ID_1,
232241
"name": "test_config",
233242
"config_yaml": "labels: []\nmetadata_rules: []\nidd_rules: []\n",
243+
"config_type": "database",
234244
"archived": False,
235245
"created": "2025-06-01T10:00:00Z",
236246
"modified": "2025-06-01T10:00:00Z",
@@ -252,6 +262,7 @@ def test_create_discovery_config(client: DataMasqueClient, discovery_config: Dis
252262
request_body = m.last_request.json()
253263
assert request_body["name"] == "test_config"
254264
assert request_body["config_yaml"] == "labels: []\nmetadata_rules: []\nidd_rules: []\n"
265+
assert request_body["config_type"] == "database"
255266

256267

257268
def test_update_discovery_config(client: DataMasqueClient, discovery_config: DiscoveryConfig) -> None:
@@ -261,6 +272,7 @@ def test_update_discovery_config(client: DataMasqueClient, discovery_config: Dis
261272
"id": CONFIG_ID_1,
262273
"name": "test_config",
263274
"config_yaml": "labels: []\nmetadata_rules: []\nidd_rules: []\n",
275+
"config_type": "database",
264276
"archived": False,
265277
"created": "2025-06-01T10:00:00Z",
266278
"modified": "2025-06-02T10:00:00Z",
@@ -293,6 +305,7 @@ def test_create_or_update_discovery_config_create(client: DataMasqueClient, disc
293305
"id": CONFIG_ID_1,
294306
"name": "test_config",
295307
"config_yaml": "labels: []\nmetadata_rules: []\nidd_rules: []\n",
308+
"config_type": "database",
296309
"archived": False,
297310
"created": "2025-06-01T10:00:00Z",
298311
"modified": "2025-06-01T10:00:00Z",
@@ -317,6 +330,7 @@ def test_create_or_update_discovery_config_update(client: DataMasqueClient, disc
317330
{
318331
"id": CONFIG_ID_1,
319332
"name": "test_config",
333+
"config_type": "database",
320334
"archived": False,
321335
"created": "2025-06-01T10:00:00Z",
322336
"modified": "2025-06-01T10:00:00Z",
@@ -327,6 +341,7 @@ def test_create_or_update_discovery_config_update(client: DataMasqueClient, disc
327341
"id": CONFIG_ID_1,
328342
"name": "test_config",
329343
"config_yaml": "labels: []",
344+
"config_type": "database",
330345
"archived": False,
331346
"created": "2025-06-01T10:00:00Z",
332347
"modified": "2025-06-01T10:00:00Z",
@@ -335,6 +350,7 @@ def test_create_or_update_discovery_config_update(client: DataMasqueClient, disc
335350
"id": CONFIG_ID_1,
336351
"name": "test_config",
337352
"config_yaml": "labels: []\nmetadata_rules: []\nidd_rules: []\n",
353+
"config_type": "database",
338354
"archived": False,
339355
"created": "2025-06-01T10:00:00Z",
340356
"modified": "2025-06-02T10:00:00Z",
@@ -403,6 +419,7 @@ def test_delete_discovery_config_by_name_raises_when_server_omits_id(client: Dat
403419
"results": [
404420
{
405421
"name": "my_config",
422+
"config_type": "database",
406423
"archived": False,
407424
"created": "2025-01-01T12:00:00Z",
408425
"modified": "2025-01-02T12:00:00Z",
@@ -441,6 +458,7 @@ def test_discovery_config_parses_validation_fields() -> None:
441458
{
442459
"id": CONFIG_ID_1,
443460
"name": "my_config",
461+
"config_type": "database",
444462
"config_yaml": "labels: []",
445463
"is_valid": "invalid",
446464
"validation_error": "bad shape on line 3",
@@ -459,6 +477,7 @@ def test_discovery_config_validation_fields_optional() -> None:
459477
{
460478
"id": CONFIG_ID_1,
461479
"name": "my_config",
480+
"config_type": "database",
462481
"created": "2025-01-01T12:00:00Z",
463482
"modified": "2025-01-02T12:00:00Z",
464483
}
@@ -474,11 +493,11 @@ def test_unwrap_discovery_config_id_passes_through_strings() -> None:
474493

475494

476495
def test_unwrap_discovery_config_id_extracts_id_from_model() -> None:
477-
config = DiscoveryConfig(name="x", id=DiscoveryConfigId(CONFIG_ID_1))
496+
config = DiscoveryConfig(name="x", config_type="database", id=DiscoveryConfigId(CONFIG_ID_1))
478497
assert unwrap_discovery_config_id(config) == CONFIG_ID_1
479498

480499

481500
def test_unwrap_discovery_config_id_raises_without_id() -> None:
482-
config = DiscoveryConfig(name="x")
501+
config = DiscoveryConfig(name="x", config_type="database")
483502
with pytest.raises(ValueError, match="id is None"):
484503
unwrap_discovery_config_id(config)

0 commit comments

Comments
 (0)