Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
6 changes: 4 additions & 2 deletions e2e/test_entities.py
Original file line number Diff line number Diff line change
Expand Up @@ -306,8 +306,10 @@ def test_entity_search_filter(sdk: NeMoPlatform, workspace: str):
assert len(response.data) == 1
assert response.data[0].name == entity_alpha

# Filter by name pattern (like)
filter_query = json.dumps({"name": {"$like": f"{prefix}%"}})
# Filter by name substring. $like is a case-insensitive substring match,
# not a SQL wildcard pattern (% and _ are literal), so the shared prefix —
# a substring of both entity names — matches alpha and beta.
filter_query = json.dumps({"name": {"$like": prefix}})
response = sdk.entities.list(
entity_type=ENTITY_TYPE,
workspace=workspace,
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -11,6 +11,17 @@
from sqlalchemy.orm import aliased


def _escape_like(value: str) -> str:
"""Escape SQL LIKE metacharacters so ``%`` and ``_`` match literally.

``$like`` is a case-insensitive substring (contains) test in which ``%`` and
``_`` are ordinary characters — the canonical contract documented and pinned
by ``InMemoryFilterRepository.like``. The backslash escape character is
escaped first so the escapes we add are not themselves re-escaped.
"""
return str(value).replace("\\", "\\\\").replace("%", "\\%").replace("_", "\\_")


class SQLAlchemyFilterRepository(FilterRepository):
"""SQLAlchemy implementation of FilterRepository.

Expand Down Expand Up @@ -142,11 +153,18 @@ def eq(self, field: str, value: Any) -> Any:
return column == value

def like(self, field: str, value: str) -> Any:
    """Case-insensitive substring (contains) comparison.

    ``%`` and ``_`` in ``value`` are matched literally, not as SQL wildcards,
    to agree with ``InMemoryFilterRepository.like``. Metacharacters are escaped
    and an explicit ``ESCAPE`` clause is used, which behaves the same on SQLite
    and PostgreSQL.

    Args:
        field: Name of the field to filter on; resolved via ``_get_column``.
        value: Text that must appear somewhere in the field.

    Returns:
        The expression produced by ``ilike`` on the resolved column (or on
        its text cast when the column is JSON).
    """
    column, is_json = self._get_column(field)
    # Wrap the escaped text in wildcards so the match is "contains".
    pattern = f"%{_escape_like(value)}%"
    # JSON values are compared through their text form.
    target = self._cast_json_to_text(column) if is_json else column
    return target.ilike(pattern, escape="\\")

def lt(self, field: str, value: Any) -> Any:
"""Less than comparison."""
Expand Down
18 changes: 18 additions & 0 deletions services/core/entities/tests/test_filter_matches_sql_parity.py
Original file line number Diff line number Diff line change
Expand Up @@ -42,12 +42,26 @@ class FakeEntity(Base):
# absent keys (a documented native divergence pinned in the unit tests). A
# plain-column NULL (name on row 5) and an explicit/absent ``k`` for $eq-None
# coverage are the only nullable bits, and $eq agrees with SQL on both.
#
# Rows 6-9 carry SQL LIKE metacharacters (``_``/``%``) in ``name``/``data.tier``,
# each paired with a near-identical row that a wildcard interpretation would
# wrongly match. They pin the AIRCORE-749 contract that ``$like`` is a literal
# substring (``_``/``%`` are ordinary characters), agreeing with the in-memory
# backend. All keep score/tier/flag present so no absent-key divergence is
# introduced into the existing cases.
SEED = [
    {"id": 1, "name": "llama", "data": {"score": 5, "tier": "free", "flag": True, "k": None}},
    {"id": 2, "name": "Llama-2", "data": {"score": 9, "tier": "pro", "flag": False}},
    {"id": 3, "name": "zephyr", "data": {"score": 10, "tier": "pro", "flag": True, "k": "v"}},
    {"id": 4, "name": "mistral", "data": {"score": 100, "tier": "enterprise", "flag": False}},
    {"id": 5, "name": None, "data": {"score": 1, "tier": "free", "flag": False}},
    # Under LIKE, `_` matches any single character, so a wildcard reading of
    # "prod_db" would also hit "prodXdb"; a literal reading must not.
    {"id": 6, "name": "prod_db", "data": {"score": 7, "tier": "free", "flag": True}},
    {"id": 7, "name": "prodXdb", "data": {"score": 8, "tier": "pro", "flag": False}},
    # Under LIKE, `%` matches any run of characters, so a wildcard reading of
    # "50%off" would also hit "50pctoff"; a literal reading must not.
    # The data.tier pair "a_c" / "axc" covers the same case on the JSON
    # cast-to-text path.
    {"id": 8, "name": "50%off", "data": {"score": 11, "tier": "a_c", "flag": True}},
    {"id": 9, "name": "50pctoff", "data": {"score": 12, "tier": "axc", "flag": False}},
]


Expand Down Expand Up @@ -90,6 +104,10 @@ def NOT(op):
("like_name_lower", C(FilterOperator.LIKE, "name", "LAMA")),
("like_data_tier", C(FilterOperator.LIKE, "data.tier", "pr")),
("like_data_miss", C(FilterOperator.LIKE, "data.tier", "zzz")),
# AIRCORE-749: `_`/`%` are literal substrings, not SQL wildcards.
("like_name_underscore_literal", C(FilterOperator.LIKE, "name", "prod_db")),
("like_name_percent_literal", C(FilterOperator.LIKE, "name", "50%off")),
("like_data_tier_underscore_literal", C(FilterOperator.LIKE, "data.tier", "a_c")),
("in_name", C(FilterOperator.IN, "name", ["llama", "mistral"])),
("in_data_tier", C(FilterOperator.IN, "data.tier", ["pro", "free"])),
("in_data_score", C(FilterOperator.IN, "data.score", [5, 10])),
Expand Down
Loading