NVIDIA-NeMo · maxdubrinsky · Jun 10, 2026
@@ -306,8 +306,10 @@ def test_entity_search_filter(sdk: NeMoPlatform, workspace: str):
         assert len(response.data) == 1
         assert response.data[0].name == entity_alpha
 
-        # Filter by name pattern (like)
-        filter_query = json.dumps({"name": {"$like": f"{prefix}%"}})
+        # Filter by name substring. $like is a case-insensitive substring match,
+        # not a SQL wildcard pattern (% and _ are literal), so the shared prefix —
+        # a substring of both entity names — matches alpha and beta.
+        filter_query = json.dumps({"name": {"$like": prefix}})
         response = sdk.entities.list(
             entity_type=ENTITY_TYPE,
             workspace=workspace,

@@ -11,6 +11,17 @@
 from sqlalchemy.orm import aliased
 
 
+def _escape_like(value: str) -> str:
+    """Escape SQL LIKE metacharacters so ``%`` and ``_`` match literally.
+
+    ``$like`` is a case-insensitive substring (contains) test in which ``%`` and
+    ``_`` are ordinary characters — the canonical contract documented and pinned
+    by ``InMemoryFilterRepository.like``. The backslash escape character is
+    escaped first so the escapes we add are not themselves re-escaped.
+    """
+    return str(value).replace("\\", "\\\\").replace("%", "\\%").replace("_", "\\_")
+
+
 class SQLAlchemyFilterRepository(FilterRepository):
     """SQLAlchemy implementation of FilterRepository.
 
@@ -142,11 +153,18 @@ def eq(self, field: str, value: Any) -> Any:
         return column == value
 
     def like(self, field: str, value: str) -> Any:
-        """Like/contains comparison."""
+        """Case-insensitive substring (contains) comparison.
+
+        ``%`` and ``_`` in ``value`` are matched literally, not as SQL wildcards,
+        to agree with ``InMemoryFilterRepository.like``. ``%``, ``_`` and backslash
+        are escaped and an explicit backslash ``ESCAPE`` clause is used, which
+        behaves the same on SQLite and PostgreSQL.
+        """
         column, is_json = self._get_column(field)
+        pattern = f"%{_escape_like(value)}%"
         if is_json:
-            return self._cast_json_to_text(column).ilike(f"%{value}%")
-        return column.ilike(f"%{value}%")
+            return self._cast_json_to_text(column).ilike(pattern, escape="\\")
+        return column.ilike(pattern, escape="\\")
 
     def lt(self, field: str, value: Any) -> Any:
         """Less than comparison."""

@@ -42,12 +42,26 @@ class FakeEntity(Base):
 # absent keys (a documented native divergence pinned in the unit tests). A
 # plain-column NULL (name on row 5) and an explicit/absent ``k`` for $eq-None
 # coverage are the only nullable bits, and $eq agrees with SQL on both.
+#
+# Rows 6 and 8 carry SQL LIKE metacharacters (``_``/``%``) in ``name``/``data.tier``;
+# rows 7 and 9 are near-identical decoys that a wildcard interpretation would
+# wrongly match. Together they pin the AIRCORE-749 contract that ``$like`` is a
+# literal substring test (``_``/``%`` are ordinary characters), agreeing with the
+# in-memory backend. All four keep score/tier/flag present so no absent-key
+# divergence is introduced into the existing cases.
 SEED = [
     dict(id=1, name="llama", data={"score": 5, "tier": "free", "flag": True, "k": None}),
     dict(id=2, name="Llama-2", data={"score": 9, "tier": "pro", "flag": False}),
     dict(id=3, name="zephyr", data={"score": 10, "tier": "pro", "flag": True, "k": "v"}),
     dict(id=4, name="mistral", data={"score": 100, "tier": "enterprise", "flag": False}),
     dict(id=5, name=None, data={"score": 1, "tier": "free", "flag": False}),
+    # `_` is a single-char wildcard under LIKE; "prod_db" must not match "prodXdb".
+    dict(id=6, name="prod_db", data={"score": 7, "tier": "free", "flag": True}),
+    dict(id=7, name="prodXdb", data={"score": 8, "tier": "pro", "flag": False}),
+    # `%` is a zero-or-more-char wildcard under LIKE; "50%off" must not match "50pctoff".
+    # data.tier "a_c" must not match "axc" (exercises the JSON cast-to-text path).
+    dict(id=8, name="50%off", data={"score": 11, "tier": "a_c", "flag": True}),
+    dict(id=9, name="50pctoff", data={"score": 12, "tier": "axc", "flag": False}),
 ]
 
 
@@ -90,6 +104,10 @@ def NOT(op):
     ("like_name_lower", C(FilterOperator.LIKE, "name", "LAMA")),
     ("like_data_tier", C(FilterOperator.LIKE, "data.tier", "pr")),
     ("like_data_miss", C(FilterOperator.LIKE, "data.tier", "zzz")),
+    # AIRCORE-749: `_`/`%` are literal substrings, not SQL wildcards.
+    ("like_name_underscore_literal", C(FilterOperator.LIKE, "name", "prod_db")),
+    ("like_name_percent_literal", C(FilterOperator.LIKE, "name", "50%off")),
+    ("like_data_tier_underscore_literal", C(FilterOperator.LIKE, "data.tier", "a_c")),
     ("in_name", C(FilterOperator.IN, "name", ["llama", "mistral"])),
     ("in_data_tier", C(FilterOperator.IN, "data.tier", ["pro", "free"])),
     ("in_data_score", C(FilterOperator.IN, "data.score", [5, 10])),