From 0e074c6f83b3efef64a7eb36efdb48209dd28a4c Mon Sep 17 00:00:00 2001
From: oss-amikos
Date: Mon, 23 Mar 2026 14:25:28 +0200
Subject: [PATCH 01/26] docs(05): replan 05-02 for CLOUD-01 search parity tests
---
.../05-02-PLAN.md | 479 +++++++++++-------
1 file changed, 309 insertions(+), 170 deletions(-)
diff --git a/.planning/phases/05-cloud-integration-testing/05-02-PLAN.md b/.planning/phases/05-cloud-integration-testing/05-02-PLAN.md
index 027d2a8..bf07cfe 100644
--- a/.planning/phases/05-cloud-integration-testing/05-02-PLAN.md
+++ b/.planning/phases/05-cloud-integration-testing/05-02-PLAN.md
@@ -4,7 +4,6 @@ plan: 02
type: execute
wave: 2
depends_on: ["05-01"]
-blocked_by_phase: 3
files_modified:
- src/test/java/tech/amikos/chromadb/v2/SearchApiCloudIntegrationTest.java
autonomous: true
@@ -12,38 +11,38 @@ requirements: [CLOUD-01]
must_haves:
truths:
- - "Cloud KNN search returns ranked results with expected ordering"
- - "Cloud RRF hybrid search combines multiple rank expressions end-to-end"
- - "Cloud GroupBy search aggregates results by metadata key with MinK/MaxK"
- - "Cloud batch search executes multiple independent searches in one call"
- - "Cloud search pagination with limit and offset returns correct pages"
- - "Cloud search filter matrix covers Where, IDIn, IDNotIn, DocumentContains, and combinations"
- - "Cloud search projection returns selected fields and excludes unselected fields"
- - "Cloud search read levels INDEX_AND_WAL and INDEX_ONLY return appropriate result sets"
- - "Knn.limit (candidate pool) vs Search.limit (final result count) distinction validated"
+ - "Cloud KNN search with embedding returns ranked results from the shared seed collection"
+ - "Cloud batch search executes two independent KNN searches and returns two result groups"
+ - "Cloud GroupBy search partitions results by metadata key and returns results via rows()"
+ - "Cloud search with ReadLevel.INDEX_AND_WAL returns results including unindexed WAL records"
+ - "Cloud search with ReadLevel.INDEX_ONLY succeeds (may return fewer results than INDEX_AND_WAL)"
+ - "Knn.limit (candidate pool) vs Search.limit (final result count) distinction is validated"
+ - "Cloud search filter matrix covers Where metadata, IDIn, IDNotIn, DocumentContains, DocumentNotContains, and combined filters"
+ - "Cloud search pagination with limit returns correct count, and limit+offset returns a different page"
+ - "Cloud search projection returns selected fields and excluded fields are null"
+ - "Custom metadata key projection returns the specified key values"
- "All search tests skip cleanly when CHROMA_API_KEY is absent"
artifacts:
- path: "src/test/java/tech/amikos/chromadb/v2/SearchApiCloudIntegrationTest.java"
- provides: "CLOUD-01 search parity test methods added to existing test class"
+ provides: "CLOUD-01 search parity test methods added to existing class"
contains: "testCloudKnnSearch"
key_links:
- from: "src/test/java/tech/amikos/chromadb/v2/SearchApiCloudIntegrationTest.java"
- to: "Phase 3 Search API types"
- via: "import of Search, Knn, Rrf, GroupBy, ReadLevel, SearchResult"
- pattern: "collection\\.search\\(\\)"
+ to: "Search API types"
+ via: "import of Search, Knn, Rrf, GroupBy, ReadLevel, Select, SearchResult, SearchResultRow"
+ pattern: "import tech\\.amikos\\.chromadb\\.v2\\.Search;"
- from: "src/test/java/tech/amikos/chromadb/v2/SearchApiCloudIntegrationTest.java"
to: "shared seed collection"
- via: "seedCollection field from @BeforeClass"
- pattern: "seedCollection"
+ via: "static seedCollection field populated in @BeforeClass"
+ pattern: "seedCollection\\.search\\(\\)"
---
-Add CLOUD-01 search parity test methods to `SearchApiCloudIntegrationTest` covering KNN, RRF, GroupBy, batch search, pagination, filter combinations, field projection, and read levels.
+Add CLOUD-01 search parity test methods to the existing `SearchApiCloudIntegrationTest` class, validating the Phase 3 Search API end-to-end against Chroma Cloud.
-Purpose: Validate the Phase 3 Search API end-to-end against Chroma Cloud, going beyond the chroma-go baseline by testing RRF and GroupBy in cloud integration (not just unit tests).
-Output: 8-10 additional test methods in the existing test class.
+Purpose: Validate KNN search, batch search, GroupBy, read levels, pagination, filter combinations, field projection, and Knn.limit vs Search.limit distinction using the shared seed collection (15 products, 4D embeddings, 6 categories).
-**BLOCKED: This plan depends on Phase 3 (Search API) being implemented first.** The Search API types (`SearchResult`, `Knn`, `Rrf`, `GroupBy`, `ReadLevel`, search builder) do not exist yet -- Phase 3 has 0 plans executed. This plan MUST NOT be executed until Phase 3 ships. If Phase 3 type signatures differ from what is assumed below, adapt the test code to match the actual Phase 3 API.
+Output: 11 new test methods in `SearchApiCloudIntegrationTest.java` covering all CLOUD-01 scenarios.
@@ -59,30 +58,88 @@ Output: 8-10 additional test methods in the existing test class.
@.planning/phases/05-cloud-integration-testing/05-RESEARCH.md
@.planning/phases/05-cloud-integration-testing/05-01-SUMMARY.md
-@src/test/java/tech/amikos/chromadb/v2/SearchApiCloudIntegrationTest.java
-@src/main/java/tech/amikos/chromadb/v2/Collection.java
-@src/main/java/tech/amikos/chromadb/v2/Where.java
-@src/main/java/tech/amikos/chromadb/v2/WhereDocument.java
-
-
-
-Expected Phase 3 types (adapt to actual implementation):
-- Collection.search() - returns a SearchBuilder
-- SearchBuilder with methods for: searches(Search...), limit(int), offset(int), include(Include...), readLevel(ReadLevel)
-- Search with: knn(Knn), rrf(Rrf), where(Where), whereDocument(WhereDocument), select(String...), groupBy(GroupBy), limit(int)
-- Knn with: queryText(String), queryEmbedding(float[]), limit(int)
-- Rrf with: ranks(Knn...), k(int)
-- GroupBy with: key(String), minK(int), maxK(int)
-- ReadLevel enum: INDEX_AND_WAL, INDEX_ONLY
-- SearchResult type for results
-
-From src/main/java/tech/amikos/chromadb/v2/Where.java:
+
+
+From src/main/java/tech/amikos/chromadb/v2/Collection.java (SearchBuilder):
+```java
+interface SearchBuilder {
+ SearchBuilder queryText(String text);
+ SearchBuilder queryEmbedding(float[] embedding);
+ SearchBuilder searches(Search... searches);
+ SearchBuilder where(Where globalFilter);
+ SearchBuilder limit(int limit);
+ SearchBuilder offset(int offset);
+ SearchBuilder readLevel(ReadLevel readLevel);
+ SearchResult execute();
+}
+```
+
+From src/main/java/tech/amikos/chromadb/v2/Search.java:
+```java
+public static Builder builder();
+// Builder methods: knn(Knn), rrf(Rrf), where(Where), select(Select...), selectAll(),
+// groupBy(GroupBy), limit(int), offset(int), build()
+```
+
+From src/main/java/tech/amikos/chromadb/v2/Knn.java:
+```java
+public static Knn queryText(String text); // NOT supported by server yet
+public static Knn queryEmbedding(float[] embedding);
+public Knn limit(int limit); // limit must be > 0
+public Knn returnRank(boolean returnRank);
+```
+
+From src/main/java/tech/amikos/chromadb/v2/Rrf.java:
+```java
+// NOT supported by server yet (returns "unknown variant '$rrf'")
+public static Builder builder();
+// Builder: rank(Knn, double), k(int), normalize(boolean), build()
+```
+
+From src/main/java/tech/amikos/chromadb/v2/GroupBy.java:
+```java
+public static Builder builder();
+// Builder: key(String), minK(int), maxK(int), build()
+```
+
+From src/main/java/tech/amikos/chromadb/v2/ReadLevel.java:
+```java
+INDEX_AND_WAL("index_and_wal"), // includes WAL, most up-to-date
+INDEX_ONLY("index_only"); // faster, potentially stale
+```
+
+From src/main/java/tech/amikos/chromadb/v2/Select.java:
+```java
+public static final Select DOCUMENT, SCORE, EMBEDDING, METADATA, ID;
+public static Select key(String fieldName); // custom metadata key
+public static Select[] all(); // ID, DOCUMENT, EMBEDDING, METADATA, SCORE
+```
+
+From src/main/java/tech/amikos/chromadb/v2/SearchResult.java:
+```java
+List<List<String>> getIds();
+List<List<String>> getDocuments();
+List<List<Map<String, Object>>> getMetadatas();
+List<List<float[]>> getEmbeddings();
+List<List<Double>> getScores();
+ResultGroup<SearchResultRow> rows(int searchIndex);
+int searchCount();
+```
+
+From src/main/java/tech/amikos/chromadb/v2/SearchResultRow.java:
+```java
+public interface SearchResultRow extends ResultRow {
+ Double getScore();
+ // Inherited from ResultRow: getId(), getDocument(), getMetadata(), getEmbedding()
+}
+```
+
+From src/main/java/tech/amikos/chromadb/v2/Where.java (filter DSL):
```java
public static Where eq(String key, String value);
public static Where gt(String key, float value);
+public static Where lt(String key, float value);
public static Where idIn(String... ids);
public static Where idNotIn(String... ids);
public static Where documentContains(String text);
@@ -90,162 +147,244 @@ public static Where documentNotContains(String text);
public static Where and(Where... conditions);
```
-Existing test infrastructure (from Plan 01):
-- sharedClient, seedCollection (static, @BeforeClass)
-- waitForIndexing(Collection, long, long) helper
-- createIsolatedCollection(String prefix) helper
-- Seed data: 15 product records with category, price, in_stock, tags, ratings metadata
-- Product IDs: "prod-001" through "prod-015"
-- Categories: "electronics", "grocery", "clothing"
+Seed collection details (from 05-01-SUMMARY / existing @BeforeClass):
+- 15 records: prod-001 through prod-015
+- 4D embeddings (clustered: electronics dim0, grocery dim1, sports/clothing dim2, travel/office dim3)
+- Categories: electronics (6: 001,005,008,009,011,015), grocery (3: 002,007,010),
+ clothing (1: 003), sports (3: 004,006,013), travel (1: 012), office (1: 014)
+- Metadata fields: category (String), price (float), in_stock (boolean),
+ tags (List), ratings (List)
+- Documents: descriptive product titles
+
+Query embedding constants to define in test:
+- QUERY_ELECTRONICS = {0.85f, 0.15f, 0.05f, 0.05f} // should match electronics cluster
+- QUERY_GROCERY = {0.05f, 0.85f, 0.15f, 0.05f} // should match grocery cluster
+- QUERY_SPORTS = {0.05f, 0.05f, 0.85f, 0.15f} // should match sports/clothing cluster
- Task 1: Add CLOUD-01 search parity test methods to SearchApiCloudIntegrationTest
+ Task 1: Add core search tests (KNN, batch, GroupBy, ReadLevel, KnnLimit)src/test/java/tech/amikos/chromadb/v2/SearchApiCloudIntegrationTest.java
- - src/test/java/tech/amikos/chromadb/v2/SearchApiCloudIntegrationTest.java
- - src/main/java/tech/amikos/chromadb/v2/Collection.java
- - src/main/java/tech/amikos/chromadb/v2/Where.java
- - src/main/java/tech/amikos/chromadb/v2/WhereDocument.java
- - .planning/phases/05-cloud-integration-testing/05-CONTEXT.md
- - .planning/phases/05-cloud-integration-testing/05-01-SUMMARY.md
+ - src/test/java/tech/amikos/chromadb/v2/SearchApiCloudIntegrationTest.java (current file to modify)
+ - src/test/java/tech/amikos/chromadb/v2/SearchApiIntegrationTest.java (pattern reference for Search API test code)
+ - src/main/java/tech/amikos/chromadb/v2/SearchResult.java (result interface)
+ - src/main/java/tech/amikos/chromadb/v2/SearchResultRow.java (row interface with getScore())
+ - src/main/java/tech/amikos/chromadb/v2/Knn.java (KNN factory and limit validation)
+ - src/main/java/tech/amikos/chromadb/v2/GroupBy.java (GroupBy builder)
+ - src/main/java/tech/amikos/chromadb/v2/ReadLevel.java (INDEX_AND_WAL, INDEX_ONLY)
+ - src/main/java/tech/amikos/chromadb/v2/Select.java (field projection)
-**MANDATORY PRE-EXECUTION GATE:** Before implementing ANY code, verify Phase 3 Search API types exist:
-```bash
-grep -r "class Search\|interface Search\|SearchResult\|SearchBuilder\|ReadLevel\|class Knn\|class Rrf\|class GroupBy" src/main/java/tech/amikos/chromadb/v2/
+Add the following to `SearchApiCloudIntegrationTest.java`:
+
+1. **Add query embedding constants** as private static final fields at the top of the class (after the existing `cloudAvailable` field):
+```java
+private static final float[] QUERY_ELECTRONICS = {0.85f, 0.15f, 0.05f, 0.05f};
+private static final float[] QUERY_GROCERY = {0.05f, 0.85f, 0.15f, 0.05f};
+private static final float[] QUERY_SPORTS = {0.05f, 0.05f, 0.85f, 0.15f};
+```
+
+2. **Add CLOUD-01 section header** after the D-22 section (line ~863):
+```java
+// =============================================================================
+// CLOUD-01: Search parity tests (D-07 through D-12)
+// =============================================================================
```
-If these types do NOT exist, STOP IMMEDIATELY. Do not proceed. Report:
-"BLOCKED: Phase 3 Search API types not found. This plan requires Phase 3 to be implemented first. Run `/gsd:plan-phase 3` and `/gsd:execute-phase 3` before retrying this plan."
-
-If Phase 3 types exist, read their actual signatures and adapt the test code below to match.
-
-Add the following test methods to `SearchApiCloudIntegrationTest.java`. All tests use the shared seed collection (15 product records) established in @BeforeClass from Plan 01. Each test starts with `Assume.assumeTrue("Cloud not available", cloudAvailable);`.
-
-**Test 1: `testCloudKnnSearch()`** (per D-07, D-11):
-- Execute a KNN search on the seed collection with a text query (e.g., "wireless headphones") per D-06 (server-side embedding)
-- Set KNN limit=10 (candidate pool) and search limit=3 (final result count) per D-11
-- Assert: result count is exactly 3 (Search.limit controls final output)
-- Assert: results are ordered by relevance (score[0] >= score[1] >= score[2], or distance[0] <= distance[1] depending on API shape)
-- Assert: each result has a non-null ID from the seed collection
-- Per D-11: This explicitly tests that Knn.limit (candidate pool) and Search.limit (final result count) are distinct -- KNN fetches 10 candidates but only 3 are returned
-
-**Test 2: `testCloudRrfSearch()`** (per D-07):
-- Execute an RRF (Reciprocal Rank Fusion) search combining two KNN rank expressions:
- - Rank 1: KNN query text "wireless audio device"
- - Rank 2: KNN query text "premium quality headphones"
-- Use RRF default k (typically 60) or explicit k=60
-- Set search limit=5
-- Assert: result count <= 5
-- Assert: each result has a valid ID and score
-- Assert: results are ranked (scores are monotonically non-increasing)
-
-**Test 3: `testCloudGroupBySearch()`** (per D-08):
-- Execute a search with GroupBy on `"category"` metadata key
-- Set minK=1, maxK=3
-- Set search limit=10
-- Assert: results are grouped by category
-- Assert: each group has at least minK results and at most maxK results (where enough records exist for that category)
-- Assert: group keys include at least some of "electronics", "grocery", "clothing"
-
-**Test 4: `testCloudBatchSearch()`** (per D-10):
-- Execute batch search with 2-3 independent Search objects:
- - Search A: KNN "headphones" with limit=2
- - Search B: KNN "organic tea" with limit=2
-- Assert: batch response contains results for both searches
-- Assert: each search result has the correct number of results (up to limit)
-- Assert: results from Search A and Search B differ (different query, different top results)
-
-**Test 5: `testCloudSearchPagination()`** (per D-14):
-- Page 1: search with limit=3, offset=0. Assert: exactly 3 results
-- Page 2: search with limit=3, offset=3. Assert: results differ from page 1 (no ID overlap)
-- Client validation: attempt search with limit=0, assert exception. Attempt search with negative offset, assert exception.
- Note: Check actual Phase 3 API -- if limit=0 or negative offset are server-rejected rather than client-validated, adjust to expect server exception.
-
-**Test 6: `testCloudSearchFilterMatrix()`** (per D-13):
-- Sub-test A: Where metadata filter alone -- `Where.eq("category", "electronics")`. Assert: all results have category=electronics.
-- Sub-test B: IDIn alone -- `Where.idIn("prod-001", "prod-005", "prod-010")`. Assert: results are subset of those 3 IDs.
-- Sub-test C: IDNotIn alone -- `Where.idNotIn("prod-001", "prod-002")`. Assert: neither prod-001 nor prod-002 in results.
-- Sub-test D: DocumentContains alone -- `Where.documentContains("wireless")`. Assert: all result documents contain "wireless".
-- Sub-test E: IDNotIn + metadata combined -- `Where.and(Where.idNotIn("prod-001"), Where.eq("category", "electronics"))`. Assert: results exclude prod-001 AND have category=electronics.
-- Sub-test F: Where + DocumentContains combined -- `Where.and(Where.gt("price", 20.0f), Where.documentContains("premium"))`. Assert: all results have price > 20 and document contains "premium".
-- Sub-test G: Triple combination -- `Where.and(Where.idIn("prod-001", "prod-002", "prod-003", "prod-004", "prod-005"), Where.eq("category", "electronics"), Where.documentContains("wireless"))`. Assert: results satisfy all three constraints.
-
-Note: Filter availability may depend on how Phase 3 Search exposes where/whereDocument. If `search()` uses a different filter mechanism than `query()`, adapt the filter calls. The Where DSL methods exist: `idIn`, `idNotIn`, `documentContains`, `documentNotContains`, `eq`, `gt`, `and`.
-
-**Test 7: `testCloudSearchProjection()`** (per D-15, D-16):
-- Execute search selecting only `#id` and `#score` (or equivalent Phase 3 select syntax). Assert: result has id and score, but document is null and metadata is null.
-- Execute search selecting `#id`, `#document`, and specific metadata key `category`. Assert: result has id, document, and category key in metadata, but other metadata keys (like price) are absent.
-- Per D-16: test custom metadata key projection -- not just the `#metadata` blob.
-
-Note: Projection syntax depends on Phase 3 implementation. Go client uses `KID`, `KDocument`, `KEmbedding`, `KMetadata`, `KScore` constants. Java may use `Include` enum or string-based select. Read Phase 3 types before implementing.
-
-**Test 8: `testCloudSearchReadLevel()`** (per D-12):
-- Create an isolated collection (not shared seed -- per D-05 since this may need fresh data)
-- Add 5-10 records with explicit embeddings
-- **INDEX_AND_WAL test:** Execute search with ReadLevel.INDEX_AND_WAL immediately (NO polling wait per D-12). Assert: result count equals total records inserted (WAL guarantees all records visible).
-- **INDEX_ONLY test:** Execute search with ReadLevel.INDEX_ONLY. Assert: result count <= total records inserted (per D-12: index may not be compacted yet, so count may be lower). Use `assertTrue(count <= totalRecords)` not `assertEquals`.
-- Per D-12: The INDEX_AND_WAL test deliberately skips the polling wait to verify WAL consistency.
-
-**General implementation notes:**
-- All tests use `Assume.assumeTrue("Cloud not available", cloudAvailable)` at the start
-- Tests that use the shared seed collection reference `seedCollection` static field
-- Tests that create isolated collections use `createIsolatedCollection(prefix)` helper
-- Import Phase 3 types as needed (Search, Knn, Rrf, GroupBy, ReadLevel, SearchResult)
-- Assertion on result ordering should be flexible: use `>=` for scores (not strict `>`) since tied scores are valid
-- When asserting document content, use `assertTrue(doc.contains("keyword"))` not exact string match
-- Java 8 compatible syntax throughout
+
+3. **testCloudKnnSearch** (D-07 KNN end-to-end):
+- Gate: `Assume.assumeTrue("Cloud not available", cloudAvailable);`
+- Execute: `seedCollection.search().queryEmbedding(QUERY_ELECTRONICS).limit(5).execute()`
+- Assert: result not null, `result.getIds()` not null, `result.getIds().get(0)` not empty, size <= 5
+- Assert via row access: `result.rows(0)` not empty, each `SearchResultRow.getId()` not null
+
+4. **testCloudRrfSearch** (D-07 RRF end-to-end):
+- Gate: `Assume.assumeTrue("Cloud not available", cloudAvailable);`
+- Gate: `Assume.assumeTrue("Skipping: $rrf variant is not yet supported by Chroma server", false);` (RRF is unsupported per SearchApiIntegrationTest findings)
+- Build: `Rrf.builder().rank(Knn.queryEmbedding(QUERY_ELECTRONICS), 0.7).rank(Knn.queryEmbedding(QUERY_GROCERY), 0.3).k(60).build()`
+- Execute via `Search.builder().rrf(rrf).selectAll().limit(5).build()` -> `seedCollection.search().searches(s).execute()`
+- Assert: result not null, ids not empty
+
+5. **testCloudGroupBySearch** (D-08 GroupBy with MinK/MaxK):
+- Gate: `Assume.assumeTrue("Cloud not available", cloudAvailable);`
+- Build: `Search.builder().knn(Knn.queryEmbedding(QUERY_ELECTRONICS)).groupBy(GroupBy.builder().key("category").maxK(2).build()).selectAll().limit(10).build()`
+- Execute via `seedCollection.search().searches(s).execute()`
+- Assert: result not null, `result.getIds()` not null
+- Assert via rows: `result.rows(0)` not null (GroupBy flattens into the standard column-major response; DO NOT call `groups()` or `isGrouped()` -- those methods do not exist)
+- Assert: result has at least 1 row
+
+6. **testCloudBatchSearch** (D-10 batch search):
+- Gate: `Assume.assumeTrue("Cloud not available", cloudAvailable);`
+- Build two Search objects: `s1 = Search.builder().knn(Knn.queryEmbedding(QUERY_ELECTRONICS)).limit(3).build()`, `s2 = Search.builder().knn(Knn.queryEmbedding(QUERY_GROCERY)).limit(3).build()`
+- Execute: `seedCollection.search().searches(s1, s2).execute()`
+- Assert: `result.searchCount() == 2`
+- Assert: `result.rows(0)` not empty, `result.rows(1)` not empty
+
+7. **testCloudSearchReadLevelIndexAndWal** (D-12 INDEX_AND_WAL):
+- Gate: `Assume.assumeTrue("Cloud not available", cloudAvailable);`
+- Create isolated collection `"cloud_rl_wal_"`, add 3 records with explicit 3D embeddings:
+ - ids: "rl-1", "rl-2", "rl-3"
+ - embeddings: `{1.0f, 0.0f, 0.0f}`, `{0.0f, 1.0f, 0.0f}`, `{0.0f, 0.0f, 1.0f}`
+ - documents: "ReadLevel test document one", "ReadLevel test document two", "ReadLevel test document three"
+- NO waitForIndexing -- deliberately skip polling per D-12
+- Search immediately with `ReadLevel.INDEX_AND_WAL`, query embedding `{0.9f, 0.1f, 0.1f}`, limit 3
+- Assert: result not null, result has at least 1 row (WAL guarantees recently written records are visible)
+
+8. **testCloudSearchReadLevelIndexOnly** (D-12 INDEX_ONLY):
+- Gate: `Assume.assumeTrue("Cloud not available", cloudAvailable);`
+- Use the shared seedCollection (already indexed from @BeforeClass)
+- Search with `ReadLevel.INDEX_ONLY`, query `QUERY_ELECTRONICS`, limit 5
+- Assert: result not null, `result.getIds()` not null (may return fewer rows than requested if index not fully compacted per D-12 -- assert row count `<= 5`, the search limit, not an exact count)
+- Assert: no exception thrown (the key assertion for INDEX_ONLY is that it succeeds)
+
+9. **testCloudKnnLimitVsSearchLimit** (D-11 explicit Knn.limit vs Search.limit distinction):
+- Gate: `Assume.assumeTrue("Cloud not available", cloudAvailable);`
+- Build: `Search.builder().knn(Knn.queryEmbedding(QUERY_ELECTRONICS).limit(10)).selectAll().limit(3).build()`
+- Execute via `seedCollection.search().searches(s).execute()`
+- Assert: `result.rows(0).size() <= 3` (Search.limit=3 caps final result count even though Knn.limit=10 retrieves 10 candidates)
+- Comment in code explaining the distinction: "Knn.limit(10) retrieves 10 nearest neighbor candidates; Search.limit(3) caps the final result count returned to the caller"
+
+
+ cd /Users/tazarov/experiments/amikos/chromadb-java-client && mvn test-compile -pl . -q 2>&1 | tail -5
+
+
+ - grep -c "testCloudKnnSearch\|testCloudRrfSearch\|testCloudGroupBySearch\|testCloudBatchSearch\|testCloudSearchReadLevelIndexAndWal\|testCloudSearchReadLevelIndexOnly\|testCloudKnnLimitVsSearchLimit" src/test/java/tech/amikos/chromadb/v2/SearchApiCloudIntegrationTest.java returns 7
+ - grep -c "QUERY_ELECTRONICS\|QUERY_GROCERY\|QUERY_SPORTS" src/test/java/tech/amikos/chromadb/v2/SearchApiCloudIntegrationTest.java returns at least 10
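+ - grep -c "Assume.assumeTrue" src/test/java/tech/amikos/chromadb/v2/SearchApiCloudIntegrationTest.java returns at least 18 (existing ~12 + new 8: one cloud gate per new test plus the extra RRF skip gate; the threshold leaves slack for the approximate existing count)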
+ - grep "ReadLevel.INDEX_AND_WAL" src/test/java/tech/amikos/chromadb/v2/SearchApiCloudIntegrationTest.java returns at least 1 match
+ - grep "ReadLevel.INDEX_ONLY" src/test/java/tech/amikos/chromadb/v2/SearchApiCloudIntegrationTest.java returns at least 1 match
+ - grep "Knn.queryEmbedding(QUERY_ELECTRONICS).limit(10)" src/test/java/tech/amikos/chromadb/v2/SearchApiCloudIntegrationTest.java returns 1 match (D-11 test)
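+ - grep "searchCount() == 2\|searchCount(), 2\|2, result.searchCount()" src/test/java/tech/amikos/chromadb/v2/SearchApiCloudIntegrationTest.java returns 1 match (batch test; the last alternative covers the expected-first `assertEquals(2, result.searchCount())` form)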
+ - grep "GroupBy.builder().key" src/test/java/tech/amikos/chromadb/v2/SearchApiCloudIntegrationTest.java returns at least 1 match
+ - grep -c "groups()\|isGrouped()" src/test/java/tech/amikos/chromadb/v2/SearchApiCloudIntegrationTest.java returns 0 (removed APIs must NOT appear)
+ - grep -c "waitForIndexing" src/test/java/tech/amikos/chromadb/v2/SearchApiCloudIntegrationTest.java returns 0 (removed helper must NOT appear)
+ - File compiles: mvn test-compile -pl . -q succeeds with exit code 0 (plain `mvn compile` does not compile src/test/java)
+
+
+ 7 new test methods added: testCloudKnnSearch, testCloudRrfSearch, testCloudGroupBySearch, testCloudBatchSearch, testCloudSearchReadLevelIndexAndWal, testCloudSearchReadLevelIndexOnly, testCloudKnnLimitVsSearchLimit. All use Assume.assumeTrue for cloud gating. RRF test is auto-skipped (server unsupported). ReadLevel WAL test uses isolated collection with explicit 3D embeddings and no polling. KnnLimit test verifies candidate pool vs final result count distinction. No calls to groups(), isGrouped(), or waitForIndexing().
+
+
+
+
+ Task 2: Add filter matrix, pagination, and projection tests
+ src/test/java/tech/amikos/chromadb/v2/SearchApiCloudIntegrationTest.java
+
+ - src/test/java/tech/amikos/chromadb/v2/SearchApiCloudIntegrationTest.java (current file after Task 1)
+ - src/main/java/tech/amikos/chromadb/v2/Where.java (filter DSL: idIn, idNotIn, documentContains, documentNotContains, eq, gt, and)
+ - src/main/java/tech/amikos/chromadb/v2/Select.java (ID, DOCUMENT, SCORE, METADATA, EMBEDDING, key())
+ - src/main/java/tech/amikos/chromadb/v2/Search.java (Search.builder with where, select, limit, offset)
+ - src/main/java/tech/amikos/chromadb/v2/SearchResult.java (getDocuments, getMetadatas, getEmbeddings)
+ - src/main/java/tech/amikos/chromadb/v2/SearchResultRow.java (getDocument, getMetadata, getScore, getEmbedding)
+
+
+Add the following test methods to `SearchApiCloudIntegrationTest.java` after the Task 1 tests:
+
+1. **testCloudSearchFilterMatrix** (D-13 filter combinations):
+Gate first with `Assume.assumeTrue("Cloud not available", cloudAvailable);`. All sub-tests use the shared seedCollection with `QUERY_ELECTRONICS` embedding and `selectAll()`.
+
+Sub-test A -- Where metadata filter alone:
+- `Search.builder().knn(Knn.queryEmbedding(QUERY_ELECTRONICS)).where(Where.eq("category", "electronics")).selectAll().limit(10).build()`
+- Assert: all returned rows have `getMetadata().get("category")` equal to `"electronics"`
+
+Sub-test B -- IDIn alone:
+- `Search.builder().knn(Knn.queryEmbedding(QUERY_ELECTRONICS)).where(Where.idIn("prod-001", "prod-005", "prod-008")).selectAll().limit(10).build()`
+- Assert: all returned row IDs are in the set `{"prod-001", "prod-005", "prod-008"}`
+- Assert: result size <= 3
+
+Sub-test C -- IDNotIn alone:
+- `Search.builder().knn(Knn.queryEmbedding(QUERY_ELECTRONICS)).where(Where.idNotIn("prod-001", "prod-002")).selectAll().limit(10).build()`
+- Assert: no returned row ID equals "prod-001" or "prod-002"
+
+Sub-test D -- DocumentContains alone:
+- `Search.builder().knn(Knn.queryEmbedding(QUERY_ELECTRONICS)).where(Where.documentContains("headphones")).selectAll().limit(10).build()`
+- Assert: each returned row's `getDocument()` contains "headphones" (case-insensitive check via `toLowerCase().contains("headphones")`)
+
+Sub-test E -- IDNotIn + metadata filter combined:
+- `Search.builder().knn(Knn.queryEmbedding(QUERY_ELECTRONICS)).where(Where.and(Where.idNotIn("prod-001"), Where.eq("category", "electronics"))).selectAll().limit(10).build()`
+- Assert: no returned row has ID "prod-001"
+- Assert: all returned rows have category "electronics"
+
+Sub-test F -- Where + DocumentContains combined:
+- `Search.builder().knn(Knn.queryEmbedding(QUERY_ELECTRONICS)).where(Where.and(Where.eq("category", "electronics"), Where.documentContains("wireless"))).selectAll().limit(10).build()`
+- Assert: all returned rows have category "electronics" AND document contains "wireless"
+
+Execute each sub-test as: `seedCollection.search().searches(s).execute()`
+Each sub-test is a block within the single test method, with descriptive comment headers. If any sub-test gets zero results, assert that the result is at least not-null and has no exception (some filters may legitimately match zero records, but the search call must succeed).
+
+2. **testCloudSearchPagination** (D-14 limit and limit+offset):
+- Gate: `Assume.assumeTrue("Cloud not available", cloudAvailable);`
+
+Sub-test A -- Basic limit:
+- `seedCollection.search().queryEmbedding(QUERY_ELECTRONICS).limit(3).execute()`
+- Assert: `result.rows(0).size() <= 3`
+
+Sub-test B -- Limit+offset (page 2):
+- Page 1: `seedCollection.search().queryEmbedding(QUERY_ELECTRONICS).limit(3).offset(0).execute()`
+- Page 2: `seedCollection.search().queryEmbedding(QUERY_ELECTRONICS).limit(3).offset(3).execute()`
+- Assert: page1 rows not empty
+- Assert: page2 result not null (may be empty if fewer than 4 results)
+- If both pages have results: assert page1 first row ID is not equal to page2 first row ID (different pages) -- compare with `assertNotEquals`, not `!=`, since IDs are Strings
+
+3. **testCloudSearchProjectionPresent** (D-15 selected fields present):
+- Gate: `Assume.assumeTrue("Cloud not available", cloudAvailable);`
+- Build: `Search.builder().knn(Knn.queryEmbedding(QUERY_ELECTRONICS)).select(Select.ID, Select.SCORE, Select.DOCUMENT).limit(3).build()`
+- Execute via `seedCollection.search().searches(s).execute()`
+- Assert via rows: each `SearchResultRow` has non-null `getId()`, non-null `getScore()`, non-null `getDocument()`
+- Assert: `result.getEmbeddings()` is null (embedding was NOT selected)
+
+4. **testCloudSearchProjectionCustomKey** (D-16 custom metadata key projection):
+- Gate: `Assume.assumeTrue("Cloud not available", cloudAvailable);`
+- Build: `Search.builder().knn(Knn.queryEmbedding(QUERY_ELECTRONICS)).select(Select.ID, Select.SCORE, Select.key("category"), Select.key("price")).limit(3).build()`
+- Execute via `seedCollection.search().searches(s).execute()`
+- Assert: result not null, rows not empty
+- Assert via `result.getMetadatas()`: if metadatas present, each metadata map contains key "category" and key "price" (these were projected)
+- Comment: "Custom key projection is a Cloud-oriented feature per D-16"
- cd /Users/tazarov/experiments/amikos/chromadb-java-client && mvn test-compile 2>&1 | tail -5
+ cd /Users/tazarov/experiments/amikos/chromadb-java-client && mvn test-compile -pl . -q 2>&1 | tail -5
- - MANDATORY: Phase 3 Search API types exist in src/main/java/tech/amikos/chromadb/v2/ (if not, plan is BLOCKED)
- - SearchApiCloudIntegrationTest.java contains `testCloudKnnSearch` method (grep-verifiable)
- - SearchApiCloudIntegrationTest.java contains `testCloudRrfSearch` method
- - SearchApiCloudIntegrationTest.java contains `testCloudGroupBySearch` method
- - SearchApiCloudIntegrationTest.java contains `testCloudBatchSearch` method
- - SearchApiCloudIntegrationTest.java contains `testCloudSearchPagination` method
- - SearchApiCloudIntegrationTest.java contains `testCloudSearchFilterMatrix` method
- - SearchApiCloudIntegrationTest.java contains `testCloudSearchProjection` method
- - SearchApiCloudIntegrationTest.java contains `testCloudSearchReadLevel` method
- - File contains `Where.idIn(` calls (for filter matrix D-13)
- - File contains `Where.idNotIn(` calls (for filter matrix D-13)
- - File contains `Where.documentContains(` calls (for filter matrix D-13)
- - File imports Phase 3 Search API types (Search, Knn, or equivalent)
- - `grep -c "@Test" src/test/java/tech/amikos/chromadb/v2/SearchApiCloudIntegrationTest.java` returns at least 20 (12 from Plan 01 + 8 from Plan 02)
- - `mvn test-compile` exits 0
+ - grep -c "testCloudSearchFilterMatrix\|testCloudSearchPagination\|testCloudSearchProjectionPresent\|testCloudSearchProjectionCustomKey" src/test/java/tech/amikos/chromadb/v2/SearchApiCloudIntegrationTest.java returns 4
+ - grep "Where.idIn" src/test/java/tech/amikos/chromadb/v2/SearchApiCloudIntegrationTest.java returns at least 1 match
+ - grep "Where.idNotIn" src/test/java/tech/amikos/chromadb/v2/SearchApiCloudIntegrationTest.java returns at least 2 matches (sub-tests C and E)
+ - grep "Where.documentContains" src/test/java/tech/amikos/chromadb/v2/SearchApiCloudIntegrationTest.java returns at least 2 matches (sub-tests D and F)
+ - grep "Where.and(" src/test/java/tech/amikos/chromadb/v2/SearchApiCloudIntegrationTest.java returns at least 2 matches (combined filter sub-tests E and F)
+ - grep "Select.key" src/test/java/tech/amikos/chromadb/v2/SearchApiCloudIntegrationTest.java returns at least 2 matches (category and price projection)
+ - grep ".offset(" src/test/java/tech/amikos/chromadb/v2/SearchApiCloudIntegrationTest.java returns at least 2 matches (page 1 offset(0) and page 2 offset(3))
+ - grep -c "groups()\|isGrouped()" src/test/java/tech/amikos/chromadb/v2/SearchApiCloudIntegrationTest.java returns 0
+ - grep -c "waitForIndexing" src/test/java/tech/amikos/chromadb/v2/SearchApiCloudIntegrationTest.java returns 0
+ - File compiles: mvn test-compile -pl . -q succeeds with exit code 0
+ - Total CLOUD-01 test method count: grep -c "@Test" src/test/java/tech/amikos/chromadb/v2/SearchApiCloudIntegrationTest.java returns at least 23 (12 existing + 11 new)
- 8 CLOUD-01 search parity test methods added to SearchApiCloudIntegrationTest. Tests cover KNN, RRF, GroupBy, batch, pagination, filter matrix (7 sub-tests), projection (2 sub-tests), and read levels (INDEX_AND_WAL + INDEX_ONLY). Knn.limit vs Search.limit distinction explicitly validated per D-11. All tests compile and skip cleanly without credentials.
+ 4 new test methods added: testCloudSearchFilterMatrix (6 sub-tests covering Where alone, IDIn, IDNotIn, DocumentContains, IDNotIn+metadata combo, Where+DocumentContains combo), testCloudSearchPagination (basic limit + limit+offset page 2), testCloudSearchProjectionPresent (selected fields present, unselected null), testCloudSearchProjectionCustomKey (Select.key for category and price). Total of 11 new CLOUD-01 test methods across both tasks. File compiles and all tests skip cleanly without cloud credentials.
-1. `mvn test-compile` exits 0 -- all code compiles including new search test methods
-2. `grep -c "@Test" src/test/java/tech/amikos/chromadb/v2/SearchApiCloudIntegrationTest.java` returns at least 20
-3. `mvn test -Pintegration -Dtest=SearchApiCloudIntegrationTest` -- runs all cloud tests (if credentials present) or skips cleanly
-4. `grep "testCloudKnnSearch\|testCloudRrfSearch\|testCloudGroupBySearch\|testCloudBatchSearch\|testCloudSearchPagination\|testCloudSearchFilterMatrix\|testCloudSearchProjection\|testCloudSearchReadLevel" src/test/java/tech/amikos/chromadb/v2/SearchApiCloudIntegrationTest.java | wc -l` returns 8
+1. File compiles: `mvn test-compile -pl . -q` passes
+2. Tests skip cleanly without credentials: `mvn test -Pintegration -Dtest=SearchApiCloudIntegrationTest 2>&1 | grep -E "(Tests run|SKIPPED)"` shows tests skipped, not failed
+3. No removed APIs: `grep -c "groups()\|isGrouped()\|waitForIndexing" src/test/java/tech/amikos/chromadb/v2/SearchApiCloudIntegrationTest.java` returns 0
+4. All CLOUD-01 scenarios covered: grep for all 11 new test method names returns matches
+5. No server-side embedding calls (explicit embeddings required): any new `col.add()` call in the test file includes `.embeddings(...)` before `.execute()`
-- Pre-execution gate verified: Phase 3 types exist before any code is written
-- 8 CLOUD-01 test methods present in SearchApiCloudIntegrationTest
-- KNN test validates Knn.limit vs Search.limit distinction (D-11)
-- RRF test executes multi-rank fusion end-to-end (D-07)
-- GroupBy test validates MinK/MaxK aggregation (D-08)
-- Batch test validates multiple independent searches (D-10)
-- Pagination test validates limit, offset, and invalid input (D-14)
-- Filter matrix covers all 7 combinations from D-13
-- Projection test validates field presence/absence (D-15, D-16)
-- Read level test validates INDEX_AND_WAL (no polling) and INDEX_ONLY (<= assertion) per D-12
-- All tests use Assume.assumeTrue for credential gating (D-02)
-- Code compiles on Java 8
+- 11 new test methods in SearchApiCloudIntegrationTest.java covering all CLOUD-01 scenarios
+- All tests gate on `Assume.assumeTrue("Cloud not available", cloudAvailable)` for clean skip
+- RRF test auto-skips with `Assume.assumeTrue(..., false)` documenting server limitation
+- ReadLevel WAL test uses isolated collection with explicit 3D embeddings and no polling
+- Filter matrix covers 6 sub-scenarios per D-13
+- Pagination tests cover basic limit and limit+offset per D-14
+- Projection tests verify present/absent fields per D-15 and custom keys per D-16
+- KnnLimit vs SearchLimit test validates candidate pool vs final count per D-11
+- No calls to groups(), isGrouped(), or waitForIndexing()
+- File compiles with mvn test-compile
*/
static Map buildRrfRankMap(Rrf rrf) {
List ranks = rrf.getRanks();
@@ -1755,11 +1759,11 @@ static Map buildRrfRankMap(Rrf rrf) {
for (int i = 0; i < ranks.size(); i++) {
weights[i] = ranks.get(i).getWeight();
}
- // Normalize weights if requested
+ // Normalize weights if requested (divide each by the sum of all weights)
if (rrf.isNormalize()) {
double sum = 0;
for (double w : weights) sum += w;
- if (sum > 1e-6) {
+ if (sum > 1e-9) {
for (int i = 0; i < weights.length; i++) weights[i] /= sum;
}
}
diff --git a/src/main/java/tech/amikos/chromadb/v2/Rrf.java b/src/main/java/tech/amikos/chromadb/v2/Rrf.java
index d0aeefc..acb5c4c 100644
--- a/src/main/java/tech/amikos/chromadb/v2/Rrf.java
+++ b/src/main/java/tech/amikos/chromadb/v2/Rrf.java
@@ -53,7 +53,7 @@ public int getK() {
}
/**
- * Returns whether scores should be normalized.
+ * Returns whether weights should be normalized before expansion.
*/
public boolean isNormalize() {
return normalize;
@@ -103,33 +103,43 @@ private Builder() {}
* automatically set to {@code true} on the provided {@link Knn} instance.
*
* @param knn the KNN sub-ranking; must not be null
- * @param weight fusion weight for this sub-ranking
+ * @param weight fusion weight for this sub-ranking; must be non-negative and finite
* @return this builder
- * @throws IllegalArgumentException if {@code knn} is null
+ * @throws IllegalArgumentException if {@code knn} is null, or weight is negative, NaN, or infinite
*/
public Builder rank(Knn knn, double weight) {
if (knn == null) {
throw new IllegalArgumentException("knn must not be null");
}
+ if (Double.isNaN(weight) || Double.isInfinite(weight)) {
+ throw new IllegalArgumentException("weight must be finite, got: " + weight);
+ }
+ if (weight < 0) {
+ throw new IllegalArgumentException("RRF weight must be non-negative, got: " + weight);
+ }
ranks.add(new RankWithWeight(knn.withReturnRank(), weight));
return this;
}
/**
- * Sets the RRF k constant. Default is 60.
+ * Sets the RRF k constant. Default is 60. Must be positive.
*
- * @param k the RRF k constant
+ * @param k the RRF k constant; must be > 0
* @return this builder
+ * @throws IllegalArgumentException if {@code k} is not positive
*/
public Builder k(int k) {
+ if (k <= 0) {
+ throw new IllegalArgumentException("RRF k must be positive, got: " + k);
+ }
this.k = k;
return this;
}
/**
- * Sets whether scores should be normalized. Default is {@code false}.
+ * Sets whether weights should be normalized before expansion. Default is {@code false}.
*
- * @param normalize whether to normalize scores
+ * @param normalize whether to normalize weights
* @return this builder
*/
public Builder normalize(boolean normalize) {
diff --git a/src/test/java/tech/amikos/chromadb/v2/CollectionApiExtensionsCloudTest.java b/src/test/java/tech/amikos/chromadb/v2/CollectionApiExtensionsCloudTest.java
index 44518d6..1f39c38 100644
--- a/src/test/java/tech/amikos/chromadb/v2/CollectionApiExtensionsCloudTest.java
+++ b/src/test/java/tech/amikos/chromadb/v2/CollectionApiExtensionsCloudTest.java
@@ -97,10 +97,7 @@ public void testCloudForkCountReturnsZeroForNewCollection() {
int count = col.forkCount();
assertEquals(0, count);
} catch (ChromaNotFoundException e) {
- Assume.assumeTrue("forkCount not available on this Chroma Cloud account", false);
- } catch (ChromaServerException e) {
- Assume.assumeTrue("forkCount not available on this Chroma Cloud account"
- + " (server error: " + e.getMessage() + ")", false);
+ Assume.assumeTrue("forkCount endpoint not available on this Chroma Cloud account", false);
}
}
diff --git a/src/test/java/tech/amikos/chromadb/v2/SearchApiCloudIntegrationTest.java b/src/test/java/tech/amikos/chromadb/v2/SearchApiCloudIntegrationTest.java
index a97439f..d8ec27c 100644
--- a/src/test/java/tech/amikos/chromadb/v2/SearchApiCloudIntegrationTest.java
+++ b/src/test/java/tech/amikos/chromadb/v2/SearchApiCloudIntegrationTest.java
@@ -27,7 +27,8 @@
import static org.junit.Assert.fail;
/**
- * Cloud integration tests for schema/index parity (CLOUD-02) and array metadata (CLOUD-03).
+ * Cloud integration tests for search parity (CLOUD-01), schema/index parity (CLOUD-02),
+ * and array metadata (CLOUD-03).
*
*
Credentials loaded from {@code .env} or environment variables:
* CHROMA_API_KEY, CHROMA_TENANT, CHROMA_DATABASE.
@@ -423,8 +424,11 @@ public void testCloudSpannConfigRoundTrip() {
}
// Cannot switch from HNSW to SPANN — skip this test gracefully
return;
- } catch (ChromaException e) {
- // SPANN may not be available on this cloud account
+ } catch (ChromaBadRequestException e) {
+ // SPANN may not be available on this cloud account/plan
+ return;
+ } catch (ChromaNotFoundException e) {
+ // SPANN endpoint may not exist on this cloud version
return;
}
@@ -1339,20 +1343,25 @@ private static Map buildSingleMeta(String key, Object value) {
/**
* Polls a condition until it passes or the timeout expires (similar to Go's require.Eventually).
+ * Retries on both {@link AssertionError} and transient {@link ChromaException} (e.g., connection
+ * timeouts or server errors during cloud replication windows).
*
* @param timeout maximum time to wait
* @param tick interval between attempts
- * @param runnable assertion block that throws {@link AssertionError} on failure
+ * @param runnable assertion block that throws {@link AssertionError} or {@link ChromaException} on failure
*/
private static void assertEventually(Duration timeout, Duration tick, Runnable runnable) {
long deadline = System.nanoTime() + timeout.toNanos();
- AssertionError lastError = null;
- while (System.nanoTime() < deadline) {
+ Throwable lastError = null;
+ do {
try {
runnable.run();
return; // passed
} catch (AssertionError e) {
lastError = e;
+ } catch (ChromaException e) {
+ // Transient server/connection errors during cloud replication
+ lastError = e;
}
try {
Thread.sleep(tick.toMillis());
@@ -1360,7 +1369,9 @@ private static void assertEventually(Duration timeout, Duration tick, Runnable r
Thread.currentThread().interrupt();
throw new RuntimeException("assertEventually interrupted", ie);
}
- }
- throw lastError;
+ } while (System.nanoTime() < deadline);
+ if (lastError instanceof RuntimeException) throw (RuntimeException) lastError;
+ if (lastError instanceof Error) throw (Error) lastError;
+ throw new AssertionError("assertEventually timed out", lastError);
}
}
diff --git a/src/test/java/tech/amikos/chromadb/v2/SearchApiIntegrationTest.java b/src/test/java/tech/amikos/chromadb/v2/SearchApiIntegrationTest.java
index 79c5884..a02f173 100644
--- a/src/test/java/tech/amikos/chromadb/v2/SearchApiIntegrationTest.java
+++ b/src/test/java/tech/amikos/chromadb/v2/SearchApiIntegrationTest.java
@@ -206,11 +206,18 @@ public void testRrfSearch() {
SearchResult result = searchCollection.search().searches(s).execute();
assertNotNull(result);
assertFalse("RRF should return results", result.getIds().get(0).isEmpty());
- } catch (ChromaException e) {
- // Arithmetic rank expressions may not be supported on older self-hosted versions;
- // this also catches ChromaDeserializationException for malformed response bodies
- Assume.assumeTrue("RRF not supported on self-hosted Chroma " + configuredChromaVersion()
- + " (" + e.getMessage() + ")", false);
+ } catch (ChromaBadRequestException e) {
+ // Server does not understand arithmetic rank expressions
+ Assume.assumeTrue("RRF arithmetic ranks not supported on Chroma "
+ + configuredChromaVersion() + " (" + e.getMessage() + ")", false);
+ } catch (ChromaServerException e) {
+ // Server returned 5xx — may not support arithmetic rank expressions
+ Assume.assumeTrue("RRF not supported on Chroma "
+ + configuredChromaVersion() + " (server error: " + e.getMessage() + ")", false);
+ } catch (ChromaDeserializationException e) {
+ // Server returned an unexpected response format for RRF
+ Assume.assumeTrue("RRF response format not supported on Chroma "
+ + configuredChromaVersion() + " (" + e.getMessage() + ")", false);
}
}
diff --git a/src/test/java/tech/amikos/chromadb/v2/SearchApiUnitTest.java b/src/test/java/tech/amikos/chromadb/v2/SearchApiUnitTest.java
index 6e97072..42cb51c 100644
--- a/src/test/java/tech/amikos/chromadb/v2/SearchApiUnitTest.java
+++ b/src/test/java/tech/amikos/chromadb/v2/SearchApiUnitTest.java
@@ -175,6 +175,121 @@ public void testRrfDefaultK() {
assertEquals("default k should be 60", 60, rrf.getK());
}
+ @SuppressWarnings("unchecked")
+ @Test
+ public void testRrfDtoStructureSecondRank() {
+ // Verify the second rank's weight and KNN are correctly placed in the expanded structure
+ Knn knn1 = Knn.queryText("wireless audio");
+ Knn knn2 = Knn.queryText("noise cancelling headphones");
+ Rrf rrf = Rrf.builder()
+ .rank(knn1, 0.7)
+ .rank(knn2, 0.3)
+ .k(60)
+ .build();
+ Map map = ChromaDtos.buildRrfRankMap(rrf);
+ List mulTerms = (List) map.get("$mul");
+ Map sumMap = (Map) mulTerms.get(1);
+ List divTerms = (List) sumMap.get("$sum");
+ // Second term: $div { left: $val(0.3), right: $sum[$val(60), $knn] }
+ Map div1 = (Map) divTerms.get(1);
+ Map div1Inner = (Map) div1.get("$div");
+ Map leftVal1 = (Map) div1Inner.get("left");
+ assertEquals("second rank weight should be 0.3", 0.3, (Double) leftVal1.get("$val"), 1e-9);
+ Map rightSum1 = (Map) div1Inner.get("right");
+ List denomTerms1 = (List) rightSum1.get("$sum");
+ assertEquals(2, denomTerms1.size());
+ Map kVal1 = (Map) denomTerms1.get(0);
+ assertEquals("k should be 60 in second rank too", 60.0, (Double) kVal1.get("$val"), 1e-9);
+ assertTrue("second rank denominator should contain $knn",
+ ((Map) denomTerms1.get(1)).containsKey("$knn"));
+ }
+
+ @SuppressWarnings("unchecked")
+ @Test
+ public void testRrfCustomKValue() {
+ // Verify non-default k value propagates into the expanded structure
+ Rrf rrf = Rrf.builder()
+ .rank(Knn.queryText("a"), 1.0)
+ .k(100)
+ .build();
+ Map map = ChromaDtos.buildRrfRankMap(rrf);
+ List mulTerms = (List) map.get("$mul");
+ // Single rank → $div directly (no $sum wrapper)
+ Map divMap = (Map) mulTerms.get(1);
+ Map divInner = (Map) divMap.get("$div");
+ Map rightSum = (Map) divInner.get("right");
+ List denomTerms = (List) rightSum.get("$sum");
+ Map kVal = (Map) denomTerms.get(0);
+ assertEquals("custom k=100 should appear in $val", 100.0, (Double) kVal.get("$val"), 1e-9);
+ }
+
+ @SuppressWarnings("unchecked")
+ @Test
+ public void testRrfThreeRanksExpandsCorrectly() {
+ // Verify 3 ranks produce a $sum list with 3 $div terms
+ Rrf rrf = Rrf.builder()
+ .rank(Knn.queryText("a"), 0.5)
+ .rank(Knn.queryText("b"), 0.3)
+ .rank(Knn.queryText("c"), 0.2)
+ .k(60)
+ .build();
+ Map map = ChromaDtos.buildRrfRankMap(rrf);
+ List mulTerms = (List) map.get("$mul");
+ Map sumMap = (Map) mulTerms.get(1);
+ assertTrue("3 ranks should produce $sum wrapper", sumMap.containsKey("$sum"));
+ List divTerms = (List) sumMap.get("$sum");
+ assertEquals("should have 3 terms for 3 ranks", 3, divTerms.size());
+ // Verify each term is a $div
+ for (int i = 0; i < 3; i++) {
+ assertTrue("term " + i + " should be a $div",
+ ((Map) divTerms.get(i)).containsKey("$div"));
+ }
+ // Verify weights: 0.5, 0.3, 0.2
+ double[] expectedWeights = {0.5, 0.3, 0.2};
+ for (int i = 0; i < 3; i++) {
+ Map div = (Map) divTerms.get(i);
+ Map divInner = (Map) div.get("$div");
+ Map leftVal = (Map) divInner.get("left");
+ assertEquals("weight for rank " + i, expectedWeights[i],
+ (Double) leftVal.get("$val"), 1e-9);
+ }
+ }
+
+ @Test(expected = IllegalArgumentException.class)
+ public void testRrfNegativeWeightThrows() {
+ Rrf.builder().rank(Knn.queryText("a"), -1.0);
+ }
+
+ @Test(expected = IllegalArgumentException.class)
+ public void testRrfNaNWeightThrows() {
+ Rrf.builder().rank(Knn.queryText("a"), Double.NaN);
+ }
+
+ @Test(expected = IllegalArgumentException.class)
+ public void testRrfInfiniteWeightThrows() {
+ Rrf.builder().rank(Knn.queryText("a"), Double.POSITIVE_INFINITY);
+ }
+
+ @Test(expected = IllegalArgumentException.class)
+ public void testRrfNegativeInfinityWeightThrows() {
+ Rrf.builder().rank(Knn.queryText("a"), Double.NEGATIVE_INFINITY);
+ }
+
+ @Test(expected = IllegalArgumentException.class)
+ public void testRrfKZeroThrows() {
+ Rrf.builder().k(0);
+ }
+
+ @Test(expected = IllegalArgumentException.class)
+ public void testRrfKNegativeThrows() {
+ Rrf.builder().k(-1);
+ }
+
+ @Test(expected = IllegalArgumentException.class)
+ public void testRrfKMinValueThrows() {
+ Rrf.builder().k(Integer.MIN_VALUE);
+ }
+
// ========== Search builder tests ==========
@Test
@@ -244,6 +359,20 @@ public void testBuildSearchItemMapKnn() {
assertTrue("rank should contain '$knn'", rank.containsKey("$knn"));
}
+ @SuppressWarnings("unchecked")
+ @Test
+ public void testBuildSearchItemMapRrf() {
+ // Verify RRF routing through buildSearchItemMap produces $mul (not $knn)
+ Knn knn = Knn.queryText("test");
+ Rrf rrf = Rrf.builder().rank(knn, 1.0).build();
+ Search search = Search.builder().rrf(rrf).build();
+ Map item = ChromaDtos.buildSearchItemMap(search, null);
+ assertTrue("item should have 'rank' key", item.containsKey("rank"));
+ Map rank = (Map) item.get("rank");
+ assertTrue("RRF rank should contain '$mul' (not '$knn')", rank.containsKey("$mul"));
+ assertFalse("RRF rank should not contain '$knn' at top level", rank.containsKey("$knn"));
+ }
+
@Test
public void testBuildSearchItemMapWithFilter() {
Knn knn = Knn.queryText("test");
From 061bc46512c4d56191683467917a7e7af91adde5 Mon Sep 17 00:00:00 2001
From: oss-amikos
Date: Tue, 24 Mar 2026 14:16:11 +0200
Subject: [PATCH 24/26] fix: narrow assertEventually to transient exceptions
and fix remaining catch blocks
- assertEventually: catch only ChromaConnectionException + ChromaServerException
(non-transient 4xx/deserialization errors now propagate immediately)
- SPANN config test: use Assume.assumeTrue instead of silent return
- Config transition test: narrow ChromaException to BadRequest + Server
- Fix misleading "server-side embeddings" comment
- Complete @throws tag on Rrf.Builder.build()
---
.../java/tech/amikos/chromadb/v2/Rrf.java | 2 +-
.../v2/SearchApiCloudIntegrationTest.java | 29 +++++++++++--------
2 files changed, 18 insertions(+), 13 deletions(-)
diff --git a/src/main/java/tech/amikos/chromadb/v2/Rrf.java b/src/main/java/tech/amikos/chromadb/v2/Rrf.java
index acb5c4c..a24b847 100644
--- a/src/main/java/tech/amikos/chromadb/v2/Rrf.java
+++ b/src/main/java/tech/amikos/chromadb/v2/Rrf.java
@@ -151,7 +151,7 @@ public Builder normalize(boolean normalize) {
* Builds the {@link Rrf} instance.
*
* @return an immutable {@code Rrf}
- * @throws IllegalArgumentException if no ranks have been added
+ * @throws IllegalArgumentException if no ranks have been added, or if all weights are zero
*/
public Rrf build() {
if (ranks.isEmpty()) {
diff --git a/src/test/java/tech/amikos/chromadb/v2/SearchApiCloudIntegrationTest.java b/src/test/java/tech/amikos/chromadb/v2/SearchApiCloudIntegrationTest.java
index d8ec27c..f282dcf 100644
--- a/src/test/java/tech/amikos/chromadb/v2/SearchApiCloudIntegrationTest.java
+++ b/src/test/java/tech/amikos/chromadb/v2/SearchApiCloudIntegrationTest.java
@@ -79,7 +79,7 @@ public static void setUpSharedSeedCollection() {
sharedCollectionName = "seed_" + UUID.randomUUID().toString().substring(0, 8);
seedCollection = sharedClient.createCollection(sharedCollectionName);
- // Add 15 records modeling a product catalog domain (per D-04, D-06 — server-side embeddings)
+ // Add 15 records with explicit 4D embeddings modeling a product catalog domain (per D-04, D-06)
List ids = Arrays.asList(
"prod-001", "prod-002", "prod-003", "prod-004", "prod-005",
"prod-006", "prod-007", "prod-008", "prod-009", "prod-010",
@@ -425,11 +425,9 @@ public void testCloudSpannConfigRoundTrip() {
// Cannot switch from HNSW to SPANN — skip this test gracefully
return;
} catch (ChromaBadRequestException e) {
- // SPANN may not be available on this cloud account/plan
- return;
+ Assume.assumeTrue("SPANN not available on this cloud account/plan: " + e.getMessage(), false);
} catch (ChromaNotFoundException e) {
- // SPANN endpoint may not exist on this cloud version
- return;
+ Assume.assumeTrue("SPANN endpoint not found on this cloud version: " + e.getMessage(), false);
}
Collection fetched = client.getCollection(col.getName());
@@ -476,8 +474,11 @@ public void testCloudInvalidConfigTransitionRejected() {
// Expected: client-side validation prevents the switch
assertTrue("Error message should mention index group switch",
isIndexGroupSwitchError(e));
- } catch (ChromaException e) {
- // Expected: server-side rejection is also acceptable
+ } catch (ChromaBadRequestException e) {
+ // Expected: server-side rejection for invalid index group transition
+ assertNotNull("Exception message must not be null", e.getMessage());
+ } catch (ChromaServerException e) {
+ // Some server versions return 5xx for unsupported transitions
assertNotNull("Exception message must not be null", e.getMessage());
}
}
@@ -1343,12 +1344,13 @@ private static Map buildSingleMeta(String key, Object value) {
/**
* Polls a condition until it passes or the timeout expires (similar to Go's require.Eventually).
- * Retries on both {@link AssertionError} and transient {@link ChromaException} (e.g., connection
- * timeouts or server errors during cloud replication windows).
+ * Retries on {@link AssertionError} and transient server/connection errors
+ * ({@link ChromaServerException}, {@link ChromaConnectionException}).
+ * Non-transient errors (4xx, deserialization) propagate immediately.
*
* @param timeout maximum time to wait
* @param tick interval between attempts
- * @param runnable assertion block that throws {@link AssertionError} or {@link ChromaException} on failure
+ * @param runnable assertion block
*/
private static void assertEventually(Duration timeout, Duration tick, Runnable runnable) {
long deadline = System.nanoTime() + timeout.toNanos();
@@ -1359,8 +1361,11 @@ private static void assertEventually(Duration timeout, Duration tick, Runnable r
return; // passed
} catch (AssertionError e) {
lastError = e;
- } catch (ChromaException e) {
- // Transient server/connection errors during cloud replication
+ } catch (ChromaConnectionException e) {
+ // Transient: network issue during cloud replication window
+ lastError = e;
+ } catch (ChromaServerException e) {
+ // Transient: server-side 5xx during replication window
lastError = e;
}
try {
From 815ff8bfc54816f3825bcb9a61ce06aa84a7b4a7 Mon Sep 17 00:00:00 2001
From: oss-amikos
Date: Tue, 24 Mar 2026 15:22:04 +0200
Subject: [PATCH 25/26] =?UTF-8?q?fix:=20address=20PR=20review=20=E2=80=94?=
=?UTF-8?q?=20tighten=20no-op=20test,=20fix=20tautology,=20and=20clean=20u?=
=?UTF-8?q?p=20style?=
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit
- Fail testCloudInvalidConfigTransitionRejected when known index group
transition is silently accepted; strengthen catch-block assertions
- Remove tautological `>= 0` check on List.size() in INDEX_ONLY test
- Add ChromaServerException catch to forkCount test for cloud 5xx
- Remove double blank lines after .execute() calls (style consistency)
- Add comment about Integer→Double JSON round-trip in buildMeta
---
.../v2/CollectionApiExtensionsCloudTest.java | 2 ++
.../v2/SearchApiCloudIntegrationTest.java | 22 +++++++++++--------
2 files changed, 15 insertions(+), 9 deletions(-)
diff --git a/src/test/java/tech/amikos/chromadb/v2/CollectionApiExtensionsCloudTest.java b/src/test/java/tech/amikos/chromadb/v2/CollectionApiExtensionsCloudTest.java
index 1f39c38..dfff413 100644
--- a/src/test/java/tech/amikos/chromadb/v2/CollectionApiExtensionsCloudTest.java
+++ b/src/test/java/tech/amikos/chromadb/v2/CollectionApiExtensionsCloudTest.java
@@ -98,6 +98,8 @@ public void testCloudForkCountReturnsZeroForNewCollection() {
assertEquals(0, count);
} catch (ChromaNotFoundException e) {
Assume.assumeTrue("forkCount endpoint not available on this Chroma Cloud account", false);
+ } catch (ChromaServerException e) {
+ Assume.assumeTrue("forkCount endpoint returned server error on this Chroma Cloud account", false);
}
}
diff --git a/src/test/java/tech/amikos/chromadb/v2/SearchApiCloudIntegrationTest.java b/src/test/java/tech/amikos/chromadb/v2/SearchApiCloudIntegrationTest.java
index f282dcf..c6e91b6 100644
--- a/src/test/java/tech/amikos/chromadb/v2/SearchApiCloudIntegrationTest.java
+++ b/src/test/java/tech/amikos/chromadb/v2/SearchApiCloudIntegrationTest.java
@@ -249,6 +249,8 @@ private static boolean isNonBlank(String value) {
return value != null && !value.trim().isEmpty();
}
+ // Note: ratings are boxed as Integer here but may round-trip through JSON as Double.
+ // Assertions should compare via Number, not exact Integer type (see instanceof Number checks).
private static Map buildMeta(String category, double price, boolean inStock,
List tags, List ratings) {
Map meta = new LinkedHashMap();
@@ -468,18 +470,23 @@ public void testCloudInvalidConfigTransitionRejected() {
.spannSearchNprobe(8)
.build());
}
- // If no exception — the server allowed the transition (UNKNOWN group allows either)
- // This is acceptable behavior when the index group is UNKNOWN
+ // No exception — only acceptable when the index group is UNKNOWN
+ if (indexGroup != IndexGroup.UNKNOWN) {
+ fail("Expected rejection for cross-group transition from " + indexGroup
+ + ", but server accepted the configuration change");
+ }
} catch (IllegalArgumentException e) {
// Expected: client-side validation prevents the switch
assertTrue("Error message should mention index group switch",
isIndexGroupSwitchError(e));
} catch (ChromaBadRequestException e) {
// Expected: server-side rejection for invalid index group transition
- assertNotNull("Exception message must not be null", e.getMessage());
+ assertTrue("Bad-request message should not be empty",
+ e.getMessage() != null && !e.getMessage().isEmpty());
} catch (ChromaServerException e) {
// Some server versions return 5xx for unsupported transitions
- assertNotNull("Exception message must not be null", e.getMessage());
+ assertTrue("Server-error message should not be empty",
+ e.getMessage() != null && !e.getMessage().isEmpty());
}
}
@@ -560,7 +567,6 @@ public void testCloudStringArrayMetadata() {
.embeddings(new float[]{0.9f, 0.1f, 0.1f})
.execute();
-
GetResult result = col.get()
.ids("arr-str-1")
.include(Include.METADATAS)
@@ -614,7 +620,6 @@ public void testCloudNumberArrayMetadata() {
.embeddings(new float[]{0.1f, 0.9f, 0.1f})
.execute();
-
GetResult result = col.get()
.ids("arr-num-1")
.include(Include.METADATAS)
@@ -731,7 +736,6 @@ public void testCloudArrayContainsEdgeCases() {
)
.execute();
-
// Contains on single-element: should return only edge-1
GetResult soloResult = col.get()
.where(Where.contains("tags", "solo"))
@@ -1004,8 +1008,8 @@ public void testCloudSearchReadLevelIndexOnly() {
assertNotNull("ids outer list must be non-null", result.getIds());
// INDEX_ONLY may return 0 results if the index hasn't compacted yet (async on Cloud).
// The key assertion is that the call succeeds without error.
- assertTrue("INDEX_ONLY result count must be >= 0 and <= 15",
- result.getIds().get(0).size() >= 0 && result.getIds().get(0).size() <= 15);
+ assertTrue("INDEX_ONLY result count must be <= 15",
+ result.getIds().get(0).size() <= 15);
}
@Test
From ea5ff7e73769ede97208120915e6e862895c89c0 Mon Sep 17 00:00:00 2001
From: oss-amikos
Date: Tue, 24 Mar 2026 16:05:12 +0200
Subject: [PATCH 26/26] fix: replace dead normalization guard with fail-fast
IllegalStateException
- Replace silent no-op in buildRrfRankMap with IllegalStateException when
weight sum is effectively zero (unreachable via Rrf.build() validation,
but will crash loudly if the invariant is ever broken)
- Remove remaining double blank lines after .execute() calls
---
src/main/java/tech/amikos/chromadb/v2/ChromaDtos.java | 9 ++++++---
.../chromadb/v2/SearchApiCloudIntegrationTest.java | 2 --
2 files changed, 6 insertions(+), 5 deletions(-)
diff --git a/src/main/java/tech/amikos/chromadb/v2/ChromaDtos.java b/src/main/java/tech/amikos/chromadb/v2/ChromaDtos.java
index 8e7af45..8524c62 100644
--- a/src/main/java/tech/amikos/chromadb/v2/ChromaDtos.java
+++ b/src/main/java/tech/amikos/chromadb/v2/ChromaDtos.java
@@ -1759,13 +1759,16 @@ static Map buildRrfRankMap(Rrf rrf) {
for (int i = 0; i < ranks.size(); i++) {
weights[i] = ranks.get(i).getWeight();
}
- // Normalize weights if requested (divide each by the sum of all weights)
+ // Normalize weights if requested (divide each by the sum of all weights).
+ // Rrf.build() guarantees weightSum >= 1e-9, so sum should always be positive here.
if (rrf.isNormalize()) {
double sum = 0;
for (double w : weights) sum += w;
- if (sum > 1e-9) {
- for (int i = 0; i < weights.length; i++) weights[i] /= sum;
+ if (sum <= 1e-9) {
+ throw new IllegalStateException(
+ "RRF weight sum is effectively zero (" + sum + "); this should have been rejected by Rrf.build()");
}
+ for (int i = 0; i < weights.length; i++) weights[i] /= sum;
}
// Build terms: weight_i / (k + rank_i)
List