Skip to content
Draft
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
42 changes: 38 additions & 4 deletions Makefile
Original file line number Diff line number Diff line change
Expand Up @@ -716,6 +716,7 @@ test-ci: ensure-langflow-data ensure-backend-volumes ## Start infra, run integra
echo "::endgroup::"; \
echo "::group::Start Infrastructure"; \
echo "$(YELLOW)Starting infra (OpenSearch + Dashboards + Langflow + Backend + Frontend) with CPU containers$(NC)"; \
mkdir -p config && chmod 777 config; \
OPENSEARCH_HOST=opensearch $(COMPOSE_CMD) up -d opensearch dashboards langflow openrag-backend openrag-frontend; \
echo "$(CYAN)Architecture: $$(uname -m), Platform: $$(uname -s)$(NC)"; \
echo "$(YELLOW)Starting docling-serve...$(NC)"; \
Expand Down Expand Up @@ -797,10 +798,26 @@ test-ci: ensure-langflow-data ensure-backend-volumes ## Start infra, run integra
TEST_RESULT=$$?; \
echo "::endgroup::"; \
echo ""; \
echo "$(YELLOW)Waiting for frontend at http://localhost:3000...$(NC)"; \
echo "$(YELLOW)Waiting for OpenRAG API to be ready at http://localhost:3000/api/health...$(NC)"; \
OPENRAG_READY=0; \
for i in $$(seq 1 60); do \
curl -s http://localhost:3000/ >/dev/null 2>&1 && break || sleep 2; \
if curl -s http://localhost:3000/api/health | grep -q '"status"'; then \
OPENRAG_READY=1; \
echo "$(PURPLE)OpenRAG API is ready$(NC)"; \
break; \
fi; \
sleep 2; \
done; \
if [ "$$OPENRAG_READY" = "0" ]; then \
echo "$(RED)ERROR: OpenRAG API did not become ready at http://localhost:3000/api/health after waiting$(NC)"; \
echo "$(YELLOW)Backend logs (last 100 lines):$(NC)"; \
$(CONTAINER_RUNTIME) logs openrag-backend 2>&1 | tail -100 || true; \
echo "$(YELLOW)Frontend logs (last 50 lines):$(NC)"; \
$(CONTAINER_RUNTIME) logs openrag-frontend 2>&1 | tail -50 || true; \
uv run python scripts/docling_ctl.py stop || true; \
$(COMPOSE_CMD) down -v 2>/dev/null || true; \
exit 1; \
fi; \
echo "::group::SDK Integration Tests (Python)"; \
echo "$(CYAN)════════════════════════════════════════$(NC)"; \
echo "$(PURPLE) SDK Integration Tests (Python)$(NC)"; \
Expand Down Expand Up @@ -841,6 +858,7 @@ test-ci-local: ensure-langflow-data ensure-backend-volumes ## Same as test-ci bu
echo "::group::Start Infrastructure"; \
echo "$(YELLOW)Starting infra (OpenSearch + Dashboards + Langflow + Backend + Frontend) with CPU containers$(NC)"; \
echo "$(CYAN)Architecture: $$(uname -m), Platform: $$(uname -s)$(NC)"; \
mkdir -p config && chmod 777 config; \
OPENSEARCH_HOST=opensearch $(COMPOSE_CMD) up -d opensearch dashboards langflow openrag-backend openrag-frontend; \
echo "$(YELLOW)Starting docling-serve...$(NC)"; \
DOCLING_START_FAILED=0; \
Expand Down Expand Up @@ -921,10 +939,26 @@ test-ci-local: ensure-langflow-data ensure-backend-volumes ## Same as test-ci bu
TEST_RESULT=$$?; \
echo "::endgroup::"; \
echo ""; \
echo "$(YELLOW)Waiting for frontend at http://localhost:3000...$(NC)"; \
echo "$(YELLOW)Waiting for OpenRAG API to be ready at http://localhost:3000/api/health...$(NC)"; \
OPENRAG_READY=0; \
for i in $$(seq 1 60); do \
curl -s http://localhost:3000/ >/dev/null 2>&1 && break || sleep 2; \
if curl -s http://localhost:3000/api/health | grep -q '"status"'; then \
OPENRAG_READY=1; \
echo "$(PURPLE)OpenRAG API is ready$(NC)"; \
break; \
fi; \
sleep 2; \
done; \
if [ "$$OPENRAG_READY" = "0" ]; then \
echo "$(RED)ERROR: OpenRAG API did not become ready at http://localhost:3000/api/health after waiting$(NC)"; \
echo "$(YELLOW)Backend logs (last 100 lines):$(NC)"; \
$(CONTAINER_RUNTIME) logs openrag-backend 2>&1 | tail -100 || true; \
echo "$(YELLOW)Frontend logs (last 50 lines):$(NC)"; \
$(CONTAINER_RUNTIME) logs openrag-frontend 2>&1 | tail -50 || true; \
uv run python scripts/docling_ctl.py stop || true; \
$(COMPOSE_CMD) down -v 2>/dev/null || true; \
exit 1; \
fi; \
echo "::group::SDK Integration Tests (Python)"; \
echo "$(CYAN)════════════════════════════════════════$(NC)"; \
echo "$(PURPLE) SDK Integration Tests (Python)$(NC)"; \
Expand Down
19 changes: 7 additions & 12 deletions src/services/search_service.py
Original file line number Diff line number Diff line change
Expand Up @@ -410,7 +410,9 @@ async def embed_with_model(model_name):
if not is_wildcard_match_all and score_threshold > 0:
search_body["min_score"] = score_threshold

# Prepare fallback search body without num_candidates for clusters that don't support it
# Pre-build a fallback query without num_candidates for OpenSearch versions
# that don't support it. Used unconditionally on any RequestError so we
# don't rely on fragile error-string matching.
fallback_search_body = None
if not is_wildcard_match_all:
try:
Expand Down Expand Up @@ -461,12 +463,10 @@ async def embed_with_model(model_name):
error=error_message,
)
raise OpenSearchDiskSpaceError(DISK_SPACE_ERROR_MESSAGE) from e
if (
fallback_search_body is not None
and "unknown field [num_candidates]" in error_message.lower()
):
if fallback_search_body is not None:
logger.warning(
"OpenSearch cluster does not support num_candidates; retrying without it"
"OpenSearch query failed; retrying without num_candidates",
error=error_message,
)
try:
results = await opensearch_client.search(
Expand All @@ -476,15 +476,10 @@ async def embed_with_model(model_name):
)
except RequestError as retry_error:
if is_disk_space_error(retry_error):
logger.error(
"OpenSearch retry blocked by disk space constraint",
error=str(retry_error),
)
raise OpenSearchDiskSpaceError(DISK_SPACE_ERROR_MESSAGE) from retry_error
logger.error(
"OpenSearch retry without num_candidates failed",
"OpenSearch retry without num_candidates also failed",
error=str(retry_error),
search_body=fallback_search_body,
)
raise
else:
Expand Down
2 changes: 1 addition & 1 deletion tests/conftest.py
Original file line number Diff line number Diff line change
Expand Up @@ -74,7 +74,7 @@ async def onboard_system():

# Cleanup after all tests
try:
await clients.close()
await clients.cleanup()
except Exception:
pass

Expand Down
Loading
Loading