Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
8 changes: 8 additions & 0 deletions backend/api_v2/api_deployment_views.py
Original file line number Diff line number Diff line change
Expand Up @@ -81,6 +81,9 @@ def post(
timeout = serializer.validated_data.get(ApiExecution.TIMEOUT_FORM_DATA)
include_metadata = serializer.validated_data.get(ApiExecution.INCLUDE_METADATA)
include_metrics = serializer.validated_data.get(ApiExecution.INCLUDE_METRICS)
include_extracted_text = serializer.validated_data.get(
ApiExecution.INCLUDE_EXTRACTED_TEXT
)
use_file_history = serializer.validated_data.get(ApiExecution.USE_FILE_HISTORY)
tag_names = serializer.validated_data.get(ApiExecution.TAGS)
llm_profile_id = serializer.validated_data.get(ApiExecution.LLM_PROFILE_ID)
Expand Down Expand Up @@ -117,6 +120,7 @@ def post(
timeout=timeout,
include_metadata=include_metadata,
include_metrics=include_metrics,
include_extracted_text=include_extracted_text,
use_file_history=use_file_history,
tag_names=tag_names,
llm_profile_id=llm_profile_id,
Expand Down Expand Up @@ -171,6 +175,9 @@ def get(
execution_id = serializer.validated_data.get(ApiExecution.EXECUTION_ID)
include_metadata = serializer.validated_data.get(ApiExecution.INCLUDE_METADATA)
include_metrics = serializer.validated_data.get(ApiExecution.INCLUDE_METRICS)
include_extracted_text = serializer.validated_data.get(
ApiExecution.INCLUDE_EXTRACTED_TEXT
)

# Fetch execution status
response: ExecutionResponse = DeploymentHelper.get_execution_status(execution_id)
Expand Down Expand Up @@ -218,6 +225,7 @@ def get(
deployment_execution_dto=deployment_execution_dto,
include_metadata=include_metadata,
include_metrics=include_metrics,
include_extracted_text=include_extracted_text,
)
return Response(
data={
Expand Down
1 change: 1 addition & 0 deletions backend/api_v2/constants.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,6 +5,7 @@ class ApiExecution:
TIMEOUT_FORM_DATA: str = "timeout"
INCLUDE_METADATA: str = "include_metadata"
INCLUDE_METRICS: str = "include_metrics"
INCLUDE_EXTRACTED_TEXT: str = "include_extracted_text"
USE_FILE_HISTORY: str = "use_file_history" # Undocumented parameter
EXECUTION_ID: str = "execution_id"
TAGS: str = "tags"
Expand Down
12 changes: 10 additions & 2 deletions backend/api_v2/deployment_helper.py
Original file line number Diff line number Diff line change
Expand Up @@ -155,6 +155,7 @@ def execute_workflow(
timeout: int,
include_metadata: bool = False,
include_metrics: bool = False,
include_extracted_text: bool = False,
use_file_history: bool = False,
tag_names: list[str] = [],
llm_profile_id: str | None = None,
Expand Down Expand Up @@ -275,7 +276,10 @@ def execute_workflow(
)
if not enable_highlight:
result.remove_result_metadata_keys(["highlight_data"])
result.remove_result_metadata_keys(["extracted_text"])
if not include_extracted_text:
result.remove_result_metadata_keys(["extracted_text"])
if include_extracted_text:
result.promote_extracted_text()
if include_metadata or include_metrics:
cls._enrich_result_with_usage_metadata(result)
if not include_metadata:
Expand Down Expand Up @@ -458,6 +462,7 @@ def process_completed_execution(
deployment_execution_dto: Any,
include_metadata: bool,
include_metrics: bool,
include_extracted_text: bool = False,
) -> None:
"""Enrich and clean up the response for a completed execution."""
api_deployment = deployment_execution_dto.api
Expand All @@ -476,7 +481,10 @@ def process_completed_execution(
)
if not enable_highlight:
response.remove_result_metadata_keys(["highlight_data"])
response.remove_result_metadata_keys(["extracted_text"])
if not include_extracted_text:
response.remove_result_metadata_keys(["extracted_text"])
if include_extracted_text:
response.promote_extracted_text()
if include_metadata or include_metrics:
DeploymentHelper._enrich_result_with_usage_metadata(response)
if not include_metadata:
Expand Down
5 changes: 5 additions & 0 deletions backend/api_v2/serializers.py
Original file line number Diff line number Diff line change
Expand Up @@ -210,6 +210,9 @@ class ExecutionRequestSerializer(TagParamsSerializer):
If -1 it corresponds to async execution. Defaults to -1
include_metadata (bool): Flag to include metadata in API response
include_metrics (bool): Flag to include metrics in API response
include_extracted_text (bool): Flag to include the full extracted text
of the input file in the API response. The extracted text is returned
at the top level of each file result, independent of include_metadata.
use_file_history (bool): Flag to use FileHistory to save and retrieve
responses quickly. This is undocumented to the user and can be
helpful for demos.
Expand All @@ -232,6 +235,7 @@ class ExecutionRequestSerializer(TagParamsSerializer):
)
include_metadata = BooleanField(default=False)
include_metrics = BooleanField(default=False)
include_extracted_text = BooleanField(default=False)
use_file_history = BooleanField(default=False)

presigned_urls = ListField(child=URLField(), required=False)
Expand Down Expand Up @@ -408,6 +412,7 @@ class ExecutionQuerySerializer(Serializer):
execution_id = CharField(required=True)
include_metadata = BooleanField(default=False)
include_metrics = BooleanField(default=False)
include_extracted_text = BooleanField(default=False)

def validate_execution_id(self, value):
"""Trim spaces, validate UUID format, and check if execution_id exists."""
Expand Down
24 changes: 24 additions & 0 deletions backend/workflow_manager/workflow_v2/dto.py
Original file line number Diff line number Diff line change
Expand Up @@ -129,6 +129,30 @@ def remove_inner_result_metadata(self) -> None:
if isinstance(result, dict):
result.pop("metadata", None)

def promote_extracted_text(self) -> None:
    """Copy ``extracted_text`` from metadata to the top level of each file result.

    For every dict item in ``self.result`` whose ``"result"`` value is a
    dict carrying a dict under ``"metadata"``, a non-``None``
    ``metadata["extracted_text"]`` is copied to ``item["extracted_text"]``.
    This allows the extracted text to be returned independently of
    ``include_metadata``. The metadata dict itself is not modified.

    After promotion, the extracted text appears as:
        result[i]["extracted_text"] = "..."

    Items that do not match the expected shape (non-dict item, non-dict
    ``"result"``, missing or non-dict ``"metadata"``) are skipped silently.
    Nothing is done when ``self.result`` is not a list.
    """
    if not isinstance(self.result, list):
        return

    for item in self.result:
        if not isinstance(item, dict):
            continue

        result = item.get("result")
        if not isinstance(result, dict):
            continue

        # A ``.get("metadata", {})`` default only covers a missing key; the
        # key may also be present with a null / non-dict value, which would
        # otherwise raise AttributeError on the lookup below.
        metadata = result.get("metadata")
        if not isinstance(metadata, dict):
            continue

        extracted_text = metadata.get("extracted_text")
        if extracted_text is not None:
            item["extracted_text"] = extracted_text

def remove_result_metrics(self) -> None:
"""Removes the 'metrics' key from the 'result' dictionary within each
'result' dictionary in the 'result' list attribute of the instance.
Expand Down