rh-ai-quickstart · ganeshmurthy · Jun 15, 2026
diff --git a/frontend/llama_stack_ui/distribution/ui/modules/local_extractors.py b/frontend/llama_stack_ui/distribution/ui/modules/local_extractors.py
@@ -0,0 +1,84 @@
+# Copyright (c) Meta Platforms, Inc. and affiliates.
+# All rights reserved.
+#
+# This source code is licensed under the terms described in the LICENSE file in
+# the root directory of this source tree.
+
+import io
+import logging
+import os
+
+from docx import Document
+
+# Module-level logger (no function in this module logs through it yet).
+logger = logging.getLogger(__name__)
+
+# Extensions whose text is extracted locally by extract_text() below.
+LOCAL_SUPPORTED_EXTENSIONS = [".docx"]
+# Presumably the extensions the Llama Stack provider can ingest without local
+# extraction; not referenced inside this module -- confirm against callers.
+PROVIDER_SUPPORTED_EXTENSIONS = [".txt", ".pdf", ".md"]
+
+
+def extract_text_from_docx(file) -> str:
+    """Extract all text content from a .docx file.
+
+    Collects the text of every paragraph in ``doc.paragraphs`` first, then the
+    text of every cell of every table in ``doc.tables``, and joins the pieces
+    with newlines. Table text therefore follows all paragraph text instead of
+    appearing at the table's position in the document.
+
+    Args:
+        file: File-like object containing .docx data
+
+    Returns:
+        str: Extracted text with paragraphs and table cells separated by
+            newlines; a merged table cell contributes its text only once
+    """
+    doc = Document(file)
+    parts = [p.text for p in doc.paragraphs]
+
+    for table in doc.tables:
+        # python-docx hands back a merged cell once for every grid position
+        # it spans (and again for vertically merged continuation rows), which
+        # would duplicate its text. Track the underlying <w:tc> elements so
+        # each physical cell is emitted a single time per table.
+        seen_cells = set()
+        for row in table.rows:
+            for cell in row.cells:
+                tc = cell._tc
+                if tc in seen_cells:
+                    continue
+                seen_cells.add(tc)
+                parts.append(cell.text)
+
+    return "\n".join(parts)
+
+
+def extract_text(file, filename: str) -> str:
+    """Return the plain text of a file whose type is extracted locally.
+
+    The extension of ``filename`` (compared case-insensitively) selects the
+    extractor; only ``.docx`` is handled.
+
+    Args:
+        file: File-like object with document data
+        filename: Original filename; only its extension is inspected
+
+    Returns:
+        str: Extracted plain text content
+
+    Raises:
+        ValueError: If the file extension is not locally supported
+    """
+    _, raw_ext = os.path.splitext(filename)
+    ext = raw_ext.lower()
+
+    # Guard clause: reject anything that has no local extractor.
+    if ext != ".docx":
+        raise ValueError(f"Unsupported file type for local extraction: {ext}")
+
+    return extract_text_from_docx(file)
+
+
+def create_text_file_from_extracted_content(
+    content: str, original_filename: str
+) -> io.BytesIO:
+    """Package extracted text as an in-memory .txt upload.
+
+    The text is UTF-8 encoded into a BytesIO buffer, which is then given
+    ``name`` and ``size`` attributes so it can be handed straight to the
+    files.create API endpoint.
+
+    Args:
+        content: Extracted plain text to wrap
+        original_filename: Original filename; its extension is replaced by .txt
+
+    Returns:
+        io.BytesIO: In-memory text file positioned at the start, ready for upload
+    """
+    stem, _ = os.path.splitext(original_filename)
+    payload = content.encode("utf-8")
+
+    buffer = io.BytesIO(payload)
+    # BytesIO carries no filename or length metadata of its own, so both are
+    # attached here as plain attributes.
+    buffer.name = f"{stem}.txt"
+    buffer.size = len(payload)
+    return buffer
diff --git a/frontend/llama_stack_ui/distribution/ui/page/distribution/vector_dbs.py b/frontend/llama_stack_ui/distribution/ui/page/distribution/vector_dbs.py
@@ -13,6 +13,11 @@
 
 from llama_stack_ui.distribution.ui.modules.utils import get_vector_db_name, data_url_from_file
 from llama_stack_ui.distribution.ui.modules.api import llama_stack_api
+from llama_stack_ui.distribution.ui.modules.local_extractors import (
+    extract_text,
+    create_text_file_from_extracted_content,
+    LOCAL_SUPPORTED_EXTENSIONS,
+)
 # RAGDocument removed in 0.6.1 - using new files API instead
 
 
@@ -295,17 +300,30 @@ def _upload_documents_to_database(vector_db_name, uploaded_files, vector_db_id=N
         actual_db_id = vector_db_id or vector_db_name
         with st.spinner(f"Uploading {len(uploaded_files)} file(s) to '{vector_db_name}'..."):
             for uploaded_file in uploaded_files:
+                original_filename = uploaded_file.name
+                file_ext = os.path.splitext(original_filename)[1].lower()
+
+                # Extract DOCX text locally; every other file type (e.g. PDF/TXT/MD) is uploaded unchanged for the server to handle
+                if file_ext in LOCAL_SUPPORTED_EXTENSIONS:
+                    st.caption(f"📄 Extracting text from {original_filename}...")
+                    text_content = extract_text(uploaded_file, original_filename)
+                    file_to_upload = create_text_file_from_extracted_content(
+                        text_content, original_filename
+                    )
+                else:
+                    file_to_upload = uploaded_file
+
                 # Step 1: Upload file content to get file_id
                 file_obj = llama_stack_api.client.files.create(
-                    file=uploaded_file,
+                    file=file_to_upload,
                     purpose='assistants'
                 )
 
                 # Step 2: Add file to vector store (chunking handled server-side)
                 llama_stack_api.client.vector_stores.files.create(
                     vector_store_id=actual_db_id,
                     file_id=file_obj.id,
-                    attributes={"source": uploaded_file.name}
+                    attributes={"source": original_filename}
                 )
 
         # Success

diff --git a/frontend/pyproject.toml b/frontend/pyproject.toml
@@ -16,6 +16,7 @@ dependencies = [
     "llama-stack==__LLAMASTACK_VERSION__",
     "fire",
     "asyncpg",
+    "python-docx",
 ]
 
 [tool.setuptools]