From df0212387ebcfc19f289fd2eb5c945730fd97628 Mon Sep 17 00:00:00 2001 From: ssgamingop Date: Wed, 6 May 2026 17:05:07 +0530 Subject: [PATCH 1/8] Add DeepReadFile tool to the Deep Research Agent --- deep_research/deep_research.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/deep_research/deep_research.py b/deep_research/deep_research.py index dc74c807..8cffeb95 100644 --- a/deep_research/deep_research.py +++ b/deep_research/deep_research.py @@ -12,7 +12,7 @@ def create_deep_research() -> Agent: description="Comprehensive deep research agent that conducts thorough research on any topic.", instructions="./instructions.md", files_folder="./files", - tools=[WebSearchTool(), ScholarSearch, IPythonInterpreter], + tools=[WebSearchTool(), ScholarSearch, IPythonInterpreter, DeepReadFile], model=get_default_model(), model_settings=ModelSettings( reasoning=Reasoning(effort="high", summary="auto") if is_openai_provider() else None, From 4503159eb4c07754c739b187460f9cf3a99b1026 Mon Sep 17 00:00:00 2001 From: ssgamingop Date: Wed, 6 May 2026 17:05:12 +0530 Subject: [PATCH 2/8] Add DeepReadFile tool for reading and converting files to Markdown --- shared_tools/DeepReadFile.py | 55 ++++++++++++++++++++++++++++++++++++ 1 file changed, 55 insertions(+) create mode 100644 shared_tools/DeepReadFile.py diff --git a/shared_tools/DeepReadFile.py b/shared_tools/DeepReadFile.py new file mode 100644 index 00000000..9b55cdfb --- /dev/null +++ b/shared_tools/DeepReadFile.py @@ -0,0 +1,55 @@ +import os +from typing import Optional +from agency_swarm.tools import BaseTool +from pydantic import Field +from markitdown import MarkItDown + +class DeepReadFile(BaseTool): + """ + A tool to read and convert various file formats into Markdown. + Supports PDF, DOCX, XLSX, PPTX, HTML, and common image formats (via OCR if configured). + + Use this tool when you need to understand the content of a non-plain-text file. + """ + + file_path: str = Field(..., description="The absolute path to the file to read and convert.") + + def run(self): + try: + # Ensure the path is absolute + abs_path = os.path.abspath(self.file_path) + + if not os.path.exists(abs_path): + return f"Error: File does not exist at {abs_path}" + + if not os.path.isfile(abs_path): + return f"Error: {abs_path} is not a file." + + # Initialize MarkItDown + md = MarkItDown() + + # Convert the file + result = md.convert(abs_path) + + if not result or not result.text_content: + return f"Warning: Conversion successful but no text content was extracted from {os.path.basename(abs_path)}." + + content = result.text_content + + # Add a header to indicate the source + header = f"--- Content of {os.path.basename(abs_path)} ---\n\n" + return header + content + + except Exception as e: + return f"Error converting file {self.file_path}: {str(e)}" + +if __name__ == "__main__": + # Test with a simple text file first + test_file = "test_deep_read.txt" + with open(test_file, "w") as f: + f.write("# Test Heading\nThis is a test file for DeepReadFile.") + + tool = DeepReadFile(file_path=os.path.abspath(test_file)) + print(tool.run()) + + os.remove(test_file) From 205654de36bc6a1e3c3aecc51c118f79401e85de Mon Sep 17 00:00:00 2001 From: ssgamingop Date: Wed, 6 May 2026 17:05:16 +0530 Subject: [PATCH 3/8] Add DeepReadFile to shared_tools module exports --- shared_tools/__init__.py | 10 +++++++++- 1 file changed, 9 insertions(+), 1 deletion(-) diff --git a/shared_tools/__init__.py b/shared_tools/__init__.py index d99576ea..4fc3d58f 100644 --- a/shared_tools/__init__.py +++ b/shared_tools/__init__.py @@ -1,7 +1,15 @@ from shared_tools.CopyFile import CopyFile +from shared_tools.DeepReadFile import DeepReadFile from shared_tools.ExecuteTool import ExecuteTool from shared_tools.FindTools import FindTools from shared_tools.ManageConnections import ManageConnections from shared_tools.SearchTools import SearchTools -__all__ = ["CopyFile", "ExecuteTool", "FindTools", "ManageConnections", "SearchTools"] +__all__ = [ + "CopyFile", + "DeepReadFile", + "ExecuteTool", + "FindTools", + "ManageConnections", + "SearchTools", +] From 440d548e8d235994dafe74f02561ab02fa776349 Mon Sep 17 00:00:00 2001 From: ssgamingop Date: Wed, 6 May 2026 17:05:22 +0530 Subject: [PATCH 4/8] Refactor import statements for better readability and organization --- virtual_assistant/virtual_assistant.py | 10 +++++++++- 1 file changed, 9 insertions(+), 1 deletion(-) diff --git a/virtual_assistant/virtual_assistant.py b/virtual_assistant/virtual_assistant.py index 487c7ce6..da90c982 100644 --- a/virtual_assistant/virtual_assistant.py +++ b/virtual_assistant/virtual_assistant.py @@ -8,7 +8,14 @@ from dotenv import load_dotenv from config import get_default_model, is_openai_provider -from shared_tools import CopyFile, ExecuteTool, FindTools, ManageConnections, SearchTools +from shared_tools import ( + CopyFile, + DeepReadFile, + ExecuteTool, + FindTools, + ManageConnections, + SearchTools, +) load_dotenv() @@ -33,6 +40,7 @@ def create_virtual_assistant() -> Agent: PersistentShellTool, IPythonInterpreter, CopyFile, + DeepReadFile, ExecuteTool, FindTools, ManageConnections, From 13732a07edc7d496f0af3bc7918ed98e513bec63 Mon Sep 17 00:00:00 2001 From: ssgamingop Date: Wed, 6 May 2026 17:11:13 +0530 Subject: [PATCH 5/8] Enable Markdown to PDF/DOCX conversion in Docs Agent --- docs_agent/tools/CreateDocument.py | 16 ++++++++++++++-- 1 file changed, 14 insertions(+), 2 deletions(-) diff --git a/docs_agent/tools/CreateDocument.py b/docs_agent/tools/CreateDocument.py index 1c7f9e71..69d22ace 100644 --- a/docs_agent/tools/CreateDocument.py +++ b/docs_agent/tools/CreateDocument.py @@ -35,7 +35,8 @@ class CreateDocument(BaseTool): - .source.html file (the canonical source of truth) Markdown workflow creates: - - .md file only (no .docx or .pdf generation) + - .md file + - .source.html file (generated from Markdown to enable PDF/DOCX conversion) HTML is used as the source format because it provides: - Full styling control (fonts, colors, spacing, etc.) @@ -145,6 +146,16 @@ def _create_markdown(self, doc_name, project_dir, markdown_value): return f"Error: Document '{doc_name}' already exists in project '{self.project_name}'. Use overwrite=True to replace it, or choose a different document name." md_path.write_text(markdown_value, encoding="utf-8") + + # Also create a .source.html version from Markdown to enable PDF/DOCX conversion + from markdown_it import MarkdownIt + md_parser = MarkdownIt() + html_body = md_parser.render(markdown_value) + html_full = f"{html_body}" + + source_path = project_dir / f"{doc_name}.source.html" + source_path.write_text(_ensure_ua_reset(html_full), encoding='utf-8') + if not md_path.exists(): return f"Error: Markdown generation failed for document '{doc_name}'." md_size = md_path.stat().st_size @@ -157,10 +168,11 @@ def _create_markdown(self, doc_name, project_dir, markdown_value): Files created: - {md_path.name} ({md_size:,} bytes) [Markdown source] + - {source_path.name} [HTML source for conversion] Path: {md_path} -Note: Markdown workflow only creates a .md file and does not generate .docx or .pdf files.""" +Note: You can now convert this document to PDF or DOCX using the ConvertDocument tool.""" def _build_html_preview_image(html_content: str, base_dir: Path): """Render a preview JPEG of the HTML document. From 405923a09af8cec7d4c6ce5a00b1af20b940d556 Mon Sep 17 00:00:00 2001 From: ssgamingop Date: Wed, 6 May 2026 17:18:14 +0530 Subject: [PATCH 6/8] Update dependencies to include markdown-it-py --- pyproject.toml | 1 + requirements.txt | 1 + 2 files changed, 2 insertions(+) diff --git a/pyproject.toml b/pyproject.toml index 6f39c61e..1d00c029 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -47,6 +47,7 @@ dependencies = [ "cairosvg", "weasyprint", "html2text", + "markdown-it-py", "playwright", # AI / media generation "google-genai", diff --git a/requirements.txt b/requirements.txt index 77db5c33..ab3850ad 100644 --- a/requirements.txt +++ b/requirements.txt @@ -42,6 +42,7 @@ beautifulsoup4 cairosvg weasyprint html2text +markdown-it-py google-genai opencv-python-headless moviepy<2 From 54c2f2f2dbee7b775bd1a97d52833ce942f584f8 Mon Sep 17 00:00:00 2001 From: ssgamingop Date: Wed, 6 May 2026 17:32:07 +0530 Subject: [PATCH 7/8] Fix NameError in Deep Research Agent: import DeepReadFile --- deep_research/deep_research.py | 1 + 1 file changed, 1 insertion(+) diff --git a/deep_research/deep_research.py b/deep_research/deep_research.py index 8cffeb95..00e67717 100644 --- a/deep_research/deep_research.py +++ b/deep_research/deep_research.py @@ -2,6 +2,7 @@ from agency_swarm.tools import WebSearchTool, IPythonInterpreter from openai.types.shared import Reasoning from virtual_assistant.tools.ScholarSearch import ScholarSearch +from shared_tools import DeepReadFile from config import get_default_model, is_openai_provider From 0cef9d83b630c77305022b010011883302756a19 Mon Sep 17 00:00:00 2001 From: ssgamingop Date: Wed, 6 May 2026 17:35:02 +0530 Subject: [PATCH 8/8] Fix Docs Agent bug: respect overwrite flag for both MD and HTML source --- docs_agent/tools/CreateDocument.py | 11 ++++++++--- 1 file changed, 8 insertions(+), 3 deletions(-) diff --git a/docs_agent/tools/CreateDocument.py b/docs_agent/tools/CreateDocument.py index 69d22ace..4b41fcbc 100644 --- a/docs_agent/tools/CreateDocument.py +++ b/docs_agent/tools/CreateDocument.py @@ -142,8 +142,14 @@ def run(self): def _create_markdown(self, doc_name, project_dir, markdown_value): md_path = project_dir / f"{doc_name}.md" - if md_path.exists() and not self.overwrite: - return f"Error: Document '{doc_name}' already exists in project '{self.project_name}'. Use overwrite=True to replace it, or choose a different document name." + source_path = project_dir / f"{doc_name}.source.html" + + # Guard: Respect overwrite flag for BOTH files + if not self.overwrite: + if md_path.exists(): + return f"Error: Markdown file '{md_path.name}' already exists in project '{self.project_name}'. Use overwrite=True to replace it." + if source_path.exists(): + return f"Error: HTML source '{source_path.name}' already exists in project '{self.project_name}'. Creating this Markdown document would overwrite it. Use overwrite=True to proceed." md_path.write_text(markdown_value, encoding="utf-8") @@ -153,7 +159,6 @@ def _create_markdown(self, doc_name, project_dir, markdown_value): html_body = md_parser.render(markdown_value) html_full = f"{html_body}" - source_path = project_dir / f"{doc_name}.source.html" source_path.write_text(_ensure_ua_reset(html_full), encoding='utf-8') if not md_path.exists():