From df0212387ebcfc19f289fd2eb5c945730fd97628 Mon Sep 17 00:00:00 2001
From: ssgamingop <somyajeetsingh15@gmail.com>
Date: Wed, 6 May 2026 17:05:07 +0530
Subject: [PATCH 1/8] Add DeepReadFile tool to the Deep Research Agent

---
 deep_research/deep_research.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/deep_research/deep_research.py b/deep_research/deep_research.py
index dc74c807..8cffeb95 100644
--- a/deep_research/deep_research.py
+++ b/deep_research/deep_research.py
@@ -12,7 +12,7 @@ def create_deep_research() -> Agent:
         description="Comprehensive deep research agent that conducts thorough research on any topic.",
         instructions="./instructions.md",
         files_folder="./files",
-        tools=[WebSearchTool(), ScholarSearch, IPythonInterpreter],
+        tools=[WebSearchTool(), ScholarSearch, IPythonInterpreter, DeepReadFile],
         model=get_default_model(),
         model_settings=ModelSettings(
             reasoning=Reasoning(effort="high", summary="auto") if is_openai_provider() else None,

From 4503159eb4c07754c739b187460f9cf3a99b1026 Mon Sep 17 00:00:00 2001
From: ssgamingop <somyajeetsingh15@gmail.com>
Date: Wed, 6 May 2026 17:05:12 +0530
Subject: [PATCH 2/8] Add DeepReadFile tool for reading and converting files to
 Markdown

---
 shared_tools/DeepReadFile.py | 55 ++++++++++++++++++++++++++++++++++++
 1 file changed, 55 insertions(+)
 create mode 100644 shared_tools/DeepReadFile.py

diff --git a/shared_tools/DeepReadFile.py b/shared_tools/DeepReadFile.py
new file mode 100644
index 00000000..9b55cdfb
--- /dev/null
+++ b/shared_tools/DeepReadFile.py
@@ -0,0 +1,55 @@
+import os
+from typing import Optional
+from agency_swarm.tools import BaseTool
+from pydantic import Field
+from markitdown import MarkItDown
+
+class DeepReadFile(BaseTool):
+    """
+    Read a local file and return its content converted to Markdown.
+
+    Conversion is delegated to MarkItDown. Supports PDF, DOCX, XLSX, PPTX,
+    HTML, and common image formats (via OCR if configured).
+
+    Use this tool when you need to understand the content of a non-plain-text file.
+    """
+
+    file_path: str = Field(..., description="The absolute path to the file to read and convert.")
+
+    def run(self):
+        """Return the Markdown text behind a source header, or an error/warning string."""
+        try:
+            # Resolve relative paths so every message reports the real location.
+            abs_path = os.path.abspath(self.file_path)
+            file_name = os.path.basename(abs_path)
+
+            if not os.path.exists(abs_path):
+                return f"Error: File does not exist at {abs_path}"
+
+            if not os.path.isfile(abs_path):
+                return f"Error: {abs_path} is not a file."
+
+            result = MarkItDown().convert(abs_path)
+            content = result.text_content if result else None
+
+            # Whitespace-only output carries no information either, so warn for it too.
+            if not content or not content.strip():
+                return f"Warning: Conversion successful but no text content was extracted from {file_name}."
+
+            # Add a header to indicate the source
+            return f"--- Content of {file_name} ---\n\n" + content
+
+        except Exception as e:
+            # Report the failure as text instead of raising.
+            return f"Error converting file {self.file_path}: {e}"
+
+if __name__ == "__main__":
+    # Smoke test: convert a throwaway text file created in a temporary directory,
+    # so nothing in the working directory is overwritten and cleanup is automatic.
+    import tempfile
+
+    with tempfile.TemporaryDirectory() as tmp_dir:
+        test_file = os.path.join(tmp_dir, "test_deep_read.txt")
+        with open(test_file, "w", encoding="utf-8") as f:
+            f.write("# Test Heading\nThis is a test file for DeepReadFile.")
+        print(DeepReadFile(file_path=test_file).run())

From 205654de36bc6a1e3c3aecc51c118f79401e85de Mon Sep 17 00:00:00 2001
From: ssgamingop <somyajeetsingh15@gmail.com>
Date: Wed, 6 May 2026 17:05:16 +0530
Subject: [PATCH 3/8] Add DeepReadFile to shared_tools module exports

---
 shared_tools/__init__.py | 10 +++++++++-
 1 file changed, 9 insertions(+), 1 deletion(-)

diff --git a/shared_tools/__init__.py b/shared_tools/__init__.py
index d99576ea..4fc3d58f 100644
--- a/shared_tools/__init__.py
+++ b/shared_tools/__init__.py
@@ -1,7 +1,15 @@
 from shared_tools.CopyFile import CopyFile
+from shared_tools.DeepReadFile import DeepReadFile
 from shared_tools.ExecuteTool import ExecuteTool
 from shared_tools.FindTools import FindTools
 from shared_tools.ManageConnections import ManageConnections
 from shared_tools.SearchTools import SearchTools
 
-__all__ = ["CopyFile", "ExecuteTool", "FindTools", "ManageConnections", "SearchTools"]
+__all__ = [
+    "CopyFile",
+    "DeepReadFile",
+    "ExecuteTool",
+    "FindTools",
+    "ManageConnections",
+    "SearchTools",
+]

From 440d548e8d235994dafe74f02561ab02fa776349 Mon Sep 17 00:00:00 2001
From: ssgamingop <somyajeetsingh15@gmail.com>
Date: Wed, 6 May 2026 17:05:22 +0530
Subject: [PATCH 4/8] Add DeepReadFile to the Virtual Assistant tools and
 wrap the shared_tools import

---
 virtual_assistant/virtual_assistant.py | 10 +++++++++-
 1 file changed, 9 insertions(+), 1 deletion(-)

diff --git a/virtual_assistant/virtual_assistant.py b/virtual_assistant/virtual_assistant.py
index 487c7ce6..da90c982 100644
--- a/virtual_assistant/virtual_assistant.py
+++ b/virtual_assistant/virtual_assistant.py
@@ -8,7 +8,14 @@
 from dotenv import load_dotenv
 
 from config import get_default_model, is_openai_provider
-from shared_tools import CopyFile, ExecuteTool, FindTools, ManageConnections, SearchTools
+from shared_tools import (
+    CopyFile,
+    DeepReadFile,
+    ExecuteTool,
+    FindTools,
+    ManageConnections,
+    SearchTools,
+)
 
 load_dotenv()
 
@@ -33,6 +40,7 @@ def create_virtual_assistant() -> Agent:
             PersistentShellTool,
             IPythonInterpreter,
             CopyFile,
+            DeepReadFile,
             ExecuteTool,
             FindTools,
             ManageConnections,

From 13732a07edc7d496f0af3bc7918ed98e513bec63 Mon Sep 17 00:00:00 2001
From: ssgamingop <somyajeetsingh15@gmail.com>
Date: Wed, 6 May 2026 17:11:13 +0530
Subject: [PATCH 5/8] Enable Markdown to PDF/DOCX conversion in Docs Agent

---
 docs_agent/tools/CreateDocument.py | 16 ++++++++++++++--
 1 file changed, 14 insertions(+), 2 deletions(-)

diff --git a/docs_agent/tools/CreateDocument.py b/docs_agent/tools/CreateDocument.py
index 1c7f9e71..69d22ace 100644
--- a/docs_agent/tools/CreateDocument.py
+++ b/docs_agent/tools/CreateDocument.py
@@ -35,7 +35,8 @@ class CreateDocument(BaseTool):
     - .source.html file (the canonical source of truth)
 
     Markdown workflow creates:
-    - .md file only (no .docx or .pdf generation)
+    - .md file
+    - .source.html file (generated from Markdown to enable PDF/DOCX conversion)
     
     HTML is used as the source format because it provides:
     - Full styling control (fonts, colors, spacing, etc.)
@@ -145,6 +146,16 @@ def _create_markdown(self, doc_name, project_dir, markdown_value):
             return f"Error: Document '{doc_name}' already exists in project '{self.project_name}'. Use overwrite=True to replace it, or choose a different document name."
 
         md_path.write_text(markdown_value, encoding="utf-8")
+        
+        # Also create a .source.html version from Markdown to enable PDF/DOCX conversion
+        from markdown_it import MarkdownIt
+        md_parser = MarkdownIt()
+        html_body = md_parser.render(markdown_value)
+        html_full = f"<!DOCTYPE html><html><head><meta charset=\"UTF-8\"></head><body>{html_body}</body></html>"
+        
+        source_path = project_dir / f"{doc_name}.source.html"
+        source_path.write_text(_ensure_ua_reset(html_full), encoding='utf-8')
+
         if not md_path.exists():
             return f"Error: Markdown generation failed for document '{doc_name}'."
         md_size = md_path.stat().st_size
@@ -157,10 +168,11 @@ def _create_markdown(self, doc_name, project_dir, markdown_value):
 
 Files created:
   - {md_path.name} ({md_size:,} bytes) [Markdown source]
+  - {source_path.name} [HTML source for conversion]
 
 Path: {md_path}
 
-Note: Markdown workflow only creates a .md file and does not generate .docx or .pdf files."""
+Note: You can now convert this document to PDF or DOCX using the ConvertDocument tool."""
 
 def _build_html_preview_image(html_content: str, base_dir: Path):
     """Render a preview JPEG of the HTML document.

From 405923a09af8cec7d4c6ce5a00b1af20b940d556 Mon Sep 17 00:00:00 2001
From: ssgamingop <somyajeetsingh15@gmail.com>
Date: Wed, 6 May 2026 17:18:14 +0530
Subject: [PATCH 6/8] Update dependencies to include markdown-it-py

---
 pyproject.toml   | 1 +
 requirements.txt | 1 +
 2 files changed, 2 insertions(+)

diff --git a/pyproject.toml b/pyproject.toml
index 6f39c61e..1d00c029 100644
--- a/pyproject.toml
+++ b/pyproject.toml
@@ -47,6 +47,7 @@ dependencies = [
     "cairosvg",
     "weasyprint",
     "html2text",
+    "markdown-it-py",
     "playwright",
     # AI / media generation
     "google-genai",
diff --git a/requirements.txt b/requirements.txt
index 77db5c33..ab3850ad 100644
--- a/requirements.txt
+++ b/requirements.txt
@@ -42,6 +42,7 @@ beautifulsoup4
 cairosvg
 weasyprint
 html2text
+markdown-it-py
 google-genai
 opencv-python-headless
 moviepy<2

From 54c2f2f2dbee7b775bd1a97d52833ce942f584f8 Mon Sep 17 00:00:00 2001
From: ssgamingop <somyajeetsingh15@gmail.com>
Date: Wed, 6 May 2026 17:32:07 +0530
Subject: [PATCH 7/8] Fix NameError in Deep Research Agent: import DeepReadFile

---
 deep_research/deep_research.py | 1 +
 1 file changed, 1 insertion(+)

diff --git a/deep_research/deep_research.py b/deep_research/deep_research.py
index 8cffeb95..00e67717 100644
--- a/deep_research/deep_research.py
+++ b/deep_research/deep_research.py
@@ -2,6 +2,7 @@
 from agency_swarm.tools import WebSearchTool, IPythonInterpreter
 from openai.types.shared import Reasoning
 from virtual_assistant.tools.ScholarSearch import ScholarSearch
+from shared_tools import DeepReadFile
 
 from config import get_default_model, is_openai_provider
 

From 0cef9d83b630c77305022b010011883302756a19 Mon Sep 17 00:00:00 2001
From: ssgamingop <somyajeetsingh15@gmail.com>
Date: Wed, 6 May 2026 17:35:02 +0530
Subject: [PATCH 8/8] Fix Docs Agent bug: respect overwrite flag for both MD
 and HTML source

---
 docs_agent/tools/CreateDocument.py | 11 ++++++++---
 1 file changed, 8 insertions(+), 3 deletions(-)

diff --git a/docs_agent/tools/CreateDocument.py b/docs_agent/tools/CreateDocument.py
index 69d22ace..4b41fcbc 100644
--- a/docs_agent/tools/CreateDocument.py
+++ b/docs_agent/tools/CreateDocument.py
@@ -142,8 +142,14 @@ def run(self):
 
     def _create_markdown(self, doc_name, project_dir, markdown_value):
         md_path = project_dir / f"{doc_name}.md"
-        if md_path.exists() and not self.overwrite:
-            return f"Error: Document '{doc_name}' already exists in project '{self.project_name}'. Use overwrite=True to replace it, or choose a different document name."
+        source_path = project_dir / f"{doc_name}.source.html"
+        
+        # Guard: Respect overwrite flag for BOTH files
+        if not self.overwrite:
+            if md_path.exists():
+                return f"Error: Markdown file '{md_path.name}' already exists in project '{self.project_name}'. Use overwrite=True to replace it."
+            if source_path.exists():
+                return f"Error: HTML source '{source_path.name}' already exists in project '{self.project_name}'. Creating this Markdown document would overwrite it. Use overwrite=True to proceed."
 
         md_path.write_text(markdown_value, encoding="utf-8")
         
@@ -153,7 +159,6 @@ def _create_markdown(self, doc_name, project_dir, markdown_value):
         html_body = md_parser.render(markdown_value)
         html_full = f"<!DOCTYPE html><html><head><meta charset=\"UTF-8\"></head><body>{html_body}</body></html>"
         
-        source_path = project_dir / f"{doc_name}.source.html"
         source_path.write_text(_ensure_ua_reset(html_full), encoding='utf-8')
 
         if not md_path.exists():