Skip to content
Open
3 changes: 2 additions & 1 deletion deep_research/deep_research.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,7 @@
from agency_swarm.tools import WebSearchTool, IPythonInterpreter
from openai.types.shared import Reasoning
from virtual_assistant.tools.ScholarSearch import ScholarSearch
from shared_tools import DeepReadFile

from config import get_default_model, is_openai_provider

Expand All @@ -12,7 +13,7 @@ def create_deep_research() -> Agent:
description="Comprehensive deep research agent that conducts thorough research on any topic.",
instructions="./instructions.md",
files_folder="./files",
tools=[WebSearchTool(), ScholarSearch, IPythonInterpreter],
tools=[WebSearchTool(), ScholarSearch, IPythonInterpreter, DeepReadFile],
Comment thread
ssgamingop marked this conversation as resolved.
model=get_default_model(),
model_settings=ModelSettings(
reasoning=Reasoning(effort="high", summary="auto") if is_openai_provider() else None,
Expand Down
25 changes: 21 additions & 4 deletions docs_agent/tools/CreateDocument.py
Original file line number Diff line number Diff line change
Expand Up @@ -35,7 +35,8 @@ class CreateDocument(BaseTool):
- .source.html file (the canonical source of truth)

Markdown workflow creates:
- .md file only (no .docx or .pdf generation)
- .md file
- .source.html file (generated from Markdown to enable PDF/DOCX conversion)

HTML is used as the source format because it provides:
- Full styling control (fonts, colors, spacing, etc.)
Expand Down Expand Up @@ -141,10 +142,25 @@ def run(self):

def _create_markdown(self, doc_name, project_dir, markdown_value):
md_path = project_dir / f"{doc_name}.md"
if md_path.exists() and not self.overwrite:
return f"Error: Document '{doc_name}' already exists in project '{self.project_name}'. Use overwrite=True to replace it, or choose a different document name."
source_path = project_dir / f"{doc_name}.source.html"

# Guard: Respect overwrite flag for BOTH files
if not self.overwrite:
if md_path.exists():
return f"Error: Markdown file '{md_path.name}' already exists in project '{self.project_name}'. Use overwrite=True to replace it."
if source_path.exists():
return f"Error: HTML source '{source_path.name}' already exists in project '{self.project_name}'. Creating this Markdown document would overwrite it. Use overwrite=True to proceed."

md_path.write_text(markdown_value, encoding="utf-8")

# Also create a .source.html version from Markdown to enable PDF/DOCX conversion
from markdown_it import MarkdownIt
md_parser = MarkdownIt()
html_body = md_parser.render(markdown_value)
html_full = f"<!DOCTYPE html><html><head><meta charset=\"UTF-8\"></head><body>{html_body}</body></html>"

source_path.write_text(_ensure_ua_reset(html_full), encoding='utf-8')

if not md_path.exists():
return f"Error: Markdown generation failed for document '{doc_name}'."
md_size = md_path.stat().st_size
Expand All @@ -157,10 +173,11 @@ def _create_markdown(self, doc_name, project_dir, markdown_value):

Files created:
- {md_path.name} ({md_size:,} bytes) [Markdown source]
- {source_path.name} [HTML source for conversion]

Path: {md_path}

Note: Markdown workflow only creates a .md file and does not generate .docx or .pdf files."""
Note: You can now convert this document to PDF or DOCX using the ConvertDocument tool."""

def _build_html_preview_image(html_content: str, base_dir: Path):
"""Render a preview JPEG of the HTML document.
Expand Down
1 change: 1 addition & 0 deletions pyproject.toml
Original file line number Diff line number Diff line change
Expand Up @@ -47,6 +47,7 @@ dependencies = [
"cairosvg",
"weasyprint",
"html2text",
"markdown-it-py",
"playwright",
# AI / media generation
"google-genai",
Expand Down
1 change: 1 addition & 0 deletions requirements.txt
Original file line number Diff line number Diff line change
Expand Up @@ -42,6 +42,7 @@ beautifulsoup4
cairosvg
weasyprint
html2text
markdown-it-py
google-genai
opencv-python-headless
moviepy<2
Expand Down
55 changes: 55 additions & 0 deletions shared_tools/DeepReadFile.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,55 @@
import os
from typing import Optional
from agency_swarm.tools import BaseTool
from pydantic import Field
from markitdown import MarkItDown

class DeepReadFile(BaseTool):
    """
    A tool to read and convert various file formats into Markdown.
    Supports PDF, DOCX, XLSX, PPTX, HTML, and common image formats (via OCR if configured).

    Use this tool when you need to understand the content of a non-plain-text file.
    """

    file_path: str = Field(..., description="The absolute path to the file to read and convert.")

    def run(self):
        """Convert the file at ``file_path`` to Markdown and return it as text.

        Returns:
            str: The converted content prefixed with a header naming the
            source file. When the path is missing, is not a regular file, or
            conversion raises, a message starting with ``Error`` is returned
            instead; when conversion yields no usable text, a message starting
            with ``Warning`` is returned.
        """
        try:
            # Relative paths are resolved against the current working directory.
            abs_path = os.path.abspath(self.file_path)

            if not os.path.exists(abs_path):
                return f"Error: File does not exist at {abs_path}"

            if not os.path.isfile(abs_path):
                return f"Error: {abs_path} is not a file."

            # Initialize MarkItDown and convert the file
            md = MarkItDown()
            result = md.convert(abs_path)

            content = result.text_content if result else None

            # Treat whitespace-only output the same as no output: returning a
            # header followed by blank lines would look like a successful read.
            if not content or not content.strip():
                return f"Warning: Conversion successful but no text content was extracted from {os.path.basename(abs_path)}."

            # Add a header to indicate the source
            header = f"--- Content of {os.path.basename(abs_path)} ---\n\n"
            return header + content

        except Exception as e:
            # Tool boundary: report the failure as text to the caller rather
            # than letting the exception propagate out of the tool.
            return f"Error converting file {self.file_path}: {str(e)}"

if __name__ == "__main__":
    # Manual smoke test: write a small text file, run the tool on it, print
    # the result, and always remove the scratch file afterwards.
    test_file = "test_deep_read.txt"
    try:
        with open(test_file, "w", encoding="utf-8") as f:
            f.write("# Test Heading\nThis is a test file for DeepReadFile.")

        tool = DeepReadFile(file_path=os.path.abspath(test_file))
        print(tool.run())
    finally:
        # Clean up even if constructing or running the tool raised.
        if os.path.exists(test_file):
            os.remove(test_file)
10 changes: 9 additions & 1 deletion shared_tools/__init__.py
Original file line number Diff line number Diff line change
@@ -1,7 +1,15 @@
from shared_tools.CopyFile import CopyFile
from shared_tools.DeepReadFile import DeepReadFile
from shared_tools.ExecuteTool import ExecuteTool
from shared_tools.FindTools import FindTools
from shared_tools.ManageConnections import ManageConnections
from shared_tools.SearchTools import SearchTools

__all__ = ["CopyFile", "ExecuteTool", "FindTools", "ManageConnections", "SearchTools"]
__all__ = [
"CopyFile",
"DeepReadFile",
"ExecuteTool",
"FindTools",
"ManageConnections",
"SearchTools",
]
10 changes: 9 additions & 1 deletion virtual_assistant/virtual_assistant.py
Original file line number Diff line number Diff line change
Expand Up @@ -8,7 +8,14 @@
from dotenv import load_dotenv

from config import get_default_model, is_openai_provider
from shared_tools import CopyFile, ExecuteTool, FindTools, ManageConnections, SearchTools
from shared_tools import (
CopyFile,
DeepReadFile,
ExecuteTool,
FindTools,
ManageConnections,
SearchTools,
)

load_dotenv()

Expand All @@ -33,6 +40,7 @@ def create_virtual_assistant() -> Agent:
PersistentShellTool,
IPythonInterpreter,
CopyFile,
DeepReadFile,
ExecuteTool,
FindTools,
ManageConnections,
Expand Down