pip install mcp-documents-reader

from mcp_documents_reader import DocumentReaderFactory
# Get reader for a specific file type
reader = DocumentReaderFactory.get_reader("document.docx")
content = reader.read("/path/to/document.docx")
print(content)

Add to your MCP configuration file:
{
  "mcpServers": {
    "mcp-document-reader": {
      "command": "uvx",
      "args": ["mcp-documents-reader"]
    }
  }
}

{
  "mcpServers": {
    "mcp-document-reader": {
      "command": "uvx",
      "args": ["mcp-documents-reader"]
    }
  }
}

Add the MCP server configuration in Cherry Studio settings with the same configuration as above.
from mcp_documents_reader import (
    DocumentReaderFactory,
    DocxReader,
    PdfReader,
    ExcelReader,
    TxtReader,
)
# Using factory (recommended)
reader = DocumentReaderFactory.get_reader("document.pdf")
content = reader.read("/path/to/document.pdf")
# Using specific reader directly
docx_reader = DocxReader()
content = docx_reader.read("/path/to/document.docx")
# Check if format is supported
if DocumentReaderFactory.is_supported("file.xlsx"):
    reader = DocumentReaderFactory.get_reader("file.xlsx")
    content = reader.read("/path/to/file.xlsx")

from mcp_documents_reader import DocumentReaderFactory
try:
    reader = DocumentReaderFactory.get_reader("document.docx")
    content = reader.read("/path/to/document.docx")
except ValueError as e:
    print(f"Unsupported format: {e}")
except FileNotFoundError:
    print("File not found")
except Exception as e:
    print(f"Error reading file: {e}")

from pathlib import Path
from mcp_documents_reader import DocumentReaderFactory
# Read all supported documents in a directory
for file_path in Path("documents").iterdir():
    if DocumentReaderFactory.is_supported(file_path.name):
        reader = DocumentReaderFactory.get_reader(file_path.name)
        content = reader.read(str(file_path))
        print(f"Read {file_path}: {len(content)} characters")

A: The reader supports 4 document formats:
- Excel: .xlsx, .xls
- DOCX: .docx
- PDF: .pdf
- Text: .txt
A: The TxtReader automatically detects encoding using chardet. It supports UTF-8, GBK, and other common encodings.
from mcp_documents_reader import TxtReader
reader = TxtReader()
# Automatically detects encoding
content = reader.read("chinese_text.txt")

A: No, the reader does not support password-protected files. You need to remove the password protection first.
A: Yes, the reader supports Windows, macOS, and Linux.
A: The reader will raise an exception when reading corrupted files. Handle it with try-except:
try:
    content = reader.read("potentially_corrupted.docx")
except Exception as e:
    print(f"Failed to read file: {e}")

A: No, the reader only supports local file paths. Download the file first if you need to read from a URL.
A: There's no hard limit, but very large files may cause memory issues. For large Excel files, consider reading specific sheets only.