-
Notifications
You must be signed in to change notification settings - Fork 0
Expand file tree
/
Copy pathfile_processor.py
More file actions
103 lines (82 loc) · 4.68 KB
/
file_processor.py
File metadata and controls
103 lines (82 loc) · 4.68 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
import markitdown
from openai import OpenAI
import os
import re
class FileProcessor:
    """Extract text from a file and rename the file to an AI-suggested name.

    Args:
        settings: Mapping-like object exposing ``get(key)``. The keys read
            here are ``'llm_provider'`` and, for that provider name,
            ``'<provider>_api_key'``, ``'<provider>_api_base_url'`` and
            ``'<provider>_model'``.
        ai_service: Object exposing an awaitable
            ``get_suggestion(file_content, file_extension)`` that returns a
            ``(success, suggestion_or_error_message)`` tuple.
    """

    # Characters stripped from a suggested name: the ones commonly rejected in
    # file names, plus ASCII control characters (e.g. a trailing newline).
    _INVALID_NAME_CHARS = re.compile(r'[<>:"/\\|?*\x00-\x1f]')

    def __init__(self, settings, ai_service):
        self.settings = settings
        self.ai_service = ai_service
        # Extensions that are read directly as text instead of going through
        # MarkItDown. Compared case-insensitively (lower-case entries).
        self.markitdown_excluded_extensions = [".md"]

    def extract_content(self, file_path):
        """Extract the content of the file using MarkItDown.

        Files whose extension is listed in ``markitdown_excluded_extensions``
        are read directly as text; everything else is converted by MarkItDown.

        Args:
            file_path: Path of the file to read.

        Returns:
            A ``(success, text)`` tuple. On success ``text`` is the extracted
            content (``"Blank file"`` when MarkItDown reports empty input); on
            failure it is an error message.
        """
        # Lower-case so that "NOTE.MD" is treated the same as "note.md".
        file_extension = os.path.splitext(file_path)[1].lower()

        # Special case for MarkItDown
        if file_extension in self.markitdown_excluded_extensions:
            return self._read_text_file(file_path)

        # Extract content using MarkItDown
        return self._convert_with_markitdown(file_path)

    def _read_text_file(self, file_path):
        """Read a text file directly and return ``(success, text_or_error)``."""
        try:
            # Explicit encoding so the result does not depend on the platform
            # locale; undecodable bytes are replaced rather than failing,
            # since the text is only used to suggest a name.
            with open(file_path, "r", encoding="utf-8", errors="replace") as f:
                file_content = f.read()
        except OSError as e:
            print(f"\n\n\n-----------------\n\n\n# FileProcessor extract_content Special Case Error:\n\n{str(e)}")
            return False, f"Error extracting file content: {str(e)}"

        print(f"\n\n\n-----------------\n\n\n# FileProcessor extract_content Special Case Response:\n\n{file_content}")
        return True, file_content

    def _convert_with_markitdown(self, file_path):
        """Convert a file with MarkItDown and return ``(success, text_or_error)``."""
        # Get the LLM provider to use provider-specific settings
        llm_provider = self.settings.get('llm_provider')
        print(f"\n\n\n-----------------\n\n\n# FileProcessor extract_content LLM Provider:\n\n{llm_provider}")

        # Building the client can fail (e.g. no API key configured); report
        # that through the return value like every other failure.
        try:
            client = OpenAI(
                api_key=self.settings.get(f'{llm_provider}_api_key'),
                base_url=self.settings.get(f'{llm_provider}_api_base_url'),
            )
        except Exception as e:
            print(f"\n\n\n-----------------\n\n\n# FileProcessor extract_content MarkItDown Error:\n\n{str(e)}")
            return False, f"Error extracting file content: {str(e)}"

        try:
            md = markitdown.MarkItDown(
                llm_client=client,
                llm_model=self.settings.get(f'{llm_provider}_model'),
            )
            result = md.convert(file_path)
            print(f"\n\n\n-----------------\n\n\n# FileProcessor extract_content MarkItDown Response:\n\n{result.text_content}")
            return True, result.text_content
        # Handle unsupported format error from MarkItDown
        except markitdown.UnsupportedFormatException as e:
            print(f"\n\n\n-----------------\n\n\n# FileProcessor extract_content MarkItDown Error:\n\n{str(e)}")
            return False, f"Unsupported file format: {str(e)}"
        # Handle empty file error from MarkItDown
        except ValueError as e:
            if "Input was empty" in str(e):
                print(f"\n\n\n-----------------\n\n\n# FileProcessor extract_content MarkItDown Error:\n\n{str(e)}")
                # An empty file is still renamable, so this counts as success.
                return True, "Blank file"
            return False, f"Unsupported file format: {str(e)}"
        except Exception as e:
            print(f"\n\n\n-----------------\n\n\n# FileProcessor extract_content MarkItDown Error:\n\n{str(e)}")
            return False, f"Error extracting file content: {str(e)}"

    def _sanitize_name(self, suggestion):
        """Return ``suggestion`` reduced to characters usable in a file name."""
        cleaned = self._INVALID_NAME_CHARS.sub('', suggestion)
        # Surrounding whitespace and trailing dots are rejected or silently
        # dropped by some file systems, so remove them up front.
        return cleaned.strip().rstrip('.').rstrip()

    async def rename_file(self, file_path):
        """Process the file by calling AIService and rename the file.

        The new name is the sanitized suggestion plus the original extension,
        placed in the original directory. If another file already has that
        name, ``_1``, ``_2``, ... is appended until the name is free.

        Args:
            file_path: Path of the file to rename.

        Returns:
            A ``(success, value)`` tuple: the new base name on success, or an
            error message on failure.
        """
        # Extract file content. NOTE: this call is synchronous and blocks the
        # event loop until extraction finishes.
        success, file_content = self.extract_content(file_path)
        if not success:
            return False, file_content  # Return the error message if extraction failed

        # Get original file extension (original casing is preserved)
        file_extension = os.path.splitext(file_path)[1]

        # Get file name suggestion or error message from AIService
        success, suggestion = await self.ai_service.get_suggestion(file_content, file_extension)
        if not success:
            return False, suggestion  # Return the error message if AI service call failed

        # Rename the file
        try:
            # Remove invalid characters from the suggested name
            sanitized_name = self._sanitize_name(suggestion)
            if not sanitized_name:
                # Renaming to just ".ext" would hide or clobber files.
                return False, "Suggested file name is empty after removing invalid characters"

            # Get new file path with the original extension
            directory = os.path.dirname(file_path)
            new_file_path = os.path.join(directory, f"{sanitized_name}{file_extension}")

            if os.path.exists(new_file_path) and os.path.samefile(file_path, new_file_path):
                # The target is the file itself. Only rename when the spelling
                # differs (case-only change on a case-insensitive file system);
                # never add a numeric suffix in this situation.
                if os.path.abspath(new_file_path) != os.path.abspath(file_path):
                    os.rename(file_path, new_file_path)
            else:
                # Check if target file already exists
                if os.path.exists(new_file_path):
                    base, ext = os.path.splitext(new_file_path)
                    counter = 1
                    while os.path.exists(f"{base}_{counter}{ext}"):
                        counter += 1
                    new_file_path = f"{base}_{counter}{ext}"
                os.rename(file_path, new_file_path)

            print(f"\n\n\n-----------------\n\n\n# FileProcessor process_file New File Path:\n\n{(os.path.basename(new_file_path))}")
            return True, os.path.basename(new_file_path)
        except Exception as e:
            print(f"\n\n\n-----------------\n\n\n# FileProcessor process_file Error:\n\n{str(e)}")
            return False, str(e)