-
Notifications
You must be signed in to change notification settings - Fork 0
Expand file tree
/
Copy pathKnowledgeProcessor.py
More file actions
103 lines (79 loc) · 3.37 KB
/
KnowledgeProcessor.py
File metadata and controls
103 lines (79 loc) · 3.37 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
from pathlib import Path
from typing import Any, Dict
class KnowledgeProcessor:
    """
    A class to handle reading and processing knowledge files from the Knowledge folder.
    All files are expected to be in .md format.
    """

    def __init__(self, knowledge_folder: str = "Knowledge"):
        """
        Initialize the KnowledgeProcessor class.

        Args:
            knowledge_folder (str): Path to the knowledge folder containing .md files
        """
        self.knowledge_folder = Path(knowledge_folder)

    def read_all_knowledge_files(self) -> Dict[str, str]:
        """
        Read all .md files from the knowledge folder and return their contents.

        Files are read as UTF-8, in sorted path order. A file that cannot be
        opened or decoded is reported with a warning on stdout and skipped;
        the remaining files are still returned.

        Returns:
            Dict[str, str]: Dictionary with filename as key and file content as value
                (empty when the folder contains no .md files)

        Raises:
            FileNotFoundError: If the knowledge folder doesn't exist
        """
        if not self.knowledge_folder.exists():
            raise FileNotFoundError(f"Knowledge folder '{self.knowledge_folder}' not found")

        knowledge_contents: Dict[str, str] = {}
        # Sorted so the result order does not depend on directory listing order.
        for md_file in sorted(self.knowledge_folder.glob("*.md")):
            try:
                content = md_file.read_text(encoding="utf-8")
            except (OSError, UnicodeDecodeError) as e:
                # UnicodeDecodeError derives from ValueError, not OSError, so it
                # must be named explicitly for a non-UTF-8 file to be skipped
                # rather than aborting the whole read.
                print(f"Warning: Could not read file {md_file.name}: {e}")
                continue
            knowledge_contents[md_file.name] = content
        return knowledge_contents

    def get_knowledge_summary(self) -> Dict[str, Any]:
        """
        Get a summary of all knowledge files including file count and total content length.

        Returns:
            Dict[str, Any]: Summary information about the knowledge files, with keys:
                "total_files": number of files successfully read
                "total_content_length": sum of len() over all content strings
                "file_names": list of the filenames read
                "contents": the filename -> content dictionary itself

        Raises:
            FileNotFoundError: If the knowledge folder doesn't exist
        """
        knowledge_contents = self.read_all_knowledge_files()
        return {
            "total_files": len(knowledge_contents),
            "total_content_length": sum(len(content) for content in knowledge_contents.values()),
            "file_names": list(knowledge_contents),
            "contents": knowledge_contents,
        }

    def knowledgeprocessor(self) -> Dict[str, Any]:
        """
        Convenience method that calls get_knowledge_summary.

        Returns:
            Dict[str, Any]: Knowledge processing summary

        Raises:
            FileNotFoundError: If the knowledge folder doesn't exist
        """
        return self.get_knowledge_summary()
# Convenience function for direct usage
def get_all_knowledge_contents() -> Dict[str, str]:
    """
    Read every knowledge file using a KnowledgeProcessor built with its default folder.

    Returns:
        Dict[str, str]: Dictionary with filename as key and file content as value,
            exactly as returned by KnowledgeProcessor.read_all_knowledge_files()
    """
    return KnowledgeProcessor().read_all_knowledge_files()
def knowledgeprocessor() -> Dict[str, Any]:
    """
    Convenience function to get knowledge summary.

    Builds a KnowledgeProcessor with its default folder and returns the result
    of its get_knowledge_summary() method.

    Returns:
        Dict[str, Any]: Summary information about the knowledge files
    """
    # The value type is typing.Any: the builtin function `any` is not a type.
    return KnowledgeProcessor().get_knowledge_summary()