-
Notifications
You must be signed in to change notification settings - Fork 0
Expand file tree
/
Copy pathio_handler.py
More file actions
287 lines (234 loc) · 10.2 KB
/
io_handler.py
File metadata and controls
287 lines (234 loc) · 10.2 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
"""
Input/Output handler for managing LaTeX files and generating diffs.
Handles file I/O and latexdiff generation.
"""
import logging
import subprocess
import os
import shutil
from pathlib import Path
from typing import Optional, Tuple
from datetime import datetime
logger = logging.getLogger(__name__)


class IOHandler:
    """Manage input/output operations for the paper polishing workflow.

    A handler owns one output directory. ``create_session`` creates a
    timestamped session directory inside it; the ``save_*`` methods then
    write iteration papers, reviewer feedback, optional latexdiff output
    and the final results into that session directory.
    """

    # Suffixes (lower-case) of top-level project files copied into a session.
    _DEPENDENCY_SUFFIXES = frozenset(
        {'.tex', '.bib', '.sty', '.cls', '.pdf', '.png', '.jpg', '.jpeg'}
    )
    # Top-level project directory names that are never copied into a session.
    _EXCLUDED_DIR_NAMES = frozenset({'__pycache__', '.git', 'output'})
    # Maximum number of feedback characters shown per iteration in SUMMARY.md.
    _FEEDBACK_PREVIEW_CHARS = 200
    # Seconds to wait for the latexdiff subprocess before giving up.
    _LATEXDIFF_TIMEOUT_SECONDS = 30

    def __init__(self, output_dir: str = "output", project_dir: Optional[str] = None):
        """
        Initialize IO handler.

        The output directory is created immediately if it does not exist.

        Args:
            output_dir: Directory for output files
            project_dir: Optional path to project folder to copy dependencies
        """
        self.output_dir = Path(output_dir)
        self.output_dir.mkdir(parents=True, exist_ok=True)
        # Set by create_session(); None until a session has been created.
        self.session_dir: Optional[Path] = None
        self.project_dir = Path(project_dir) if project_dir else None
        logger.info(f"IO handler initialized with output dir: {output_dir}")

    def create_session(self, input_file: str) -> Path:
        """
        Create a session directory for a paper processing session.

        The directory is named ``<input stem>_<YYYYmmdd_HHMMSS>`` and lives
        under the output directory. When a project directory was configured
        and is a directory, its dependencies are copied into the session.

        Args:
            input_file: Path to input LaTeX file

        Returns:
            Path to session directory
        """
        paper_name = Path(input_file).stem
        timestamp = datetime.now().strftime("%Y%m%d_%H%M%S")
        self.session_dir = self.output_dir / f"{paper_name}_{timestamp}"
        self.session_dir.mkdir(parents=True, exist_ok=True)

        # is_dir() rather than exists(): iterating a plain file would raise.
        if self.project_dir and self.project_dir.is_dir():
            self._copy_project_dependencies()
            logger.info("Copied project dependencies to session directory")

        logger.info(f"Created session directory: {self.session_dir}")
        return self.session_dir

    def _copy_project_dependencies(self) -> None:
        """Copy resource files and subdirectories of the project into the session.

        Top-level files are copied when their suffix (compared
        case-insensitively) is a known LaTeX/resource suffix. Subdirectories
        are copied recursively, except excluded names and the output
        directory itself, wherever it is located inside the project.
        """
        output_root = self.output_dir.resolve()

        def skip_output_root(directory, names):
            # copytree "ignore" callback: drop the entry that is the output
            # directory so a session is never copied into itself.
            parent = Path(directory)
            return [name for name in names if (parent / name).resolve() == output_root]

        for item in self.project_dir.iterdir():
            if item.is_file():
                if item.suffix.lower() in self._DEPENDENCY_SUFFIXES:
                    shutil.copy2(item, self.session_dir)
                continue
            if not item.is_dir() or item.name in self._EXCLUDED_DIR_NAMES:
                continue

            resolved = item.resolve()
            if resolved == output_root:
                # The output directory itself holds the sessions; skip it.
                continue
            # Only pay for the per-entry resolve() when the output directory
            # actually lives somewhere below this subdirectory.
            contains_output = resolved in output_root.parents
            shutil.copytree(
                item,
                self.session_dir / item.name,
                ignore=skip_output_root if contains_output else None,
                dirs_exist_ok=True,
            )

    def load_paper(self, filepath: str) -> str:
        """
        Load LaTeX paper from file.

        Args:
            filepath: Path to .tex file

        Returns:
            LaTeX content as string

        Raises:
            FileNotFoundError: If file doesn't exist
            IOError: If file cannot be read
        """
        try:
            with open(filepath, 'r', encoding='utf-8') as f:
                content = f.read()
        except FileNotFoundError:
            logger.error(f"Paper file not found: {filepath}")
            raise
        except Exception as e:
            logger.error(f"Error reading paper file: {str(e)}")
            # Chain the cause so the underlying error stays in the traceback.
            raise IOError(f"Cannot read paper file {filepath}: {str(e)}") from e

        logger.info(f"Loaded paper from {filepath} ({len(content)} chars)")
        return content

    def save_iteration(self, iteration: int, content: str,
                       feedback: str, original: Optional[str] = None) -> Tuple[Path, Optional[Path]]:
        """
        Save paper and feedback for an iteration.

        Writes ``iteration_NN.tex`` and ``feedback_NN.txt`` into the session
        directory and, when ``original`` is given, attempts to generate
        ``diff_NN.tex`` with latexdiff.

        Args:
            iteration: Iteration number
            content: LaTeX content to save
            feedback: Reviewer feedback
            original: Original content for diff generation

        Returns:
            Tuple of (paper_path, diff_path); diff_path is None when no diff
            was requested or it could not be generated

        Raises:
            RuntimeError: If no session has been created yet
        """
        if self.session_dir is None:
            raise RuntimeError("Session not initialized. Call create_session first.")

        paper_path = self.session_dir / f"iteration_{iteration:02d}.tex"
        with open(paper_path, 'w', encoding='utf-8') as f:
            f.write(content)
        logger.info(f"Saved iteration {iteration} paper to {paper_path}")

        feedback_path = self.session_dir / f"feedback_{iteration:02d}.txt"
        with open(feedback_path, 'w', encoding='utf-8') as f:
            f.write(feedback)
        logger.info(f"Saved iteration {iteration} feedback to {feedback_path}")

        diff_path = None
        if original is not None:
            diff_path = self._generate_latexdiff(original, content, iteration)
        return paper_path, diff_path

    def _generate_latexdiff(self, original: str, modified: str,
                            iteration: int) -> Optional[Path]:
        """
        Generate latexdiff between original and modified content.

        Both versions are written to temporary files in the session directory
        (so ``--flatten`` sees the files copied into the session), latexdiff
        is run on them, and the temporary files are removed again whether or
        not the run succeeded. A diff file left by a failed run is removed
        as well.

        Args:
            original: Original LaTeX content
            modified: Modified LaTeX content
            iteration: Iteration number

        Returns:
            Path to generated diff file, or None if latexdiff is not available
            or failed

        Raises:
            RuntimeError: If no session has been created yet
        """
        if self.session_dir is None:
            raise RuntimeError("Session not initialized. Call create_session first.")

        # Check availability first so nothing is written when the tool is missing.
        if not self._check_latexdiff_available():
            logger.warning("latexdiff not found. Skipping diff generation.")
            return None

        orig_file = self.session_dir / f"temp_iter_{iteration:02d}_orig.tex"
        mod_file = self.session_dir / f"temp_iter_{iteration:02d}_mod.tex"
        diff_file = self.session_dir / f"diff_{iteration:02d}.tex"

        succeeded = False
        try:
            with open(orig_file, 'w', encoding='utf-8') as f:
                f.write(original)
            with open(mod_file, 'w', encoding='utf-8') as f:
                f.write(modified)

            cmd = [
                "latexdiff",
                "--flatten",
                str(orig_file),
                str(mod_file),
            ]
            with open(diff_file, 'w', encoding='utf-8') as f:
                result = subprocess.run(
                    cmd,
                    stdout=f,
                    stderr=subprocess.PIPE,
                    text=True,
                    timeout=self._LATEXDIFF_TIMEOUT_SECONDS,
                )

            if result.returncode != 0:
                logger.warning(
                    f"latexdiff failed with return code {result.returncode}: "
                    f"{result.stderr}"
                )
                return None

            succeeded = True
            logger.info(f"Generated latexdiff: {diff_file}")
            return diff_file
        except subprocess.TimeoutExpired:
            logger.warning("latexdiff timed out")
            return None
        except Exception as e:
            # Diff generation is best-effort; never fail the iteration save.
            logger.warning(f"Error generating latexdiff: {str(e)}")
            return None
        finally:
            # Always remove the temporary inputs; on failure also remove the
            # empty or partial diff so it cannot be mistaken for a real one.
            leftovers = [orig_file, mod_file]
            if not succeeded:
                leftovers.append(diff_file)
            for leftover in leftovers:
                try:
                    leftover.unlink(missing_ok=True)
                except OSError as e:
                    logger.warning(f"Could not remove {leftover}: {str(e)}")

    def _check_latexdiff_available(self) -> bool:
        """Check if latexdiff command is available on the PATH."""
        return shutil.which("latexdiff") is not None

    def save_final_results(self, paper: str, metadata: dict) -> Path:
        """
        Save final results and metadata.

        Writes ``final_paper.tex``, ``metadata.json`` and ``SUMMARY.md`` into
        the session directory.

        Args:
            paper: Final LaTeX content
            metadata: Metadata dict with iteration info

        Returns:
            Path to final paper

        Raises:
            RuntimeError: If no session has been created yet
        """
        if self.session_dir is None:
            raise RuntimeError("Session not initialized")

        final_paper = self.session_dir / "final_paper.tex"
        with open(final_paper, 'w', encoding='utf-8') as f:
            f.write(paper)
        logger.info(f"Saved final paper: {final_paper}")

        # Imported here because json is only needed by this method.
        import json
        metadata_file = self.session_dir / "metadata.json"
        with open(metadata_file, 'w', encoding='utf-8') as f:
            json.dump(metadata, f, indent=2)
        logger.info(f"Saved metadata: {metadata_file}")

        summary_file = self.session_dir / "SUMMARY.md"
        with open(summary_file, 'w', encoding='utf-8') as f:
            f.write(self._generate_summary_report(metadata))
        logger.info(f"Saved summary report: {summary_file}")
        return final_paper

    def _generate_summary_report(self, metadata: dict) -> str:
        """Generate a Markdown summary report of the polishing process.

        Args:
            metadata: Dict read with the keys ``total_iterations``,
                ``status``, ``accepted_at_iteration`` and ``iterations``
                (a list of dicts with ``accepted`` and ``feedback``); all
                keys are optional.

        Returns:
            The report text. Feedback longer than the preview limit is cut
            and marked with a trailing ``...``.
        """
        report = [
            "# Paper Polishing Summary\n",
            f"**Total Iterations:** {metadata.get('total_iterations', 0)}\n",
            f"**Status:** {metadata.get('status', 'Unknown')}\n",
        ]
        if metadata.get('status') == 'ACCEPTED':
            report.append(
                f"**Decision Made At:** Iteration {metadata.get('accepted_at_iteration', 'Unknown')}\n"
            )
        report.append("\n## Iteration Details\n")

        limit = self._FEEDBACK_PREVIEW_CHARS
        # "or" guards against keys that are present but explicitly None.
        for i, iter_data in enumerate(metadata.get('iterations') or [], 1):
            feedback = str(iter_data.get('feedback') or '')
            preview = feedback[:limit]
            if len(feedback) > limit:
                # Only mark the text as truncated when something was cut off.
                preview += "..."
            report.append(f"\n### Iteration {i}\n")
            report.append(f"- **Accepted:** {iter_data.get('accepted', False)}\n")
            report.append(f"- **Feedback:** {preview}\n")
        return "".join(report)

    def list_iterations(self) -> list:
        """
        List all saved iterations in current session.

        Scans the session directory for ``iteration_*.tex`` files and parses
        the number after the first underscore; files whose number cannot be
        parsed are ignored.

        Returns:
            Sorted list of iteration numbers (empty when no session exists)
        """
        if self.session_dir is None:
            return []

        iterations = []
        for f in self.session_dir.glob("iteration_*.tex"):
            try:
                iterations.append(int(f.stem.split('_')[1]))
            except (ValueError, IndexError):
                continue
        return sorted(iterations)