-
Notifications
You must be signed in to change notification settings - Fork 1
Expand file tree
/
Copy pathclient_utils.py
More file actions
481 lines (395 loc) · 16.7 KB
/
client_utils.py
File metadata and controls
481 lines (395 loc) · 16.7 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
470
471
472
473
474
475
476
477
478
479
480
481
# search_logs.py
import json
import os
from typing import Dict, Any, List, Set, Tuple
from collections import deque
import re
import html
from datetime import datetime
from graphviz import Source
from server_utils import TOOLS_LOGFILE, CODES_LOGFILE
from utils import DataCache, OUTPUT
# Path (as a string) of the "subgraph.png" file inside the OUTPUT directory;
# this is the value generate_dot() returns after rendering.
SUBGRAPH_PNG = str(OUTPUT / "subgraph.png")
def store_conversation(debug_text: str = ''):
    """Overwrite ``conversation.txt`` in the OUTPUT directory with a timestamped dump.

    The file is truncated on every call; it then holds a ``=== <timestamp> ===``
    header line followed by ``debug_text`` verbatim.

    Parameters
    ----------
    debug_text : str
        Text to store after the timestamp header. Defaults to an empty string.
    """
    # Explicit UTF-8 so non-ASCII text is written identically on every platform
    # (the log readers in this module also use UTF-8). The context manager
    # flushes and closes the file, so no manual flush is needed.
    with open(OUTPUT / "conversation.txt", "w", encoding="utf-8") as f:
        f.write(f"=== {datetime.now()} ===\n")
        f.write(debug_text)
def restart(logname: str = TOOLS_LOGFILE):
    """Reset the shared data cache and empty the given log file.

    Parameters
    ----------
    logname : str
        Path of the log file to truncate (created if it does not exist).
    """
    DataCache.reset()
    # Opening in "w" mode truncates the file; nothing needs to be written.
    with open(logname, "w"):
        pass
def load_recent_code_logs(
    max_items: int = 5,
    filename: str = CODES_LOGFILE
) -> List[dict]:
    """Return the newest code-log records from a JSON-lines file.

    Each non-blank line is parsed as JSON. Lines that are not valid JSON, or
    that are not objects carrying both a ``"date"`` and a ``"code_str"`` key,
    are ignored. The surviving records are ordered by ``"date"`` descending
    and at most ``max_items`` of them are returned. A missing or empty
    ``filename`` yields an empty list.
    """
    if not (filename and os.path.exists(filename)):
        return []

    entries: List[dict] = []
    with open(filename, "r", encoding="utf-8") as handle:
        for raw_line in handle:
            text = raw_line.strip()
            if not text:
                continue
            try:
                parsed = json.loads(text)
            except json.JSONDecodeError:
                # Legacy / non-JSON line: ignore it.
                continue
            if not isinstance(parsed, dict):
                continue
            if "date" in parsed and "code_str" in parsed:
                entries.append(parsed)

    # Newest first; the sort is stable, so ties keep their file order.
    entries.sort(key=lambda entry: entry.get("date", ""), reverse=True)
    return entries[:max_items]
def load_objects(filename: str = TOOLS_LOGFILE) -> Dict[str, dict]:
    """
    Collect the panel-producing JSON objects stored in a log file.

    The file is treated as a stream of concatenated JSON objects. Objects may
    span several lines and may be separated by arbitrary whitespace (or any
    other characters, which are ignored outside an object), for example::

        { "output": { "results_panel_id": "A" } }{ "output":
          { "results_panel_id": "B" } }

    Only objects whose ``"output"`` value is a dict holding a non-empty string
    ``"results_panel_id"`` are kept. If the same id occurs more than once, the
    object appearing last in the file wins. Blobs that fail to parse as JSON
    are dropped silently.

    Parameters
    ----------
    filename : str
        Path to the input file containing concatenated JSON objects.

    Returns
    -------
    dict
        Mapping from each ``results_panel_id`` to its parsed JSON object.
    """
    panels: Dict[str, Any] = {}

    def _register(raw: str) -> None:
        """Parse one candidate blob and store it when it names a results panel."""
        text = raw.strip()
        if not text:
            return
        try:
            parsed = json.loads(text)
        except json.JSONDecodeError:
            # Not valid JSON: skip quietly.
            return
        if not isinstance(parsed, dict):
            return
        output = parsed.get("output")
        if not isinstance(output, dict):
            return
        panel_id = output.get("results_panel_id")
        if isinstance(panel_id, str) and panel_id:
            panels[panel_id] = parsed

    with open(filename, "r", encoding="utf-8") as handle:
        pending: list[str] = []   # characters of the object currently being read
        nesting = 0               # brace depth; 0 means "between objects"
        inside_string = False     # True while inside a JSON string literal
        escaped = False           # True when the previous char was a backslash in a string

        for line in handle:
            for char in line:
                if nesting == 0:
                    # Between objects everything except an opening brace is ignored.
                    if char == "{":
                        pending = ["{"]
                        nesting = 1
                        inside_string = False
                        escaped = False
                    continue

                pending.append(char)

                if escaped:
                    # This character was escaped; it carries no structural meaning.
                    escaped = False
                elif char == "\\":
                    # A backslash only starts an escape while inside a string.
                    escaped = inside_string
                elif char == '"':
                    inside_string = not inside_string
                elif not inside_string:
                    # Braces count only outside string literals.
                    if char == "{":
                        nesting += 1
                    elif char == "}":
                        nesting -= 1
                        if nesting == 0:
                            # Top-level object closed: hand it over and start afresh.
                            _register("".join(pending))
                            pending = []

    return panels
def traverse_links(objects: Dict[str, dict], start_id: List[str] = None) -> List[str]:
    """
    Describe the panel graph in breadth-first order, one text entry per panel.

    Panels reference other panels through any input field whose name contains
    ``'panel_id'``. Starting from ``start_id`` (a single id or a list of ids)
    those references are followed breadth-first, and every panel present in
    ``objects`` is reported at most once, even when it is reachable from
    several starting points.

    When ``start_id`` is empty or omitted, the traversal starts from every
    root, i.e. every panel that no other panel references.

    Each returned string ends with a newline and holds the panel id (indented
    by its BFS depth), the tool name with its non-panel inputs, an optional
    ``nlevels`` / ``rows`` summary, and one ``...<id>`` continuation line per
    referenced panel.
    """

    def find_neighbors(obj: dict) -> List[str]:
        """Collect the string panel ids referenced by '*panel_id*' inputs."""
        found: List[str] = []
        for field, value in obj.get("input", {}).items():
            if "panel_id" not in field:
                continue
            if value != "":
                if isinstance(value, str):
                    found.append(value)
                elif isinstance(value, list):
                    found.extend(item for item in value if isinstance(item, str))
        return found

    # No explicit start: begin at every panel nobody points to.
    if not start_id:
        referenced: Set[str] = {
            target
            for entry in objects.values()
            for target in find_neighbors(entry)
        }
        start_id = [key for key in objects if key not in referenced]

    # A bare string means a single starting panel.
    if isinstance(start_id, str):
        start_id = [start_id]

    seen: Set[str] = set()
    report: List[str] = []
    pending: deque = deque((root, 0) for root in start_id)

    while pending:
        panel, level = pending.popleft()
        if panel in seen or panel not in objects:
            continue
        seen.add(panel)

        record = objects[panel]
        inputs = record.get("input", {})
        outputs = record.get("output", {})
        pad = " " * level

        plain_inputs = {
            name: value for name, value in inputs.items() if "panel_id" not in name
        }
        summary = {
            name: value for name, value in outputs.items() if name in ("nlevels", "rows")
        }

        # Flatten every panel reference: lists contribute their items as-is,
        # scalars are kept if they are strings and formatted as text otherwise.
        linked = []
        for name, value in inputs.items():
            if "panel_id" in name and value is not None and value != "":
                if isinstance(value, list):
                    linked.extend(value)
                elif isinstance(value, str):
                    linked.append(value)
                else:
                    linked.append(f"{value}")

        pieces = [f"{pad}{panel:5s}", f" = {record.get('tool', '')}{plain_inputs}"]
        if summary:
            pieces.append(f" -> {summary}")
        pieces.extend(f"\n{pad}{' ' * 8}...{target}" for target in linked)
        pieces.append("\n")
        report.append("".join(pieces))

        # Schedule referenced panels one level deeper.
        for target in linked:
            if isinstance(target, str) and target not in seen:
                pending.append((target, level + 1))

    return report
def generate_graphviz(objects: Dict[str, dict], start_node: str = None) -> str:
    """
    Generate a Graphviz DOT representation of the panel dependency graph.

    Nodes use HTML-like <table> labels showing:
      - the panel id (title row), followed by " [nlevels]" and " (rows)" when
        the panel's output carries those keys
      - the tool name (with any 'Panel_' text removed)
      - references to panels that are NOT part of the drawn graph
        (red for "panel_id", blue for any other '*panel_id*' field)
      - all input arguments EXCEPT those whose name contains 'panel_id'

    Edges (drawn with dir=back):
      - red and unlabeled for the "panel_id" field
      - blue and labeled with the field name (minus "_panel_id") for any other
        '*panel_id*' field, e.g. "other_panel_id" -> label "other"

    Parameters
    ----------
    objects : dict
        Mapping of panel id -> logged object with "tool", "input" and "output".
    start_node : str or list of str, optional
        Panel id(s) to start from; only panels reachable from them through
        '*panel_id*' inputs are drawn. When empty or omitted, every panel in
        ``objects`` is drawn. A list passed here is not modified.

    Returns
    -------
    str
        The DOT source text.
    """

    def find_neighbors_with_fields(obj: dict) -> List[Tuple[str, str]]:
        """Return (target_panel_id, field_name) for every '*panel_id*' input."""
        neighbors: List[Tuple[str, str]] = []
        for key, val in obj.get("input", {}).items():
            if "panel_id" not in key or val is None or val == "":
                continue
            if isinstance(val, str):
                neighbors.append((val, key))
            elif isinstance(val, list):
                neighbors.extend((v, key) for v in val if isinstance(v, str))
        return neighbors

    def dot_name(panel_id: str) -> str:
        """Turn a panel id into a name made only of letters, digits and '_'."""
        return re.sub(r"[^a-zA-Z0-9_]", "_", panel_id)

    # ----------------------------------------------------------------------
    # Determine set of nodes to include
    # ----------------------------------------------------------------------
    if not start_node:
        nodes_to_include = set(objects.keys())
    else:
        visited: Set[str] = set()
        # Work on a private copy: popping from the caller's own list would
        # silently empty the argument they passed in.
        stack = [start_node] if isinstance(start_node, str) else list(start_node)
        while stack:
            curr = stack.pop()
            if curr in visited or curr not in objects:
                continue
            visited.add(curr)
            for nxt, _field in find_neighbors_with_fields(objects[curr]):
                if nxt not in visited:
                    stack.append(nxt)
        nodes_to_include = visited

    # ----------------------------------------------------------------------
    # Begin DOT
    # ----------------------------------------------------------------------
    lines = ['digraph PanelGraph {']
    lines.append(' rankdir=TB;')  # LR for left-to-right
    lines.append(' node [shape=plaintext, fontsize=10];')  # HTML table nodes

    # ----------------------------------------------------------------------
    # Emit nodes as HTML <table>
    # ----------------------------------------------------------------------
    for node in sorted(nodes_to_include):
        obj = objects.get(node, {})
        input_args = obj.get("input", {})
        output_args = obj.get("output", {})
        tool = obj.get("tool", "")

        output_suffix = ""
        if 'nlevels' in output_args:
            # Escaped because the value lands inside an HTML-like label.
            output_suffix += f" [{html.escape(str(output_args['nlevels']))}]"
        if 'rows' in output_args:
            row_count = output_args['rows']
            if isinstance(row_count, (int, float)):
                rows_text = f"{row_count:,}"
            else:
                # The ',' format spec raises for non-numeric values; show as text.
                rows_text = html.escape(str(row_count))
            output_suffix += f" ({rows_text})"

        safe_node = dot_name(node)
        safe_tool = html.escape(str(tool).replace('Panel_', ''))

        table_rows = [
            # Title row = panel ID
            f'<tr><td colspan="2" bgcolor="#D0D0FF"><b>{html.escape(node)}</b>{output_suffix}</td></tr>',
            # Tool row
            f'<tr><td align="left">tool</td><td align="left"><b>{safe_tool}</b></td></tr>',
        ]

        # Rows for panel references whose target is not part of the drawn
        # graph (they get no edge, so they are listed inside the node instead).
        for key, value in input_args.items():
            if "panel_id" not in key or value is None or value == "":
                continue
            if isinstance(value, list):
                panel_ids = [v for v in value if isinstance(v, str)]
            else:
                panel_ids = [str(value)]
            for pid in panel_ids:
                if pid in nodes_to_include:
                    continue
                if key == "panel_id":
                    safe_key = html.escape(key)
                    color = "red"
                else:
                    safe_key = html.escape(key.replace("_panel_id", ""))
                    color = "blue"
                table_rows.append(
                    f'<tr><td align="left"><font color="{color}">{safe_key}</font></td>'
                    f'<td align="left"><font color="{color}">{html.escape(pid)}</font></td></tr>'
                )

        # All remaining (non panel-reference) input arguments.
        for key, val in input_args.items():
            if "panel_id" in key:
                continue  # handled above / drawn as edges
            table_rows.append(
                f'<tr><td align="left"><font color="darkgreen">{html.escape(key)}</font></td>'
                f'<td align="left"><font color="darkgreen">{html.escape(str(val))}</font></td></tr>'
            )

        table = (
            f'<<table border="1" cellborder="0" cellspacing="0" cellpadding="4">'
            f'{"".join(table_rows)}'
            f'</table>>'
        )
        lines.append(f' "{safe_node}" [label={table}];')

    # ----------------------------------------------------------------------
    # Emit edges with selective labels
    # ----------------------------------------------------------------------
    for node in sorted(nodes_to_include):
        if node not in objects:
            continue
        safe_src = dot_name(node)
        for neighbor, field_name in find_neighbors_with_fields(objects[node]):
            if neighbor not in nodes_to_include:
                continue
            safe_dst = dot_name(neighbor)
            if field_name == "panel_id":
                # Standard panel_id -> red arrow, no label
                lines.append(
                    f' "{safe_src}" -> "{safe_dst}" '
                    f'[dir=back, color="red", fontcolor="red"];'
                )
            else:
                # Nonstandard panel_id field -> blue arrow & blue label
                safe_field = html.escape(field_name.replace("_panel_id", ""))
                lines.append(
                    f' "{safe_src}" -> "{safe_dst}" '
                    f'[dir=back, color="blue", fontcolor="blue", label="{safe_field}"];'
                )

    lines.append("}")
    return "\n".join(lines)
# ---------------- EXAMPLE USAGE ----------------
def generate_dot(objects, start_key):
    """Write the panel graph for ``start_key`` as DOT text and render it to PNG.

    The DOT source produced by ``generate_graphviz`` is saved to
    ``subgraph.dot`` in the OUTPUT directory and then rendered with the
    ``graphviz`` package using the base path ``OUTPUT / "subgraph"`` and
    format ``png``.

    Parameters
    ----------
    objects : dict
        Panel objects keyed by results_panel_id (as returned by load_objects).
    start_key : str or list of str
        Panel id(s) passed to ``generate_graphviz`` as ``start_node``.

    Returns
    -------
    str
        ``SUBGRAPH_PNG``, the expected path of the rendered image.
    """
    dot = generate_graphviz(objects, start_node=start_key)
    # Explicit UTF-8: labels may contain non-ASCII text taken from the logs.
    with open(OUTPUT / "subgraph.dot", "w", encoding="utf-8") as f:
        f.write(dot)
    src = Source(dot)
    src.format = "png"
    # NOTE(review): cleanup=True presumably removes graphviz's intermediate
    # source file after rendering - confirm against the graphviz package docs.
    src.render(str(OUTPUT / "subgraph"), cleanup=True)
    return SUBGRAPH_PNG
if __name__ == "__main__":
    # Manual entry point: load the logged panel objects, ask for a panel id,
    # render the graph for it and print the path of the resulting image.
    objects = load_objects()
    print(f"Loaded {len(objects)} valid objects with results_panel_id keys.")
    # An empty answer is falsy, so generate_graphviz then draws every panel.
    start_key = input("Enter ending results_panel_id (including leading underscore) for traversal: ").strip()
    print(generate_dot(objects, start_key))