Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
36 commits
Select commit Hold shift + click to select a range
17029f0
November update WIP
rsatrioadi Nov 27, 2024
92edc8e
Fix some algorithms
rsatrioadi Nov 27, 2024
86be259
Add error handling
rsatrioadi Nov 28, 2024
abcea4b
Describe package interactions
rsatrioadi Dec 16, 2024
21bc9aa
Remove leftover break
rsatrioadi Dec 16, 2024
4310148
Update graph lib
rsatrioadi Jan 15, 2025
8f3bd4e
Client feedback fix
rsatrioadi Jan 29, 2025
03c6d3a
Parameterize layer name and descriptions
rsatrioadi Feb 3, 2025
9af24af
Update config example
rsatrioadi Feb 3, 2025
fec7e1b
Use tool calling mode to constrain response format
rsatrioadi Feb 18, 2025
7948bad
Update template
rsatrioadi Feb 18, 2025
d271a27
Edge access bug fix
rsatrioadi Feb 19, 2025
7ec48e7
Write additional nodes and edges to jsonl output
rsatrioadi Feb 20, 2025
203de2e
Bug fix
rsatrioadi Feb 21, 2025
e78afc8
Refine code, WIP
rsatrioadi Feb 28, 2025
37a208c
Refactor
rsatrioadi Feb 28, 2025
83c3e3e
Refactor
rsatrioadi Feb 28, 2025
1d5e5b3
Restructure and migrate to ontology v2
rsatrioadi May 21, 2025
e6ec776
Restructure WIP
rsatrioadi May 21, 2025
758b29d
Fix dependency profile bug
rsatrioadi May 21, 2025
948d838
Restructure WIP ...
rsatrioadi May 22, 2025
d061b4d
Minor improvements
rsatrioadi May 28, 2025
a8d1800
Generalise classification (Codex)
Mar 1, 2026
44d6e4f
Change hasParameter to parameterizes (SABO-2)
Mar 1, 2026
6a4e4f9
Add optional SecDFD classification
Mar 9, 2026
c290a3d
Merge pull request #1 from rsatrioadi/codex-refactor
rsatrioadi Mar 9, 2026
a6b90e9
Add pydantic>=2.0 and bump openai to >=1.50
Apr 28, 2026
31d9edf
Replace hand-maintained JSON schema dicts with Pydantic v2 models
Apr 28, 2026
d52d99e
Switch LLM client to structured outputs with retry
Apr 28, 2026
65f0fd1
Add resumable checkpoint support
Apr 28, 2026
8dd08ab
Replace topo-sort loop with futures DAG executor; add override/siblin…
Apr 28, 2026
a03449f
Wire max_workers, resume, and ancestor map into LLMFilter
Apr 28, 2026
fe7e85e
Add Stereocode method and class stereotype classification
Apr 28, 2026
4af4ecf
Merge pull request #2 from rsatrioadi/codex-refactor
rsatrioadi Apr 28, 2026
c200ac5
Add setup.py
May 6, 2026
961bc6a
Expand config example
May 6, 2026
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
5 changes: 5 additions & 0 deletions .gitignore
Original file line number Diff line number Diff line change
@@ -0,0 +1,5 @@
*.jsonl
*.log
*.ini
.*
!.gitignore
1 change: 1 addition & 0 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -40,6 +40,7 @@ The `command` argument can be one of the following:
- `description`: A one-sentence summary of *packages*, *classes*, and *methods*/*constructors*.
- `roleStereotype`: A classification of *classes* into one of [Wirfs-Brock's role stereotypes](https://wirfs-brock.com/PDFs/Characterizing%20Classes.pdf).
- `layer`: A classification of *packages*, *classes*, and *methods*/*constructors* into architectural layers.
- `secdfdTypes` (optional, enable via `[secdfd]` config): Multi-label SecDFD classification for v2 `Type`, `Operation`, and `Variable` nodes.

Currently, this command adds all the properties above, i.e., there is no way to select only one or two properties to add to the graph.

Expand Down
18 changes: 17 additions & 1 deletion arcana/__main__.py
Original file line number Diff line number Diff line change
@@ -1,9 +1,15 @@
import argparse
import configparser
import time
import json
import logging
import sys

from arcana.filters import CLISeeder, MetricsFilter, LLMFilter, MergeFilter
from arcana.checkpoint import writer
from arcana.llm_filter.filter import LLMFilter
from arcana.merge_filter import MergeFilter
from arcana.metrics import MetricsFilter
from arcana.seeder import CLISeeder
from arcanalib.graph import Graph
from arcanalib.pipefilter import Pipeline

Expand Down Expand Up @@ -55,6 +61,16 @@ def main():
commands = args.command.split('-')

if commands:
current_time_str = time.strftime("%Y%m%d-%H%M%S")
jsonl_file = f'arcana-{current_time_str}.jsonl'
w = writer(jsonl_file)
log_file = f'arcana-{current_time_str}.log'
logging.basicConfig(
level=logging.DEBUG,
format="%(asctime)s %(name)s %(levelname)s: %(message)s",
filename=log_file,
filemode="a", # append
)
pipeline = Pipeline(*[
filters[command](config)
for command in commands
Expand Down
101 changes: 101 additions & 0 deletions arcana/checkpoint.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,101 @@
import json
import logging
from threading import Lock

from arcana.custom_encoder import CustomJSONEncoder

logger = logging.getLogger(__name__)


class JSONLWriter:
    """Process-wide, thread-safe writer that emits one JSON object per line.

    The class is a singleton: the first construction opens the target file,
    and every later construction returns that same instance, ignoring the
    ``path`` and ``append`` arguments it was given.
    """

    _instance = None
    # Shared by all callers; guards singleton creation and every file access.
    _lock = Lock()

    def __new__(cls, path: str, append: bool = False):
        """Return the shared writer, opening *path* on the first call only.

        Args:
            path: File to write to. Honoured only by the first construction.
            append: Open with mode "a" instead of "w". Honoured only by the
                first construction.
        """
        with cls._lock:
            if cls._instance is None:
                inst = super().__new__(cls)
                mode = "a" if append else "w"
                # buffering=1 selects line buffering for text files, so each
                # record is pushed out when its newline is written. The
                # encoding is explicit so the output does not depend on the
                # platform default and matches the UTF-8 used when
                # checkpoints are read back.
                inst._file = open(path, mode, buffering=1, encoding="utf-8")
                cls._instance = inst
            return cls._instance

    def write(self, data: dict):
        """Serialise *data* with CustomJSONEncoder and write it as one line."""
        with self._lock:
            self._file.write(json.dumps(data, cls=CustomJSONEncoder) + "\n")

    def flush(self):
        """Flush buffered output; failures are logged instead of raised."""
        with self._lock:
            try:
                self._file.flush()
            except (OSError, ValueError) as exc:
                # ValueError: the file is already closed. OSError: the
                # underlying write failed. Flushing stays best-effort, but
                # the failure is no longer invisible.
                logger.warning("Could not flush JSONL output: %s", exc)


# Module-level settings that writer() falls back to; configure_writer()
# replaces them.
_writer_path: str = "checkpoints.jsonl"
_writer_append: bool = False


def configure_writer(path: str, append: bool = False):
    """Record the output path and open mode for subsequent writer() calls.

    Intended to be called once, before writer() is used for the first time.
    """
    global _writer_path, _writer_append
    _writer_path, _writer_append = path, append


def writer(path=None):
    """Return the JSONLWriter, using *path* if truthy, else the configured path."""
    target = path if path else _writer_path
    return JSONLWriter(target, _writer_append)


# ---------------------------------------------------------------------------
# Checkpoint loading (for resume)
# ---------------------------------------------------------------------------

def load_checkpoint(path: str, graph):
    """Load a JSONL checkpoint file into *graph*, applying node properties and edges.

    Handles both node entries {"data": {"id": ..., "labels": [...], "properties": {...}}}
    and edge entries {"data": {"id": ..., "source": ..., "target": ..., "label": ..., "properties": {...}}}.

    Blank lines and entries without an ``id`` are skipped silently. Lines that
    are not valid JSON, are not a JSON object, or whose ``data`` is not an
    object are skipped with a warning. A null ``properties`` or ``labels``
    value is treated as empty.

    Args:
        path: Checkpoint file to read (UTF-8, one JSON object per line).
        graph: Graph to update. Must provide ``nodes`` (supporting ``in`` and
            indexing to objects with a ``properties`` dict), ``add_node`` and
            ``add_edge``.

    Returns:
        None. A missing file is logged as a warning and otherwise ignored.
    """
    loaded_nodes = 0
    loaded_edges = 0
    try:
        with open(path, encoding="utf-8") as f:
            for lineno, line in enumerate(f, 1):
                line = line.strip()
                if not line:
                    continue
                try:
                    record = json.loads(line)
                except json.JSONDecodeError as e:
                    logger.warning("Skipping malformed checkpoint line %d: %s", lineno, e)
                    continue

                # Valid JSON is not necessarily an object: a bare list, string
                # or number has no .get(), and "data" itself may be null.
                entry = record.get('data', {}) if isinstance(record, dict) else None
                if not isinstance(entry, dict):
                    logger.warning(
                        "Skipping checkpoint line %d: expected a JSON object with an object 'data'",
                        lineno,
                    )
                    continue

                eid = entry.get('id')
                if not eid:
                    continue

                # "or {}" also covers an explicit null, which cannot be
                # unpacked with ** or passed to dict.update().
                props = entry.get('properties') or {}

                if 'source' in entry and 'target' in entry and 'label' in entry:
                    # Edge entry
                    graph.add_edge(
                        entry['source'],
                        entry['target'],
                        entry['label'],
                        **props,
                    )
                    loaded_edges += 1
                else:
                    # Node entry: merge into an existing node, otherwise create it.
                    if eid in graph.nodes:
                        graph.nodes[eid].properties.update(props)
                    else:
                        graph.add_node(eid, *(entry.get('labels') or []), **props)
                    loaded_nodes += 1

    except FileNotFoundError:
        logger.warning("Checkpoint file not found: %s", path)
        return

    logger.info("Loaded checkpoint %s: %d nodes, %d edges.", path, loaded_nodes, loaded_edges)
9 changes: 9 additions & 0 deletions arcana/custom_encoder.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,9 @@
import json

class CustomJSONEncoder(json.JSONEncoder):
    """JSON encoder that additionally serialises ``set`` values as JSON arrays."""

    def default(self, obj):
        """Return a list for a set; defer every other type to the base class."""
        if not isinstance(obj, set):
            # Not handled here: let JSONEncoder.default deal with it.
            return super().default(obj)
        return [*obj]
Loading