From b765afeb177920957b5b098a1db006e8044d4c0b Mon Sep 17 00:00:00 2001
From: "copilot-swe-agent[bot]" <198982749+Copilot@users.noreply.github.com>
Date: Sun, 18 Jan 2026 04:35:41 +0000
Subject: [PATCH 1/9] Initial plan


From 8c67136a85940f4cb1ef0f1b1b365814827bd0f2 Mon Sep 17 00:00:00 2001
From: "copilot-swe-agent[bot]" <198982749+Copilot@users.noreply.github.com>
Date: Sun, 18 Jan 2026 04:43:37 +0000
Subject: [PATCH 2/9] Complete migration to pyproject.toml and refactor code
 structure

Co-authored-by: jpmccu <602385+jpmccu@users.noreply.github.com>
---
 pyproject.toml                             |   54 +
 setlr/__init__.py                          | 1034 ++------------------
 setlr/core.py                              | 1027 +++++++++++++++++++
 tests/setlr_test/test_api_compatibility.py |   98 ++
 tests/setlr_test/test_error_messages.py    |   14 +-
 5 files changed, 1272 insertions(+), 955 deletions(-)
 create mode 100644 pyproject.toml
 create mode 100644 setlr/core.py
 create mode 100644 tests/setlr_test/test_api_compatibility.py

diff --git a/pyproject.toml b/pyproject.toml
new file mode 100644
index 0000000..6facde3
--- /dev/null
+++ b/pyproject.toml
@@ -0,0 +1,54 @@
+[build-system]
+requires = ["setuptools>=68.0", "wheel"]
+build-backend = "setuptools.build_meta"
+
+[project]
+name = "setlr"
+version = "1.0.1"
+description = "setlr is a tool for Semantic Extraction, Transformation, and Loading."
+readme = "README.md"
+license = {text = "Apache License 2.0"}
+authors = [
+    {name = "Jamie McCusker", email = "mccusj@cs.rpi.edu"}
+]
+keywords = ["rdf", "semantic", "etl"]
+classifiers = [
+    "Development Status :: 5 - Production/Stable",
+    "Topic :: Utilities",
+    "License :: OSI Approved :: Apache Software License",
+]
+requires-python = ">=3.8"
+dependencies = [
+    "future",
+    "pip>=9.0.0",
+    "cython",
+    "numpy",
+    "rdflib>=6.0.0",
+    "pandas>=0.23.0",
+    "requests",
+    "toposort",
+    "beautifulsoup4",
+    "jinja2",
+    "lxml",
+    "six",
+    "xlrd",
+    "ijson",
+    "click",
+    "tqdm",
+    "requests-testadapter",
+    "python-slugify",
+    "pyshacl[js]",
+]
+
+[project.urls]
+Homepage = "http://packages.python.org/setlr"
+
+[project.scripts]
+setlr = "setlr:main"
+
+[tool.setuptools]
+packages = ["setlr"]
+include-package-data = true
+
+[tool.setuptools.package-data]
+setlr = ["**/*"]
diff --git a/setlr/__init__.py b/setlr/__init__.py
index c8a92f0..eba796a 100644
--- a/setlr/__init__.py
+++ b/setlr/__init__.py
@@ -1,954 +1,90 @@
 #!/usr/bin/env python
 # -*- coding: utf-8 -*-
 
-from builtins import str
-from builtins import next
-from builtins import object
-from rdflib import *
-from rdflib.util import guess_format
-import rdflib
-import csv
-import json
-import sys, collections
-import requests
-import pandas
-import re
-import os
-from six import text_type as str
-
-from jinja2 import Template
-from toposort import toposort_flatten
-from numpy import isnan
-import uuid
-import tempfile
-import ijson
-from . import iterparse_filter
-#import xml.etree.ElementTree as ET
-import xml.etree.ElementTree
-
-from itertools import chain
-
-import zipfile
-import gzip
-
-import logging
-
-from tqdm import tqdm
-
-import hashlib
-from slugify import slugify
-from pyshacl import validate
-
-from .trig_store import TrigStore
-
-def hash(value):
-    m = hashlib.sha256()
-    m.update(value.encode('utf-8'))
-    return m.hexdigest()
-
-csvw = Namespace('http://www.w3.org/ns/csvw#')
-ov = Namespace('http://open.vocab.org/terms/')
-setl = Namespace('http://purl.org/twc/vocab/setl/')
-prov = Namespace('http://www.w3.org/ns/prov#')
-pv = Namespace('http://purl.org/net/provenance/ns#')
-sp = Namespace('http://spinrdf.org/sp#')
-sd = Namespace('http://www.w3.org/ns/sparql-service-description#')
-dc = Namespace('http://purl.org/dc/terms/')
-void = Namespace('http://rdfs.org/ns/void#')
-shacl = Namespace('http://www.w3.org/ns/shacl#')
-api_vocab = Namespace('http://purl.org/linked-data/api/vocab#')
-
-sys.setrecursionlimit(10000)
-
-from requests_testadapter import Resp
-
-# Regex pattern for extracting Jinja2 template variables (compiled once for performance)
-TEMPLATE_VAR_PATTERN = re.compile(r'\{\{([^}]+)\}\}')
-
-def camelcase(s):
-    return slugify(s).title().replace("-","")
-
-class LocalFileAdapter(requests.adapters.HTTPAdapter):
-    def build_response_from_file(self, request):
-        file_path = request.url[7:]
-        with open(file_path, 'rb') as file:
-            buff = bytearray(os.path.getsize(file_path))
-            file.readinto(buff)
-            resp = Resp(buff)
-            r = self.build_response(request, resp)
-            return r
-    def send(self, request, stream=False, timeout=None,
-             verify=True, cert=None, proxies=None):
-        return self.build_response_from_file(request)
-
-requests_session = requests.session()
-requests_session.mount('file://', LocalFileAdapter())
-requests_session.mount('file:///', LocalFileAdapter())
-
-datatypeConverters = collections.defaultdict(lambda: str)
-datatypeConverters.update({
-    XSD.string: str,
-    XSD.decimal: float,
-    XSD.integer: int,
-    XSD.float: float,
-    XSD.double: float
-})
-
-run_samples = -1
-
-_rdf_formats_to_guess = [
-    'xml',
-    'json-ld',
-    'trig',
-    'nquads',
-    'trix'
-]
-
-
-def read_csv(location, result):
-    args = dict(
-        sep = result.value(csvw.delimiter, default=Literal(",")).value,
-        #header = result.value(csvw.headerRow, default=Literal(0)).value),
-        skiprows = result.value(csvw.skipRows, default=Literal(0)).value,
-        dtype=str,
-        # dtype = object    # Does not seem to play well with future and python2/3 conversion
-    )
-    if result.value(csvw.header):
-        args['header'] = [0]
-    with get_content(location, result) as fo:
-        df = pandas.read_csv(fo, encoding='utf-8', **args)
-        #logger.debug("Loaded %s", location)
-        return df
-
-def read_graph(location, result, g = None):
-    if g is None:
-        g = ConjunctiveGraph()
-    graph = ConjunctiveGraph(store=g.store, identifier=result.identifier)
-    if len(graph) == 0:
-        data = get_content(location, result).read()
-        f = guess_format(location)
-        for fmt in [f] + _rdf_formats_to_guess:
-            try:
-                graph.parse(data=data, format=fmt)
-                break
-            except Exception as e:
-                #print e
-                pass
-        if len(graph) == 0:
-            logger.error("Could not parse graph: %s", location)
-        if result[RDF.type:OWL.Ontology]:
-            for ontology in graph.subjects(RDF.type, OWL.Ontology):
-                imports = [graph.resource(x) for x in graph.objects(ontology, OWL.imports)]
-                for i in imports:
-                    read_graph(i.identifier, i, g = g)
-    return g
-
-class FileLikeFromIter(object):
-    _closed = False
-
-    def __init__(self, content_iter):
-        self.iter = content_iter
-        self.data = b''
-
-    def __iter__(self):
-        return self.iter
-
-    def readable(self):
-        return True
-
-    def writable(self):
-        return False
-
-    def seekable(self):
-        return False
-
-    def closed(self):
-        if self._closed:
-            return True
-        if len(self.data) > 0:
-            return False
-        try:
-            self.data = next(self.iter)
-        except StopIteration:
-            self.closed = True
-            return True
-        return False
-
-    # Enter and Exit are needed to allow this to work with with
-    def __enter__(self):
-        return self
-
-    # Could be improved for better error/exception handling
-    def __exit__(self, err_type, value, tracebock):
-        pass
-
-    def read(self, n=None):
-        if n is None:
-            return self.data + b''.join(l for l in self.iter)
-        else:
-            while len(self.data) < n:
-                try:
-                    self.data = b''.join((self.data, next(self.iter)))
-                except StopIteration:
-                    break
-            result, self.data = self.data[:n], self.data[n:]
-            return result
-
-def _open_local_file(location):
-    if location.startswith("file://"):
-        if os.name == 'nt': # skip the initial
-            return open(location.replace('file:///','').replace('file://',''),'rb')
-        else:
-            return open(location.replace('file://',''),'rb')
-
-content_handlers = [
-    _open_local_file,
-    lambda location: FileLikeFromIter(requests.get(location,stream=True).iter_content(1024*1024))
+"""setlr: Semantic Extract, Transform and Load-er
+
+This package generates RDF graphs from tabular, JSON, XML, and RDF sources
+using declarative SETL (Semantic Extract, Transform, Load) scripts.
+
+Main functions:
+    run_setl(setl_graph): Execute a SETL script (recommended)
+    _setl(setl_graph): Deprecated, use run_setl() instead
+    main(): Command-line interface entry point
+"""
+
+# Import the core functionality
+from .core import (
+    # Main API functions
+    run_setl,
+    _setl,  # Deprecated, but kept for backward compatibility
+    main,
+    
+    # Utility functions that might be used by library users
+    read_csv,
+    read_excel,
+    read_json,
+    read_xml,
+    read_graph,
+    extract,
+    json_transform,
+    transform,
+    load,
+    isempty,
+    hash,
+    camelcase,
+    get_content,
+    
+    # Logger for configuration
+    logger,
+    
+    # Namespaces
+    csvw,
+    ov,
+    setl,
+    prov,
+    pv,
+    sp,
+    sd,
+    dc,
+    void,
+    shacl,
+    api_vocab,
+)
+
+# Version
+__version__ = '1.0.1'
+
+# Define what gets imported with "from setlr import *"
+__all__ = [
+    'run_setl',
+    'main',
+    # Include commonly used utilities
+    'read_csv',
+    'read_excel', 
+    'read_json',
+    'read_xml',
+    'read_graph',
+    'extract',
+    'json_transform',
+    'transform',
+    'load',
+    'isempty',
+    'hash',
+    'camelcase',
+    'get_content',
+    # Namespaces
+    'csvw',
+    'ov',
+    'setl',
+    'prov',
+    'pv',
+    'sp',
+    'sd',
+    'dc',
+    'void',
+    'shacl',
+    'api_vocab',
+    # _setl stays importable for backward compatibility but is deliberately omitted here to discourage new use
 ]
 
-def get_content(location, result):
-    response = None
-    for handler in content_handlers:
-        response = handler(location)
-        if response is not None:
-            break
-    if result[RDF.type:setl.Tempfile]:
-        result = to_tempfile(response)
-
-    for t in result[RDF.type]:
-        # Do we know how to unpack this?
-        if t.identifier in unpackers:
-            response = unpackers[t.identifier](response)
-    return response
-
-def to_tempfile(f):
-    tf = tempfile.TemporaryFile()
-    logger.debug("Writing %s to disk.", f)
-    for chunk in f:
-        if chunk: # filter out keep-alive new chunks
-            tf.write(chunk)
-    tf.seek(0)
-    logger.debug("Finished writing %s to disk.", f)
-    return tf
-
-def unpack_zipfile(f):
-    zf = zipfile.ZipFile(f, mode='r')
-    files = zf.infolist()
-    return zf.open(files[0])
-
-unpackers = {
-#    setl.Tempfile : lambda x: x,
-    setl.ZipFile : lambda x: unpack_zipfile(to_tempfile(x)),
-    setl.GZipFile : lambda f: gzip.GzipFile(fileobj=f,mode='r')
-}
-
-packers = {
-#    setl.Tempfile : lambda x: x,
-    setl.GZipFile : lambda f: gzip.GzipFile(fileobj=f,mode='wb')
-}
-
-def read_excel(location, result):
-    args = dict(
-        sheet_name = result.value(setl.sheetname, default=Literal(0)).value,
-        header = [int(x) for x in result.value(csvw.headerRow, default=Literal('0')).value.split(',')],
-        skiprows = result.value(csvw.skipRows, default=Literal(0)).value
-    )
-    if result.value(csvw.header):
-        args['header'] = [result.value(csvw.header).value]
-    with get_content(location, result) as fo:
-        df = pandas.read_excel(fo, encoding='utf-8', **args)
-        return df
-
-def read_xml(location, result):
-    validate_dtd = False
-    if result[RDF.type:setl.DTDValidatedXML]:
-        validate_dtd = True
-    f = iterparse_filter.IterParseFilter(validate_dtd=validate_dtd)
-    if result.value(setl.xpath) is None:
-        logger.debug("no xpath to select on from %s", location)
-        f.iter_end("/*")
-    for xp in result[setl.xpath]:
-        f.iter_end(xp.value)
-    with get_content(location, result) as fo:
-        for (i, (event, ele)) in enumerate(tqdm(f.iterparse(fo))):
-            yield i, ele
-
-
-def read_json(location, result):
-    selector = result.value(api_vocab.selector)
-    if selector is not None:
-        selector = selector.value
-    else:
-        selector = ""
-    with get_content(location, result) as fo:
-        yield from enumerate(tqdm(ijson.items(fo, selector)))
-
-
-extractors = {
-    setl.XPORT : lambda location, result: pandas.read_sas(get_content(location, result), format='xport'),
-    setl.SAS7BDAT : lambda location, result: pandas.read_sas(get_content(location, result), format='sas7bdat'),
-    setl.Excel : read_excel,
-    csvw.Table : read_csv,
-    OWL.Ontology : read_graph,
-    void.Dataset : read_graph,
-    setl.JSON : read_json,
-    setl.XML : read_xml,
-    URIRef("https://www.iana.org/assignments/media-types/text/plain") : lambda location, result: get_content(location, result)
-}
-
-
-try:
-    from bs4 import BeautifulSoup
-    extractors[setl.HTML] = lambda location, result: BeautifulSoup(get_content(location, result).read(), 'html.parser')
-except Exception as e:
-    pass
-
-
-def load_csv(csv_resource):
-    column_descriptions = {}
-    for col in csv_resource[csvw.column]:
-        label = col.value(RDFS.label).value
-        column_descriptions[label] = col
-    csv_graph = Graph(identifier=csv_resource)
-    s = [x for x in csv.reader(open(str(csv_resource.value(csvw.url).identifier).replace("file://","")),
-                   delimiter=str(csv_resource.value(csvw.delimiter,default=",").value),
-                   quotechar=str(csv_resource.value(csvw.quoteChar,default='"').value))]
-    header = None
-    properties = []
-    propertyMap = {}
-    skip_value = csv_resource.value(csvw.null)
-    if skip_value is not None:
-        skip_value = skip_value.value
-    for i, r in enumerate(s):
-        if header is None:
-            header = r
-            for j, h in enumerate(header):
-                col_desc = None
-                if h in column_descriptions:
-                    col_desc = column_descriptions[h]
-                col = csv_graph.resource(URIRef("urn:col_"+str(h)))
-                col.add(RDFS.label, Literal(h))
-                col.add(ov.csvCol, Literal(j))
-                if col_desc is not None:
-                    col.add(RDFS.range, col_desc.value(RDFS.range, default=XSD.string))
-                properties.append(col)
-                propertyMap[h] = col
-            continue
-        res = csv_graph.resource(csv_resource.identifier+"_row_"+str(i))
-        res.add(RDF.type, csvw.Row)
-        res.add(csvw.rownum, Literal(i))
-        for j, value in enumerate(r):
-            if skip_value is not None and skip_value == value:
-                continue
-            #print i, j, value
-            prop = properties[j]
-            datatype = prop.value(RDFS['range'], default=XSD.string)
-            lit =  Literal(value, datatype=datatype.identifier)
-            #print i, prop.identifier, lit.n3()
-            res.add(prop.identifier, lit)
-    logger.debug("Table has %s rows, %s columns, and %s triples", len(s), len(header), len(csv_graph))
-    return csv_graph
-
-formats = {
-    None:'xml',
-    "application/rdf+xml":'xml',
-    "text/rdf":'xml',
-    'text/turtle':'turtle',
-    'application/turtle':'turtle',
-    'application/x-turtle':'turtle',
-    'text/plain':'nt',
-    'text/n3':'n3',
-    'application/trig':'trig',
-    'application/json':'json-ld'
-}
-
-def create_python_function(f, resources):
-    global_vars = {'this' : f, 'resources': resources}
-    local_vars = {}
-    script = f.value(prov.value)
-    for qd in f[prov.qualifiedDerivation]:
-        entity = resources[qd.value(prov.entity).identifier]
-        name = qd.value(prov.hadRole).value(dc.identifier)
-        local_vars[name.value] = entity
-    exec(script.value, local_vars, global_vars)
-    resources[f.identifier] = global_vars['result']
-
-def get_order(setl_graph):
-    nodes = collections.defaultdict(set)
-
-    for typ in actions:
-        for task in setl_graph.subjects(RDF.type, typ):
-            task = setl_graph.resource(task)
-            for used in task[prov.used]:
-                nodes[task.identifier].add(used.identifier)
-
-            for usage in task[prov.qualifiedUsage]:
-                used = usage.value(prov.entity)
-                nodes[task.identifier].add(used.identifier)
-            for generated in task.subjects(prov.wasGeneratedBy):
-                nodes[generated.identifier].add(task.identifier)
-            for derivation in task[prov.qualifiedDerivation]:
-                derived = derivation.value(prov.entity)
-                nodes[task.identifier].add(derived.identifier)
-
-    return toposort_flatten(nodes)
-
-def extract(e, resources):
-    logger.info('Extract %s',e.identifier)
-    used = e.value(prov.used)
-    for result in e.subjects(prov.wasGeneratedBy):
-        if used is None:
-            used = result
-        for t in result[RDF.type]:
-            # Do we know how to generate this?
-            if t.identifier in extractors:
-                logger.info("Using %s", used.identifier)
-                resources[result.identifier] = extractors[t.identifier](used.identifier, result)
-                return resources[result.identifier]
-
-def isempty(value):
-    try:
-        return isnan(value)
-    except (TypeError, ValueError):
-        return value is None
-
-def clone(value):
-    __doc__ = '''This is only a JSON-level cloning of objects. Atomic objects are invariant, and don't need to be cloned.'''
-    if isinstance(value, list):
-        return [x for x in value]
-    elif isinstance(value, dict):
-        return dict(value)
-    else:
-        return value
-
-functions = {}
-def get_function(expr, local_keys):
-    used_local_keys = [k for k in local_keys if k in expr]
-    key = tuple([expr]+sorted(used_local_keys))
-    if key not in functions:
-        script = '''lambda %s,**kwargs: %s'''% (', '.join(sorted(used_local_keys)), expr)
-        #print(script)
-        fn = eval(script)
-        fn.__name__ = expr.encode("ascii", "ignore").decode('utf8')
-        functions[key] = fn
-    return functions[key]
-
-templates = {}
-def get_template(templ):
-    if templ not in templates:
-        t = Template(templ)
-        templates[templ] = t
-    return templates[templ]
-
-def flatten_lists(o):
-    if isinstance(o, list):
-        result = []
-        for x in o:
-            flattened = flatten_lists(x)
-            if isinstance(flattened, list):
-                result.extend(flattened)
-            else:
-                result.append(flattened)
-        return result
-    elif isinstance(o, dict):
-        for key in o.keys():
-            o[key] = flatten_lists(o[key])
-        return o
-    else:
-        return o
-
-def process_row(row, template, rowname, table, resources, transform, variables):
-    result = []
-    e = {'row':row,
-         'name': rowname,
-         'table': table,
-         'resources': resources,
-         'template': template,
-         "transform": transform,
-         "setl_graph": transform.graph,
-         "isempty":isempty,
-         "slugify" : slugify,
-         "camelcase" : camelcase,
-         "hash":hash,
-         "isinstance":isinstance,
-         "str":str,
-         "float":float,
-         "int":int,
-         "chain": lambda x: chain(*x),
-         "list":list
-    }
-    e.update(variables)
-    e.update(rdflib.__dict__)
-    todo = [[x, result, e] for x in template]
-
-    while len(todo) > 0:
-        task, parent, env = todo.pop()
-        key = None
-        value = task
-        this = None
-        if isinstance(parent, dict):
-            if len(task) != 2:
-                logger.debug(task)
-            key, value = task
-            kt = get_template(key)
-            key = kt.render(**env)
-        if isinstance(value, dict):
-            if '@if' in value:
-                try:
-                    fn = get_function(value['@if'], list(env.keys()))
-                    incl = fn(**env)
-                    if incl is None or not incl:
-                        continue
-                except KeyError:
-                    continue
-                except AttributeError:
-                    continue
-                except TypeError:
-                    continue
-                except Exception as e:
-                    logger.error("=" * 80)
-                    logger.error("Error evaluating @if conditional: %s", value['@if'])
-                    transform_obj = env.get('transform', {})
-                    transform_id = transform_obj.identifier if hasattr(transform_obj, 'identifier') else 'unknown'
-                    logger.error("Transform: %s, Row: %s", transform_id, env.get('name', 'unknown'))
-                    logger.error("Error type: %s", type(e).__name__)
-                    logger.error("Error message: %s", str(e))
-                    logger.error("Row-specific variables:")
-                    for key in ['row', 'name']:
-                        if key in env:
-                            v = env[key]
-                            try:
-                                logger.error("  %s: %s", key, str(v)[:200])
-                            except Exception:
-                                logger.error("  %s: <%s>", key, type(v).__name__)
-                    logger.error("=" * 80)
-                    raise RuntimeError(f"Error in @if conditional '{value['@if']}': {type(e).__name__}: {str(e)}") from e
-            if '@for' in value:
-                f = value['@for']
-                if isinstance(f, list):
-                    f = ' '.join(f)
-                variable_list, expression = f.split(" in ", 1)
-                variable_list = re.split(r',\s+', variable_list.strip())
-                val = value
-                if '@do' in value:
-                    val = value['@do']
-                else:
-                    del val['@for']
-                try:
-                    fn = get_function(expression, list(env.keys()))
-                    values = fn(**env)
-                    if values is not None:
-                        for v in values:
-                            if len(variable_list) == 1:
-                                v = [v]
-                            new_env = dict(env)
-                            for i, variable in enumerate(variable_list):
-                                new_env[variable] = v[i]
-                            child = clone(val)
-                            todo.append((child, parent, new_env))
-                except KeyError:
-                    pass
-                except Exception as e:
-                    logger.error("=" * 80)
-                    logger.error("Error in @for loop: %s", value['@for'])
-                    transform_obj = env.get('transform', {})
-                    transform_id = transform_obj.identifier if hasattr(transform_obj, 'identifier') else 'unknown'
-                    logger.error("Transform: %s, Row: %s", transform_id, env.get('name', 'unknown'))
-                    logger.error("Error type: %s", type(e).__name__)
-                    logger.error("Error message: %s", str(e))
-                    logger.error("Expression: %s", expression)
-                    logger.error("Variables to assign: %s", variable_list)
-                    logger.error("Available variables: %s", sorted([k for k in env.keys() if not k.startswith('_')]))
-                    logger.error("=" * 80)
-                    raise RuntimeError(f"Error in @for loop '{value['@for']}': {type(e).__name__}: {str(e)}") from e
-                continue
-            if '@with' in value:
-                f = value['@with']
-                if isinstance(f, list):
-                    f = ' '.join(f)
-                expression, variable_list = f.split(" as ", 1)
-                variable_list = re.split(r',\s+', variable_list.strip())
-                val = value
-                if '@do' in value:
-                    val = value['@do']
-                else:
-                    del val['@with']
-                try:
-                    fn = get_function(expression, list(env.keys()))
-                    v = fn(**env)
-                    if v is not None:
-                        if len(variable_list) == 1 and not (
-                                isinstance(v, collections.Iterable)
-                                and not isinstance(v, str)):
-                            v = [v]
-                        new_env = dict(env)
-                        for i, variable in enumerate(variable_list):
-                            new_env[variable] = v[i]
-                        child = clone(val)
-                        todo.append((child, parent, new_env))
-                except KeyError:
-                    pass
-                except Exception as e:
-                    logger.error("=" * 80)
-                    logger.error("Error in @with expression: %s", value['@with'])
-                    transform_obj = env.get('transform', {})
-                    transform_id = transform_obj.identifier if hasattr(transform_obj, 'identifier') else 'unknown'
-                    logger.error("Transform: %s, Row: %s", transform_id, env.get('name', 'unknown'))
-                    logger.error("Error type: %s", type(e).__name__)
-                    logger.error("Error message: %s", str(e))
-                    logger.error("Expression: %s", expression)
-                    logger.error("Variables to assign: %s", variable_list)
-                    logger.error("Available variables: %s", sorted([k for k in env.keys() if not k.startswith('_')]))
-                    logger.error("=" * 80)
-                    raise RuntimeError(f"Error in @with expression '{value['@with']}': {type(e).__name__}: {str(e)}") from e
-                continue
-            this = {}
-            for child in list(value.items()):
-                if child[0] == '@if':
-                    continue
-                if child[0] == '@for':
-                    continue
-                todo.append((child, this, env))
-        elif isinstance(value, list):
-            this = []
-            for child in value:
-                todo.append((child, this, env))
-        elif isinstance(value, str):
-            try:
-                template = get_template(str(value))
-                this = template.render(**env)
-            except Exception as e:
-                logger.error("=" * 80)
-                logger.error("Error rendering Jinja2 template: %s", value[:200] if len(value) > 200 else value)
-                transform_obj = env.get('transform', {})
-                transform_id = transform_obj.identifier if hasattr(transform_obj, 'identifier') else 'unknown'
-                logger.error("Transform: %s, Row: %s", transform_id, env.get('name', 'unknown'))
-                logger.error("Error type: %s", type(e).__name__)
-                logger.error("Error message: %s", str(e))
-                logger.error("Template variables referenced in template:")
-                # Try to extract variable references from the template
-                matches = TEMPLATE_VAR_PATTERN.findall(value)
-                if matches:
-                    for match in matches:
-                        var_name = match.strip().split('.')[0].split('[')[0].strip()
-                        if var_name in env:
-                            val = env[var_name]
-                            if type(val).__name__ == 'Element':
-                                # XML Element
-                                try:
-                                    val = xml.etree.ElementTree.tostring(val).decode('utf-8', errors='replace')[:200]
-                                except Exception:
-                                    val = "<XML Element>"
-                            else:
-                                try:
-                                    val = str(val)[:200]
-                                except Exception:
-                                    val = f"<{type(val).__name__}>"
-                            logger.error("  %s = %s", var_name, val)
-                        else:
-                            logger.error("  %s = <NOT FOUND>", var_name)
-                logger.error("=" * 80)
-                raise RuntimeError(f"Error rendering template: {type(e).__name__}: {str(e)}") from e
-        else:
-            this = value
-
-        if key is not None:
-            parent[key] = this
-        else:
-            parent.append(this)
-
-    return flatten_lists(result)
-
-def json_transform(transform, resources):
-    logger.info("Transform %s", transform.identifier)
-    tables = [u for u in transform[prov.used]]
-    variables = {}
-    for usage in transform[prov.qualifiedUsage]:
-        used = usage.value(prov.entity)
-        role = usage.value(prov.hadRole)
-        roleID  = role.value(dc.identifier)
-        variables[roleID.value] = resources[used.identifier]
-        #print "Using", used.identifier, "as", roleID.value
-
-    generated = list(transform.subjects(prov.wasGeneratedBy))[0]
-    logger.info("Generating %s", generated.identifier)
-
-    connected_downstream_graph = '''
-construct {
-   ?target ?p ?o
-} where {
-   ?source (<>|!<>)* ?target.
-   ?target ?p ?o.
-}
-'''
-    shape_graph = Graph()
-    for shape in transform.objects(dc.conformsTo):
-        if shape[RDF.type:shacl.NodeShape] or shape[RDF.type:shacl.PropertyShape]:
-            logger.info("Validating against SHACL shape %s", shape.identifier)
-            shape_graph += transform.graph.query(connected_downstream_graph,
-                                                 initBindings={"source":shape.identifier})
-    if generated.identifier in resources:
-        result = resources[generated.identifier]
-    else:
-        result = ConjunctiveGraph()
-        if generated[RDF.type : setl.Persisted]:
-            store = TrigStore()
-            result = ConjunctiveGraph(store=store)
-        if generated[RDF.type : setl.Persisted]:
-            tempdir = tempfile.mktemp()
-            logger.info("Persisting %s to %s", generated.identifier, tempdir)
-            result.store.open(tempdir, True)
-    s = transform.value(prov.value).value
-    try:
-        jslt = json.loads(s)
-    except json.JSONDecodeError as e:
-        logger.error("Error parsing JSON-LD template for transform %s", transform.identifier)
-        lineno = getattr(e, 'lineno', 0)
-        colno = getattr(e, 'colno', 0)
-        msg = getattr(e, 'msg', str(e))
-        logger.error("JSON parsing error at line %d, column %d: %s", lineno, colno, msg)
-        # Show context around the error (8 lines before, 3 after for better bracket matching)
-        lines = s.split("\n")
-        start_line = max(0, lineno - 8)
-        end_line = min(len(lines), lineno + 3)
-        logger.error("Template context:")
-        for i in range(start_line, end_line):
-            prefix = ">>> " if i == lineno - 1 else "    "
-            logger.error("%s%d: %s", prefix, i + 1, lines[i])
-        raise ValueError(f"Invalid JSON-LD template in transform {transform.identifier}: {msg} at line {lineno}, column {colno}") from e
-    except Exception as e:
-        logger.error("Error parsing JSON-LD template for transform %s: %s", transform.identifier, str(e))
-        logger.error("Template content:\n%s", s[:500])  # Show first 500 chars
-        raise ValueError(f"Invalid JSON-LD template in transform {transform.identifier}: {str(e)}") from e
-    context = transform.value(setl.hasContext)
-    if context is not None:
-        context = json.loads(context.value)
-    for t in tables:
-        logger.info("Using %s", t.identifier)
-        table = resources[t.identifier]
-        it = table
-        if isinstance(table, pandas.DataFrame):
-            #if run_samples:
-            #    table = table.head()
-            it = tqdm(table.iterrows(), total=table.shape[0])
-            #logger.info("Transforming %s rows.", len(table.index))
-        else:
-            logger.info("Transform %s", t.identifier)
-        for rowname, row in it:
-            if run_samples > 0 and rowname >= run_samples:
-                break
-            try:
-                root = None
-                data = None
-                root = {
-                    "@id": generated.identifier,
-                    "@graph": process_row(row, jslt, rowname, table, resources, transform, variables)
-                }
-                if context is not None:
-                    root['@context'] = context
-
-                #logger.debug(json.dumps(root, indent=4))
-                #before = len(result)
-                #graph = ConjunctiveGraph(identifier=generated.identifier)
-                #graph.parse(data=json.dumps(root),format="json-ld")
-                data = json.dumps(root)
-                #del root
-                
-                if len(shape_graph) > 0:
-                    d = ConjunctiveGraph()
-                    d.parse(data=data,format='json-ld')
-                    conforms, report, message = validate(d,
-                                                         shacl_graph=shape_graph,
-                                                         advanced=True,
-                                                         debug=False)
-                    if not conforms:
-                        print(message)
-                result.parse(data=data, format="json-ld")
-                #del data
-                #after = len(result)
-                #logger.debug("Row "+str(rowname))#+" added "+str(after-before)+" triples.")
-                #sys.stdout.flush()
-            except Exception as e:
-                logger.error("=" * 80)
-                logger.error("Error in transform %s while processing row %s", transform.identifier, rowname)
-                if isinstance(table, pandas.DataFrame):
-                    # Format row data with better NaN handling
-                    row_dict = {}
-                    for key, value in dict(row).items():
-                        if pandas.isna(value):
-                            row_dict[key] = "<empty/missing>"
-                        else:
-                            row_dict[key] = value
-                    logger.error("Row data: %s", row_dict)
-                else:
-                    logger.error("Row identifier: %s", rowname)
-                
-                # Try to provide more specific error information
-                error_type = type(e).__name__
-                if "JSON-LD" in str(e) or "json" in str(e).lower():
-                    logger.error("JSON-LD processing error: %s", str(e))
-                    if data is not None:
-                        logger.error("Generated JSON-LD (first 1000 chars):\n%s", data[:1000])
-                elif hasattr(e, 'lineno'):
-                    logger.error("%s at line %d: %s", error_type, e.lineno, str(e))
-                else:
-                    logger.error("%s: %s", error_type, str(e))
-                
-                logger.error("=" * 80)
-                raise RuntimeError(f"Failed to transform row {rowname} in transform {transform.identifier}: {error_type}: {str(e)}") from e
-
-    resources[generated.identifier] = result
-
-def transform(transform_resource, resources):
-    logger.info('Transforming %s',transform_resource.identifier)
-
-    transform_graph = ConjunctiveGraph()
-    for result in transform_graph.subjects(prov.wasGeneratedBy):
-        transform_graph = ConjunctiveGraph(identifier=result.identifier)
-
-    used = set(transform_resource[prov.used])
-
-    for csv in [u for u in used if u[RDF.type:csvw.Table]]:
-        csv_graph = Graph(store=transform_graph.store, identifier=csv)
-        csv_graph += graphs[csv.identifier]
-
-
-    for script in [u for u in used if u[RDF.type:setl.PythonScript]]:
-        logger.info("Script: %s", script.identifier)
-        s = script.value(prov.value).value
-        l = dict(graph = transform_graph, setl_graph = transform_resource.graph)
-        gl = dict()
-        exec(s, gl, l)
-
-    for jsldt in [u for u in used if u[RDF.type:setl.PythonScript]]:
-        logger.info("Script: %s", script.identifier)
-        s = script.value(prov.value).value
-        l = dict(graph = transform_graph, setl_graph = transform_resource.graph)
-        gl = dict()
-        exec(s, gl, l)
-
-    for update in [u for u in used if u[RDF.type:sp.Update]]:
-        logger.info("Update: %s", update.identifier)
-        query = update.value(prov.value).value
-        transform_graph.update(query)
-
-    for construct in [u for u in used if u[RDF.type:sp.Construct]]:
-        logger.info("Construct: %s", construct.identifier)
-        query = construct.value(prov.value).value
-        g = transform_graph.query(query)
-        transform_graph += g
-
-    for csv in [u for u in used if u[RDF.type:csvw.Table]]:
-        g = Graph(identifier=csv.identifier,store=transform_graph.store)
-        g.remove((None, None, None))
-        transform_graph.store.remove_graph(csv.identifier)
-
-    for result in transform_graph.subjects(prov.wasGeneratedBy):
-        graphs[result.identifier] = transform_graph
-
-def _load_open(generated):
-    if generated.identifier.startswith("file://"):
-        if os.name == 'nt': # skip the initial
-            filename = generated.identifier.replace('file:///','').replace('file://','')
-        else:
-            filename = generated.identifier.replace('file://','')
-
-    fh = open(filename, 'wb')
-    for type, pack in packers.items():
-        if generated[RDF.type : type]:
-            return pack(fh)
-    return fh
-
-def load(load_resource, resources):
-    logger.info('Load %s',load_resource.identifier)
-    file_graph = Dataset(default_union=True)
-    to_disk = False
-    for used in load_resource[prov.used]:
-        if used[RDF.type : setl.Persisted]:
-            to_disk = True
-            file_graph = Dataset(store='Sleepycat', default_union=True)
-            tempdir = tempfile.mkdtemp()
-            logger.debug("Gathering %s into %s", load_resource.identifier, tempdir)
-            file_graph.store.open(tempdir, True)
-            break
-    if len(list(load_resource[prov.used])) == 1:
-        logger.info("Using %s",load_resource.value(prov.used).identifier)
-        file_graph = resources[load_resource.value(prov.used).identifier]
-    else:
-        for used in load_resource[prov.used]:
-            logger.info("Using %s",used.identifier)
-            used_graph = resources[used.identifier]
-            file_graph.namespace_manager = used_graph.namespace_manager
-            #print used_graph.serialize(format="trig")
-            file_graph.addN(used_graph.quads())
-
-    for generated in load_resource.subjects(prov.wasGeneratedBy):
-        # TODO: support LDP-based loading
-        if generated[RDF.type:pv.File]:
-            fmt = generated.value(dc['format'])
-            if fmt is not None:
-                fmt = fmt.value
-            if fmt in formats:
-                fmt = formats[fmt]
-                #print fmt
-            with _load_open(generated) as o:
-                file_graph.serialize(o, format=fmt)
-
-        elif generated[RDF.type:sd.Service]:
-            from rdflib.plugins.stores.sparqlstore import SPARQLUpdateStore
-            endpoint = generated.value(sd.endpoint, default=generated).identifier
-            store = SPARQLUpdateStore(endpoint, endpoint, autocommit=False)
-            endpoint_graph = Dataset(store=store, identifier=generated.identifier, default_union=True)
-            endpoint_graph.addN(file_graph.quads())
-            endpoint_graph.commit()
-    #if to_disk:
-    #    file_graph.close()
-
-
-actions = {
-    setl.Extract : extract,
-    setl.Transform : json_transform,
-    setl.Load : load,
-    setl.PythonScript : create_python_function,
-    setl.IsEmpty : isempty
-}
-
-def _setl(setl_graph):
-    global logger
-    if logger is None:
-        logger = logging.getLogger(__name__)
-    resources = {}
-    resources.update(actions)
-
-    tasks = [setl_graph.resource(t) for t in get_order(setl_graph)]
-
-    for task in tasks:
-        action = [actions[t.identifier] for t in task[RDF.type] if t.identifier in actions]
-        if len(action) > 0:
-            action[0](task, resources)
-    return resources
-logger = None
-
-import click
-@click.command()
-@click.option('--quiet', '-q', is_flag=True, default=False, help="Minimize logging.")
-@click.option('-n', default=-1, help="Only process the first N rows.", type=int)
-#@click.option('--rdf-validation', default=None, help="Save the RDF validation report to this file.")
-#@click.option('--text-validation', default=None, help="Save the text validation report to this file.")
-@click.argument('script', type=click.Path(exists=True))
-def main(script, rdf_validation=None, text_validation=None, quiet=False, n=-1):
-    logging_level = logging.DEBUG
-    if quiet:
-        logging_level = logging.WARNING
-    logging.basicConfig(level=logging_level)
-
-    global logger
-    logger = logging.getLogger(__name__)
-
-    global run_samples
-    run_samples = n
-    setl_graph = ConjunctiveGraph()
-    content = open(script).read()
-    setl_graph.parse(data=content, format="turtle")
-
-    graphs = _setl(setl_graph)
+# Note: _setl is still importable for backward compatibility but not in __all__
diff --git a/setlr/core.py b/setlr/core.py
new file mode 100644
index 0000000..e728bcd
--- /dev/null
+++ b/setlr/core.py
@@ -0,0 +1,1027 @@
+#!/usr/bin/env python
+# -*- coding: utf-8 -*-
+
+from builtins import str
+from builtins import next
+from builtins import object
+from rdflib import *
+from rdflib.util import guess_format
+import rdflib
+import csv
+import json
+import sys, collections
+import requests
+import pandas
+import re
+import os
+from six import text_type as str
+
+from jinja2 import Template
+from toposort import toposort_flatten
+from numpy import isnan
+import uuid
+import tempfile
+import ijson
+from . import iterparse_filter
+#import xml.etree.ElementTree as ET
+import xml.etree.ElementTree
+
+from itertools import chain
+
+import zipfile
+import gzip
+
+import logging
+
+from tqdm import tqdm
+
+import hashlib
+from slugify import slugify
+from pyshacl import validate
+
+from .trig_store import TrigStore
+
+def hash(value):
+    m = hashlib.sha256()
+    m.update(value.encode('utf-8'))
+    return m.hexdigest()
+
+csvw = Namespace('http://www.w3.org/ns/csvw#')
+ov = Namespace('http://open.vocab.org/terms/')
+setl = Namespace('http://purl.org/twc/vocab/setl/')
+prov = Namespace('http://www.w3.org/ns/prov#')
+pv = Namespace('http://purl.org/net/provenance/ns#')
+sp = Namespace('http://spinrdf.org/sp#')
+sd = Namespace('http://www.w3.org/ns/sparql-service-description#')
+dc = Namespace('http://purl.org/dc/terms/')
+void = Namespace('http://rdfs.org/ns/void#')
+shacl = Namespace('http://www.w3.org/ns/shacl#')
+api_vocab = Namespace('http://purl.org/linked-data/api/vocab#')
+
+sys.setrecursionlimit(10000)
+
+from requests_testadapter import Resp
+
+# Regex pattern for extracting Jinja2 template variables (compiled once for performance)
+TEMPLATE_VAR_PATTERN = re.compile(r'\{\{([^}]+)\}\}')
+
+def camelcase(s):
+    return slugify(s).title().replace("-","")
+
+class LocalFileAdapter(requests.adapters.HTTPAdapter):
+    def build_response_from_file(self, request):
+        file_path = request.url[7:]
+        with open(file_path, 'rb') as file:
+            buff = bytearray(os.path.getsize(file_path))
+            file.readinto(buff)
+            resp = Resp(buff)
+            r = self.build_response(request, resp)
+            return r
+    def send(self, request, stream=False, timeout=None,
+             verify=True, cert=None, proxies=None):
+        return self.build_response_from_file(request)
+
+requests_session = requests.session()
+requests_session.mount('file://', LocalFileAdapter())
+requests_session.mount('file:///', LocalFileAdapter())
+
+datatypeConverters = collections.defaultdict(lambda: str)
+datatypeConverters.update({
+    XSD.string: str,
+    XSD.decimal: float,
+    XSD.integer: int,
+    XSD.float: float,
+    XSD.double: float
+})
+
+run_samples = -1
+
+_rdf_formats_to_guess = [
+    'xml',
+    'json-ld',
+    'trig',
+    'nquads',
+    'trix'
+]
+
+
+def read_csv(location, result):
+    args = dict(
+        sep = result.value(csvw.delimiter, default=Literal(",")).value,
+        #header = result.value(csvw.headerRow, default=Literal(0)).value),
+        skiprows = result.value(csvw.skipRows, default=Literal(0)).value,
+        dtype=str,
+        # dtype = object    # Does not seem to play well with future and python2/3 conversion
+    )
+    if result.value(csvw.header):
+        args['header'] = [0]
+    with get_content(location, result) as fo:
+        df = pandas.read_csv(fo, encoding='utf-8', **args)
+        #logger.debug("Loaded %s", location)
+        return df
+
+def read_graph(location, result, g = None):
+    """Parse the RDF at `location` into graph `result.identifier` of `g`'s store (following owl:imports for ontologies); return `g`."""
+    if g is None:
+        g = ConjunctiveGraph()
+    graph = ConjunctiveGraph(store=g.store, identifier=result.identifier)
+    if len(graph) == 0:
+        with get_content(location, result) as fo:  # release the stream once it has been read
+            data = fo.read()
+        f = guess_format(location)
+        for fmt in [f] + _rdf_formats_to_guess:
+            try:
+                graph.parse(data=data, format=fmt)
+                break
+            except Exception as e:
+                logger.debug("Could not parse %s as %s: %s", location, fmt, e)  # wrong guess; try the next format
+        if len(graph) == 0:
+            logger.error("Could not parse graph: %s", location)
+        if result[RDF.type:OWL.Ontology]:
+            for ontology in graph.subjects(RDF.type, OWL.Ontology):
+                for i in [graph.resource(x) for x in graph.objects(ontology, OWL.imports)]:
+                    read_graph(i.identifier, i, g = g)
+    return g
+
+class FileLikeFromIter(object):
+    """Read-only, non-seekable file-like wrapper around an iterator of byte chunks."""
+    _closed = False
+
+    def __init__(self, content_iter):
+        self.iter = content_iter
+        self.data = b''  # bytes pulled from the iterator but not yet handed to a reader
+
+    def __iter__(self):
+        return self.iter
+
+    def readable(self):
+        return True
+
+    def writable(self):
+        return False
+
+    def seekable(self):
+        return False
+
+    def closed(self):  # a method (not an attribute); may prefetch one chunk into self.data
+        if self._closed:
+            return True
+        if len(self.data) > 0:
+            return False
+        try:
+            self.data = next(self.iter)
+        except StopIteration:
+            self._closed = True  # was `self.closed = True`, which replaced this method with a bool
+            return True
+        return False
+
+    # Enter and Exit are needed to allow this to work with `with`; exit is a no-op
+    def __enter__(self):
+        return self
+
+    def __exit__(self, err_type, value, tracebock):
+        pass
+
+    def read(self, n=None):
+        if n is None:
+            data, self.data = self.data, b''  # drain the buffer so it is not returned twice
+            return data + b''.join(self.iter)
+        while len(self.data) < n:
+            try:
+                self.data = b''.join((self.data, next(self.iter)))
+            except StopIteration:
+                break
+        result, self.data = self.data[:n], self.data[n:]
+        return result
+
+def _open_local_file(location):
+    if location.startswith("file://"):
+        if os.name == 'nt': # skip the initial
+            return open(location.replace('file:///','').replace('file://',''),'rb')
+        else:
+            return open(location.replace('file://',''),'rb')
+
+content_handlers = [
+    _open_local_file,
+    lambda location: FileLikeFromIter(requests.get(location,stream=True).iter_content(1024*1024))
+]
+
+def get_content(location, result):
+    """Open `location` via the first handler that accepts it, then spool/unpack it per `result`'s RDF types."""
+    response = None
+    for handler in content_handlers:
+        response = handler(location)
+        if response is not None:
+            break
+    if result[RDF.type:setl.Tempfile]:
+        response = to_tempfile(response)  # rebind the stream, not `result`: its RDF types are read below
+    for t in result[RDF.type]:
+        # Do we know how to unpack this?
+        if t.identifier in unpackers:
+            response = unpackers[t.identifier](response)
+    return response
+
+def to_tempfile(f):
+    tf = tempfile.TemporaryFile()
+    logger.debug("Writing %s to disk.", f)
+    for chunk in f:
+        if chunk: # filter out keep-alive new chunks
+            tf.write(chunk)
+    tf.seek(0)
+    logger.debug("Finished writing %s to disk.", f)
+    return tf
+
+def unpack_zipfile(f):
+    zf = zipfile.ZipFile(f, mode='r')
+    files = zf.infolist()
+    return zf.open(files[0])
+
+unpackers = {
+#    setl.Tempfile : lambda x: x,
+    setl.ZipFile : lambda x: unpack_zipfile(to_tempfile(x)),
+    setl.GZipFile : lambda f: gzip.GzipFile(fileobj=f,mode='r')
+}
+
+packers = {
+#    setl.Tempfile : lambda x: x,
+    setl.GZipFile : lambda f: gzip.GzipFile(fileobj=f,mode='wb')
+}
+
+def read_excel(location, result):
+    """Load the spreadsheet at `location` into a DataFrame using the sheet/header/skip settings on `result`."""
+    args = dict(
+        sheet_name = result.value(setl.sheetname, default=Literal(0)).value,
+        header = [int(x) for x in result.value(csvw.headerRow, default=Literal('0')).value.split(',')],
+        skiprows = result.value(csvw.skipRows, default=Literal(0)).value
+    )
+    if result.value(csvw.header):
+        args['header'] = [result.value(csvw.header).value]
+    with get_content(location, result) as fo:
+        return pandas.read_excel(fo, **args)  # no `encoding`: recent pandas rejects it as an unknown keyword
+
+def read_xml(location, result):
+    validate_dtd = False
+    if result[RDF.type:setl.DTDValidatedXML]:
+        validate_dtd = True
+    f = iterparse_filter.IterParseFilter(validate_dtd=validate_dtd)
+    if result.value(setl.xpath) is None:
+        logger.debug("no xpath to select on from %s", location)
+        f.iter_end("/*")
+    for xp in result[setl.xpath]:
+        f.iter_end(xp.value)
+    with get_content(location, result) as fo:
+        for (i, (event, ele)) in enumerate(tqdm(f.iterparse(fo))):
+            yield i, ele
+
+
+def read_json(location, result):
+    selector = result.value(api_vocab.selector)
+    if selector is not None:
+        selector = selector.value
+    else:
+        selector = ""
+    with get_content(location, result) as fo:
+        yield from enumerate(tqdm(ijson.items(fo, selector)))
+
+
+extractors = {
+    setl.XPORT : lambda location, result: pandas.read_sas(get_content(location, result), format='xport'),
+    setl.SAS7BDAT : lambda location, result: pandas.read_sas(get_content(location, result), format='sas7bdat'),
+    setl.Excel : read_excel,
+    csvw.Table : read_csv,
+    OWL.Ontology : read_graph,
+    void.Dataset : read_graph,
+    setl.JSON : read_json,
+    setl.XML : read_xml,
+    URIRef("https://www.iana.org/assignments/media-types/text/plain") : lambda location, result: get_content(location, result)
+}
+
+
+try:
+    from bs4 import BeautifulSoup
+    extractors[setl.HTML] = lambda location, result: BeautifulSoup(get_content(location, result).read(), 'html.parser')
+except Exception as e:
+    pass
+
+
+def load_csv(csv_resource):
+    """Parse the CSV file named by `csv_resource`'s csvw:url into a Graph with one csvw:Row resource per data row."""
+    column_descriptions = {}
+    for col in csv_resource[csvw.column]:
+        label = col.value(RDFS.label).value
+        column_descriptions[label] = col
+    csv_graph = Graph(identifier=csv_resource)
+    path = str(csv_resource.value(csvw.url).identifier).replace("file://","")
+    # Literal defaults below: a plain str default has no `.value` and raised AttributeError
+    with open(path) as fo:  # close the handle as soon as every row has been read
+        s = list(csv.reader(fo,
+                   delimiter=str(csv_resource.value(csvw.delimiter,default=Literal(",")).value),
+                   quotechar=str(csv_resource.value(csvw.quoteChar,default=Literal('"')).value)))
+    header = None
+    properties = []
+    propertyMap = {}
+    skip_value = csv_resource.value(csvw.null)
+    if skip_value is not None:
+        skip_value = skip_value.value
+    for i, r in enumerate(s):
+        if header is None:
+            header = r
+            for j, h in enumerate(header):
+                col_desc = column_descriptions.get(h)
+                col = csv_graph.resource(URIRef("urn:col_"+str(h)))
+                col.add(RDFS.label, Literal(h))
+                col.add(ov.csvCol, Literal(j))
+                if col_desc is not None:
+                    col.add(RDFS.range, col_desc.value(RDFS.range, default=XSD.string))
+                properties.append(col)
+                propertyMap[h] = col
+            continue
+        res = csv_graph.resource(csv_resource.identifier+"_row_"+str(i))
+        res.add(RDF.type, csvw.Row)
+        res.add(csvw.rownum, Literal(i))
+        for j, value in enumerate(r):
+            if skip_value is not None and skip_value == value:
+                continue
+            prop = properties[j]
+            datatype = prop.value(RDFS['range'], default=XSD.string)
+            lit =  Literal(value, datatype=datatype.identifier)
+            res.add(prop.identifier, lit)
+    logger.debug("Table has %s rows, %s columns, and %s triples", len(s), len(header or []), len(csv_graph))
+    return csv_graph
+
+formats = {
+    None:'xml',
+    "application/rdf+xml":'xml',
+    "text/rdf":'xml',
+    'text/turtle':'turtle',
+    'application/turtle':'turtle',
+    'application/x-turtle':'turtle',
+    'text/plain':'nt',
+    'text/n3':'n3',
+    'application/trig':'trig',
+    'application/json':'json-ld'
+}
+
+def create_python_function(f, resources):
+    global_vars = {'this' : f, 'resources': resources}  # NOTE: despite the name, passed to exec() as *locals*
+    local_vars = {}  # NOTE: despite the name, passed to exec() as *globals*
+    script = f.value(prov.value)
+    for qd in f[prov.qualifiedDerivation]:
+        entity = resources[qd.value(prov.entity).identifier]
+        name = qd.value(prov.hadRole).value(dc.identifier)
+        local_vars[name.value] = entity  # expose each derived-from resource under its role's dc:identifier
+    exec(script.value, local_vars, global_vars)  # SECURITY: runs the script text from the SETL graph unchecked
+    resources[f.identifier] = global_vars['result']  # the script must assign a top-level name `result`
+
+def get_order(setl_graph):
+    nodes = collections.defaultdict(set)
+
+    for typ in actions:
+        for task in setl_graph.subjects(RDF.type, typ):
+            task = setl_graph.resource(task)
+            for used in task[prov.used]:
+                nodes[task.identifier].add(used.identifier)
+
+            for usage in task[prov.qualifiedUsage]:
+                used = usage.value(prov.entity)
+                nodes[task.identifier].add(used.identifier)
+            for generated in task.subjects(prov.wasGeneratedBy):
+                nodes[generated.identifier].add(task.identifier)
+            for derivation in task[prov.qualifiedDerivation]:
+                derived = derivation.value(prov.entity)
+                nodes[task.identifier].add(derived.identifier)
+
+    return toposort_flatten(nodes)
+
+def extract(e, resources):
+    logger.info('Extract %s',e.identifier)
+    used = e.value(prov.used)
+    for result in e.subjects(prov.wasGeneratedBy):
+        if used is None:
+            used = result
+        for t in result[RDF.type]:
+            # Do we know how to generate this?
+            if t.identifier in extractors:
+                logger.info("Using %s", used.identifier)
+                resources[result.identifier] = extractors[t.identifier](used.identifier, result)
+                return resources[result.identifier]
+
+def isempty(value):
+    try:
+        return isnan(value)
+    except (TypeError, ValueError):
+        return value is None
+
+def clone(value):
+    '''Return a shallow, JSON-level copy of a list or dict; atomic values are invariant and returned as-is.'''
+    if isinstance(value, list):
+        return list(value)
+    elif isinstance(value, dict):
+        return dict(value)
+    else:
+        return value
+
+functions = {}
+def get_function(expr, local_keys):
+    used_local_keys = [k for k in local_keys if k in expr]
+    key = tuple([expr]+sorted(used_local_keys))
+    if key not in functions:
+        script = '''lambda %s,**kwargs: %s'''% (', '.join(sorted(used_local_keys)), expr)
+        #print(script)
+        fn = eval(script)
+        fn.__name__ = expr.encode("ascii", "ignore").decode('utf8')
+        functions[key] = fn
+    return functions[key]
+
+templates = {}
+def get_template(templ):
+    if templ not in templates:
+        t = Template(templ)
+        templates[templ] = t
+    return templates[templ]
+
+def flatten_lists(o):
+    if isinstance(o, list):
+        result = []
+        for x in o:
+            flattened = flatten_lists(x)
+            if isinstance(flattened, list):
+                result.extend(flattened)
+            else:
+                result.append(flattened)
+        return result
+    elif isinstance(o, dict):
+        for key in o.keys():
+            o[key] = flatten_lists(o[key])
+        return o
+    else:
+        return o
+
+def process_row(row, template, rowname, table, resources, transform, variables):
+    result = []
+    e = {'row':row,
+         'name': rowname,
+         'table': table,
+         'resources': resources,
+         'template': template,
+         "transform": transform,
+         "setl_graph": transform.graph,
+         "isempty":isempty,
+         "slugify" : slugify,
+         "camelcase" : camelcase,
+         "hash":hash,
+         "isinstance":isinstance,
+         "str":str,
+         "float":float,
+         "int":int,
+         "chain": lambda x: chain(*x),
+         "list":list
+    }
+    e.update(variables)
+    e.update(rdflib.__dict__)
+    todo = [[x, result, e] for x in template]
+
+    while len(todo) > 0:
+        task, parent, env = todo.pop()
+        key = None
+        value = task
+        this = None
+        if isinstance(parent, dict):
+            if len(task) != 2:
+                logger.debug(task)
+            key, value = task
+            kt = get_template(key)
+            key = kt.render(**env)
+        if isinstance(value, dict):
+            if '@if' in value:
+                try:
+                    fn = get_function(value['@if'], list(env.keys()))
+                    incl = fn(**env)
+                    if incl is None or not incl:
+                        continue
+                except KeyError:
+                    continue
+                except AttributeError:
+                    continue
+                except TypeError:
+                    continue
+                except Exception as e:
+                    logger.error("=" * 80)
+                    logger.error("Error evaluating @if conditional: %s", value['@if'])
+                    transform_obj = env.get('transform', {})
+                    transform_id = transform_obj.identifier if hasattr(transform_obj, 'identifier') else 'unknown'
+                    logger.error("Transform: %s, Row: %s", transform_id, env.get('name', 'unknown'))
+                    logger.error("Error type: %s", type(e).__name__)
+                    logger.error("Error message: %s", str(e))
+                    logger.error("Row-specific variables:")
+                    for key in ['row', 'name']:
+                        if key in env:
+                            v = env[key]
+                            try:
+                                logger.error("  %s: %s", key, str(v)[:200])
+                            except Exception:
+                                logger.error("  %s: <%s>", key, type(v).__name__)
+                    logger.error("=" * 80)
+                    raise RuntimeError(f"Error in @if conditional '{value['@if']}': {type(e).__name__}: {str(e)}") from e
+            if '@for' in value:
+                f = value['@for']
+                if isinstance(f, list):
+                    f = ' '.join(f)
+                variable_list, expression = f.split(" in ", 1)
+                variable_list = re.split(r',\s+', variable_list.strip())
+                val = value
+                if '@do' in value:
+                    val = value['@do']
+                else:
+                    del val['@for']
+                try:
+                    fn = get_function(expression, list(env.keys()))
+                    values = fn(**env)
+                    if values is not None:
+                        for v in values:
+                            if len(variable_list) == 1:
+                                v = [v]
+                            new_env = dict(env)
+                            for i, variable in enumerate(variable_list):
+                                new_env[variable] = v[i]
+                            child = clone(val)
+                            todo.append((child, parent, new_env))
+                except KeyError:
+                    pass
+                except Exception as e:
+                    logger.error("=" * 80)
+                    logger.error("Error in @for loop: %s", value['@for'])
+                    transform_obj = env.get('transform', {})
+                    transform_id = transform_obj.identifier if hasattr(transform_obj, 'identifier') else 'unknown'
+                    logger.error("Transform: %s, Row: %s", transform_id, env.get('name', 'unknown'))
+                    logger.error("Error type: %s", type(e).__name__)
+                    logger.error("Error message: %s", str(e))
+                    logger.error("Expression: %s", expression)
+                    logger.error("Variables to assign: %s", variable_list)
+                    logger.error("Available variables: %s", sorted([k for k in env.keys() if not k.startswith('_')]))
+                    logger.error("=" * 80)
+                    raise RuntimeError(f"Error in @for loop '{value['@for']}': {type(e).__name__}: {str(e)}") from e
+                continue
+            if '@with' in value:
+                f = value['@with']
+                if isinstance(f, list):
+                    f = ' '.join(f)
+                expression, variable_list = f.split(" as ", 1)
+                variable_list = re.split(r',\s+', variable_list.strip())
+                val = value
+                if '@do' in value:
+                    val = value['@do']
+                else:
+                    del val['@with']
+                try:
+                    fn = get_function(expression, list(env.keys()))
+                    v = fn(**env)
+                    if v is not None:
+                        if len(variable_list) == 1 and not (
+                                isinstance(v, collections.Iterable)
+                                and not isinstance(v, str)):
+                            v = [v]
+                        new_env = dict(env)
+                        for i, variable in enumerate(variable_list):
+                            new_env[variable] = v[i]
+                        child = clone(val)
+                        todo.append((child, parent, new_env))
+                except KeyError:
+                    pass
+                except Exception as e:
+                    logger.error("=" * 80)
+                    logger.error("Error in @with expression: %s", value['@with'])
+                    transform_obj = env.get('transform', {})
+                    transform_id = transform_obj.identifier if hasattr(transform_obj, 'identifier') else 'unknown'
+                    logger.error("Transform: %s, Row: %s", transform_id, env.get('name', 'unknown'))
+                    logger.error("Error type: %s", type(e).__name__)
+                    logger.error("Error message: %s", str(e))
+                    logger.error("Expression: %s", expression)
+                    logger.error("Variables to assign: %s", variable_list)
+                    logger.error("Available variables: %s", sorted([k for k in env.keys() if not k.startswith('_')]))
+                    logger.error("=" * 80)
+                    raise RuntimeError(f"Error in @with expression '{value['@with']}': {type(e).__name__}: {str(e)}") from e
+                continue
+            this = {}
+            for child in list(value.items()):
+                if child[0] == '@if':
+                    continue
+                if child[0] == '@for':
+                    continue
+                todo.append((child, this, env))
+        elif isinstance(value, list):
+            this = []
+            for child in value:
+                todo.append((child, this, env))
+        elif isinstance(value, str):
+            try:
+                template = get_template(str(value))
+                this = template.render(**env)
+            except Exception as e:
+                logger.error("=" * 80)
+                logger.error("Error rendering Jinja2 template: %s", value[:200] if len(value) > 200 else value)
+                transform_obj = env.get('transform', {})
+                transform_id = transform_obj.identifier if hasattr(transform_obj, 'identifier') else 'unknown'
+                logger.error("Transform: %s, Row: %s", transform_id, env.get('name', 'unknown'))
+                logger.error("Error type: %s", type(e).__name__)
+                logger.error("Error message: %s", str(e))
+                logger.error("Template variables referenced in template:")
+                # Try to extract variable references from the template
+                matches = TEMPLATE_VAR_PATTERN.findall(value)
+                if matches:
+                    for match in matches:
+                        var_name = match.strip().split('.')[0].split('[')[0].strip()
+                        if var_name in env:
+                            val = env[var_name]
+                            if type(val).__name__ == 'Element':
+                                # XML Element
+                                try:
+                                    val = xml.etree.ElementTree.tostring(val).decode('utf-8', errors='replace')[:200]
+                                except Exception:
+                                    val = "<XML Element>"
+                            else:
+                                try:
+                                    val = str(val)[:200]
+                                except Exception:
+                                    val = f"<{type(val).__name__}>"
+                            logger.error("  %s = %s", var_name, val)
+                        else:
+                            logger.error("  %s = <NOT FOUND>", var_name)
+                logger.error("=" * 80)
+                raise RuntimeError(f"Error rendering template: {type(e).__name__}: {str(e)}") from e
+        else:
+            this = value
+
+        if key is not None:
+            parent[key] = this
+        else:
+            parent.append(this)
+
+    return flatten_lists(result)
+
+def json_transform(transform, resources):
+    """Run one setl:Transform: render its JSON-LD template per input row into the generated graph.
+
+    Args:
+        transform: Resource describing the transform; its ``prov:value`` is the
+            JSON-LD template text and its ``prov:used`` entities are the input tables.
+        resources: dict of previously built resources keyed by identifier; the
+            output graph is stored in it under the generated entity's identifier.
+
+    Raises:
+        ValueError: the template text is not valid JSON.
+        RuntimeError: a row could not be rendered or parsed (original error is chained).
+    """
+    logger.info("Transform %s", transform.identifier)
+    tables = [u for u in transform[prov.used]]
+    variables = {}
+    for usage in transform[prov.qualifiedUsage]:
+        used = usage.value(prov.entity)
+        role = usage.value(prov.hadRole)
+        roleID  = role.value(dc.identifier)
+        variables[roleID.value] = resources[used.identifier]
+
+    generated = list(transform.subjects(prov.wasGeneratedBy))[0]
+    logger.info("Generating %s", generated.identifier)
+
+    connected_downstream_graph = '''
+construct {
+   ?target ?p ?o
+} where {
+   ?source (<>|!<>)* ?target.
+   ?target ?p ?o.
+}
+'''
+    shape_graph = Graph()
+    for shape in transform.objects(dc.conformsTo):
+        if shape[RDF.type:shacl.NodeShape] or shape[RDF.type:shacl.PropertyShape]:
+            logger.info("Validating against SHACL shape %s", shape.identifier)
+            shape_graph += transform.graph.query(connected_downstream_graph,
+                                                 initBindings={"source":shape.identifier})
+    if generated.identifier in resources:
+        result = resources[generated.identifier]
+    else:
+        result = ConjunctiveGraph()
+        if generated[RDF.type : setl.Persisted]:
+            store = TrigStore()
+            result = ConjunctiveGraph(store=store)
+            # NOTE(review): tempfile.mktemp() is deprecated and race-prone; load() uses mkdtemp() -- confirm TrigStore accepts an existing directory.
+            tempdir = tempfile.mktemp()
+            logger.info("Persisting %s to %s", generated.identifier, tempdir)
+            result.store.open(tempdir, True)
+    s = transform.value(prov.value).value
+    try:
+        jslt = json.loads(s)
+    except json.JSONDecodeError as e:
+        logger.error("Error parsing JSON-LD template for transform %s", transform.identifier)
+        lineno = getattr(e, 'lineno', 0)
+        colno = getattr(e, 'colno', 0)
+        msg = getattr(e, 'msg', str(e))
+        logger.error("JSON parsing error at line %d, column %d: %s", lineno, colno, msg)
+        # Show context around the error (8 lines before, 3 after for better bracket matching)
+        lines = s.split("\n")
+        start_line = max(0, lineno - 8)
+        end_line = min(len(lines), lineno + 3)
+        logger.error("Template context:")
+        for i in range(start_line, end_line):
+            prefix = ">>> " if i == lineno - 1 else "    "
+            logger.error("%s%d: %s", prefix, i + 1, lines[i])
+        raise ValueError(f"Invalid JSON-LD template in transform {transform.identifier}: {msg} at line {lineno}, column {colno}") from e
+    except Exception as e:
+        logger.error("Error parsing JSON-LD template for transform %s: %s", transform.identifier, str(e))
+        logger.error("Template content:\n%s", s[:500])  # Show first 500 chars
+        raise ValueError(f"Invalid JSON-LD template in transform {transform.identifier}: {str(e)}") from e
+    context = transform.value(setl.hasContext)
+    if context is not None:
+        context = json.loads(context.value)
+    for t in tables:
+        logger.info("Using %s", t.identifier)
+        table = resources[t.identifier]
+        it = table
+        if isinstance(table, pandas.DataFrame):
+            it = tqdm(table.iterrows(), total=table.shape[0])
+        else:
+            logger.info("Transform %s", t.identifier)
+        # Apply the run_samples limit by row position: index labels need not be 0..N-1 or even numeric.
+        for position, (rowname, row) in enumerate(it):
+            if run_samples > 0 and position >= run_samples:
+                break
+            try:
+                # data stays None until the row is serialized, so the handler below knows whether to show it.
+                data = None
+                root = {
+                    "@id": generated.identifier,
+                    "@graph": process_row(row, jslt, rowname, table, resources, transform, variables)
+                }
+                if context is not None:
+                    root['@context'] = context
+
+                data = json.dumps(root)
+
+                if len(shape_graph) > 0:
+                    d = ConjunctiveGraph()
+                    d.parse(data=data,format='json-ld')
+                    conforms, report, message = validate(d,
+                                                         shacl_graph=shape_graph,
+                                                         advanced=True,
+                                                         debug=False)
+                    if not conforms:
+                        print(message)
+                result.parse(data=data, format="json-ld")
+            except Exception as e:
+                logger.error("=" * 80)
+                logger.error("Error in transform %s while processing row %s", transform.identifier, rowname)
+                if isinstance(table, pandas.DataFrame):
+                    # Format row data with better NaN handling
+                    row_dict = {}
+                    for key, value in dict(row).items():
+                        if pandas.isna(value):
+                            row_dict[key] = "<empty/missing>"
+                        else:
+                            row_dict[key] = value
+                    logger.error("Row data: %s", row_dict)
+                else:
+                    logger.error("Row identifier: %s", rowname)
+
+                # Try to provide more specific error information
+                error_type = type(e).__name__
+                if "JSON-LD" in str(e) or "json" in str(e).lower():
+                    logger.error("JSON-LD processing error: %s", str(e))
+                    if data is not None:
+                        logger.error("Generated JSON-LD (first 1000 chars):\n%s", data[:1000])
+                elif hasattr(e, 'lineno'):
+                    logger.error("%s at line %d: %s", error_type, e.lineno, str(e))
+                else:
+                    logger.error("%s: %s", error_type, str(e))
+
+                logger.error("=" * 80)
+                raise RuntimeError(f"Failed to transform row {rowname} in transform {transform.identifier}: {error_type}: {str(e)}") from e
+
+    resources[generated.identifier] = result
+
+def transform(transform_resource, resources):
+    """Legacy graph transform: run the used Python scripts and SPARQL steps over one graph.
+
+    ``resources`` is accepted for the action signature but not read; ``actions``
+    dispatches setl:Transform to ``json_transform`` rather than to this function.
+    """
+    logger.info('Transforming %s',transform_resource.identifier)
+
+    # Generated entities are described on the SETL resource, not in the still-empty working graph.
+    transform_graph = ConjunctiveGraph()
+    for result in transform_resource.subjects(prov.wasGeneratedBy):
+        transform_graph = ConjunctiveGraph(identifier=result.identifier)
+
+    used = set(transform_resource[prov.used])
+
+    # NOTE(review): ``graphs`` is not defined in this function -- confirm a module-level registry exists.
+    for csv in [u for u in used if u[RDF.type:csvw.Table]]:
+        csv_graph = Graph(store=transform_graph.store, identifier=csv)
+        csv_graph += graphs[csv.identifier]
+
+    for script in [u for u in used if u[RDF.type:setl.PythonScript]]:
+        logger.info("Script: %s", script.identifier)
+        s = script.value(prov.value).value
+        l = dict(graph = transform_graph, setl_graph = transform_resource.graph)
+        gl = dict()
+        # exec runs script text taken from the SETL description: only load trusted scripts.
+        exec(s, gl, l)
+
+    for update in [u for u in used if u[RDF.type:sp.Update]]:
+        logger.info("Update: %s", update.identifier)
+        query = update.value(prov.value).value
+        transform_graph.update(query)
+
+    for construct in [u for u in used if u[RDF.type:sp.Construct]]:
+        logger.info("Construct: %s", construct.identifier)
+        query = construct.value(prov.value).value
+        g = transform_graph.query(query)
+        transform_graph += g
+
+    for csv in [u for u in used if u[RDF.type:csvw.Table]]:
+        g = Graph(identifier=csv.identifier,store=transform_graph.store)
+        g.remove((None, None, None))
+        transform_graph.store.remove_graph(csv.identifier)
+
+    for result in transform_resource.subjects(prov.wasGeneratedBy):
+        graphs[result.identifier] = transform_graph
+
+def _load_open(generated):
+    """Open the ``file://`` target of ``generated`` for binary writing; return it wrapped by the first matching packer."""
+    identifier = str(generated.identifier)
+    if not identifier.startswith("file://"):
+        raise ValueError(f"Cannot open {identifier!r} for writing: only file:// URIs are supported")
+    # On Windows also drop the third slash, so file:///C:/x becomes C:/x.
+    filename = identifier.replace('file:///', '') if os.name == 'nt' else identifier
+    fh = open(filename.replace('file://', ''), 'wb')
+    for rdf_type, pack in packers.items():
+        if generated[RDF.type : rdf_type]:
+            return pack(fh)
+    return fh
+
+def load(load_resource, resources):
+    """Run one setl:Load: gather the used graphs and write them to each generated target.
+
+    One input graph is written as-is; several are merged into a new Dataset, kept in
+    an on-disk Sleepycat store when any input is setl:Persisted.
+    """
+    logger.info('Load %s',load_resource.identifier)
+    inputs = list(load_resource[prov.used])
+    if len(inputs) == 1:
+        logger.info("Using %s",inputs[0].identifier)
+        file_graph = resources[inputs[0].identifier]
+    else:
+        file_graph = Dataset(default_union=True)
+        # Open the disk-backed store only when a merge actually happens, so a
+        # single persisted input does not leave an unused temp store behind.
+        if any(used[RDF.type : setl.Persisted] for used in inputs):
+            file_graph = Dataset(store='Sleepycat', default_union=True)
+            tempdir = tempfile.mkdtemp()
+            logger.debug("Gathering %s into %s", load_resource.identifier, tempdir)
+            file_graph.store.open(tempdir, True)
+        for used in inputs:
+            logger.info("Using %s",used.identifier)
+            used_graph = resources[used.identifier]
+            file_graph.namespace_manager = used_graph.namespace_manager
+            file_graph.addN(used_graph.quads())
+
+    for generated in load_resource.subjects(prov.wasGeneratedBy):
+        # TODO: support LDP-based loading
+        if generated[RDF.type:pv.File]:
+            fmt = generated.value(dc['format'])
+            if fmt is not None:
+                fmt = fmt.value
+            if fmt in formats:
+                fmt = formats[fmt]
+            with _load_open(generated) as o:
+                file_graph.serialize(o, format=fmt)
+
+        elif generated[RDF.type:sd.Service]:
+            from rdflib.plugins.stores.sparqlstore import SPARQLUpdateStore
+            endpoint = generated.value(sd.endpoint, default=generated).identifier
+            store = SPARQLUpdateStore(endpoint, endpoint, autocommit=False)
+            endpoint_graph = Dataset(store=store, identifier=generated.identifier, default_union=True)
+            endpoint_graph.addN(file_graph.quads())
+            endpoint_graph.commit()
+
+
+actions = {  # SETL resource type -> callable that run_setl invokes as action(task, resources)
+    setl.Extract : extract,
+    setl.Transform : json_transform,  # the JSON-LD template transform, not transform()
+    setl.Load : load,
+    setl.PythonScript : create_python_function,
+    setl.IsEmpty : isempty
+}
+
+def _setl(setl_graph):
+    """Deprecated alias for :func:`run_setl`, kept for backward compatibility.
+
+    Emits a ``DeprecationWarning`` attributed to the caller, then delegates.
+
+    Args:
+        setl_graph: A ConjunctiveGraph containing the SETL script.
+
+    Returns:
+        dict: The resources dictionary produced by ``run_setl``.
+    """
+    from warnings import warn
+    message = (
+        "_setl() is deprecated and will be removed in a future version. "
+        "Use run_setl() instead, which provides the same functionality with better documentation."
+    )
+    # stacklevel=2 points the warning at the code that called _setl().
+    warn(message, DeprecationWarning, stacklevel=2)
+    return run_setl(setl_graph)
+
+
+def run_setl(setl_graph):
+    """Execute a SETL (Semantic Extract, Transform, Load) script.
+
+    This is the main entry point for programmatically running SETL scripts.
+    It processes a SETL graph containing extraction, transformation, and loading
+    instructions for working with RDF data.
+
+    Args:
+        setl_graph (ConjunctiveGraph): A ConjunctiveGraph containing the SETL script
+            in RDF format. The graph should define resources with types from the
+            SETL vocabulary (http://purl.org/twc/vocab/setl/) including:
+            - setl:Extract: Extract data from sources
+            - setl:Transform: Transform data using JSON-LD templates
+            - setl:Load: Load data to destinations
+
+    Returns:
+        dict: A dictionary mapping resource URIs (URIRef keys) to their generated
+            content. The dictionary contains:
+            - Extracted data (DataFrames, RDF graphs, etc.)
+            - Transformed RDF graphs
+            - References to action functions
+
+    Example:
+        >>> from rdflib import ConjunctiveGraph, URIRef
+        >>> from setlr import run_setl
+        >>>
+        >>> # Load a SETL script
+        >>> setl_graph = ConjunctiveGraph()
+        >>> setl_graph.parse("my_script.setl.ttl", format="turtle")
+        >>>
+        >>> # Execute the script
+        >>> resources = run_setl(setl_graph)
+        >>>
+        >>> # Access generated resources (keys are URIRef objects, not plain strings)
+        >>> output_graph = resources[URIRef('http://example.com/output')]
+
+    Raises:
+        RuntimeError: If there are errors during extraction, transformation, or loading.
+        ValueError: If the SETL script contains invalid JSON-LD templates or configuration.
+
+    Note:
+        This function initializes the module logger if not already set and processes
+        all SETL tasks in topological order based on their dependencies.
+    """
+    global logger
+    if logger is None:
+        logger = logging.getLogger(__name__)
+    # The returned dict starts out as a copy of the action table.
+    resources = dict(actions)
+
+    tasks = [setl_graph.resource(t) for t in get_order(setl_graph)]
+
+    for task in tasks:
+        handlers = [actions[t.identifier] for t in task[RDF.type] if t.identifier in actions]
+        if handlers:
+            handlers[0](task, resources)
+    return resources
+
+
+logger = None
+
+import click
+@click.command()
+@click.option('--quiet', '-q', is_flag=True, default=False, help="Minimize logging.")
+@click.option('-n', default=-1, help="Only process the first N rows.", type=int)
+#@click.option('--rdf-validation', default=None, help="Save the RDF validation report to this file.")
+#@click.option('--text-validation', default=None, help="Save the text validation report to this file.")
+@click.argument('script', type=click.Path(exists=True))
+def main(script, rdf_validation=None, text_validation=None, quiet=False, n=-1):
+    """Command-line interface for running SETL scripts.
+
+    Args:
+        script: Path to the SETL script file (Turtle format).
+        rdf_validation, text_validation: Unused; their options are currently disabled.
+        quiet: If True, minimize logging output.
+        n: Only process the first N rows (-1 for all rows).
+    """
+    global logger, run_samples
+
+    logging.basicConfig(level=logging.WARNING if quiet else logging.DEBUG)
+    logger = logging.getLogger(__name__)
+
+    # json_transform reads this module-level row limit.
+    run_samples = n
+
+    setl_graph = ConjunctiveGraph()
+    # Turtle is UTF-8 by specification; the handle is closed as soon as the text is read.
+    with open(script, encoding="utf-8") as script_file:
+        content = script_file.read()
+    setl_graph.parse(data=content, format="turtle")
+    run_setl(setl_graph)
diff --git a/tests/setlr_test/test_api_compatibility.py b/tests/setlr_test/test_api_compatibility.py
new file mode 100644
index 0000000..698fbf2
--- /dev/null
+++ b/tests/setlr_test/test_api_compatibility.py
@@ -0,0 +1,98 @@
+import unittest
+import warnings
+from rdflib import ConjunctiveGraph
+
+# Import setlr module
+import setlr
+
+
+class TestBackwardCompatibility(unittest.TestCase):
+    """The deprecated _setl() entry point must keep working and must warn."""
+
+    def test_setl_deprecated_warning(self):
+        """_setl() emits a DeprecationWarning that points callers to run_setl()."""
+        setl_graph = ConjunctiveGraph()
+
+        with warnings.catch_warnings(record=True) as caught:
+            warnings.simplefilter("always")
+            setlr._setl(setl_graph)
+
+        # Dependencies may warn during the call too; keep only the _setl() warning.
+        ours = [w for w in caught if "_setl()" in str(w.message)]
+        self.assertTrue(ours, "Expected deprecation warning for _setl()")
+        self.assertTrue(issubclass(ours[0].category, DeprecationWarning))
+        self.assertIn("Use run_setl() instead", str(ours[0].message))
+
+    def test_setl_still_works(self):
+        """_setl() still returns the resources dict despite being deprecated."""
+        from rdflib import URIRef
+        setl_graph = ConjunctiveGraph()
+
+        with warnings.catch_warnings(record=True):
+            warnings.simplefilter("always")
+            result = setlr._setl(setl_graph)
+
+        self.assertIsInstance(result, dict)
+        # Keys are URIRef objects; the action table is part of the result.
+        self.assertIn(URIRef('http://purl.org/twc/vocab/setl/Extract'), result)
+        self.assertIn(URIRef('http://purl.org/twc/vocab/setl/Transform'), result)
+
+
+class TestNewAPI(unittest.TestCase):
+    """Checks for the public run_setl() entry point."""
+
+    def test_run_setl_exists(self):
+        """run_setl is exported by the package and is callable."""
+        self.assertTrue(hasattr(setlr, 'run_setl'))
+        self.assertTrue(callable(setlr.run_setl))
+
+    def test_run_setl_basic_functionality(self):
+        """run_setl() on an empty script returns a dict holding the action table."""
+        from rdflib import URIRef
+
+        result = setlr.run_setl(ConjunctiveGraph())
+
+        self.assertIsInstance(result, dict)
+        # Keys are URIRef objects, so membership is tested with URIRefs.
+        setl_ns = 'http://purl.org/twc/vocab/setl/'
+        for local_name in ('Extract', 'Transform', 'Load'):
+            expected_key = URIRef(setl_ns + local_name)
+            self.assertIn(expected_key, result)
+
+    def test_run_setl_no_deprecation_warning(self):
+        """run_setl() itself must not raise the _setl() deprecation warning."""
+        empty_script = ConjunctiveGraph()
+
+        with warnings.catch_warnings(record=True) as caught:
+            warnings.simplefilter("always")
+            setlr.run_setl(empty_script)
+
+            # rdflib may emit its own warnings; count only the _setl() one.
+            flagged = [entry for entry in caught if "_setl()" in str(entry.message)]
+            self.assertEqual(len(flagged), 0, "run_setl() should not produce deprecation warning")
+
+    def test_run_setl_has_documentation(self):
+        """The run_setl() docstring carries its summary and standard sections."""
+        doc = setlr.run_setl.__doc__
+        self.assertIsNotNone(doc)
+        # One summary phrase plus the three section headings.
+        for fragment in ("Execute a SETL", "Args:", "Returns:", "Example:"):
+            self.assertIn(fragment, doc)
+
+    def test_setl_and_run_setl_equivalent(self):
+        """_setl() and run_setl() return dicts with identical key sets."""
+        first_script = ConjunctiveGraph()
+        second_script = ConjunctiveGraph()
+
+        with warnings.catch_warnings(record=True):
+            warnings.simplefilter("always")
+            via_deprecated = setlr._setl(first_script)
+
+        via_public = setlr.run_setl(second_script)
+
+        # Only the key sets are compared; the values are not checked here.
+        self.assertEqual(set(via_deprecated.keys()), set(via_public.keys()))
+
+
+if __name__ == "__main__":
+    unittest.main()
diff --git a/tests/setlr_test/test_error_messages.py b/tests/setlr_test/test_error_messages.py
index ec3d5fe..39eb791 100644
--- a/tests/setlr_test/test_error_messages.py
+++ b/tests/setlr_test/test_error_messages.py
@@ -19,19 +19,21 @@ class TestErrorMessages(unittest.TestCase):
     
     def setUp(self):
         """Set up logging to capture error messages"""
-        # Initialize the setlr logger
-        setlr.logger = logging.getLogger('setlr')
-        setlr.logger.setLevel(logging.ERROR)
+        # Initialize the setlr.core logger
+        import setlr.core
+        setlr.core.logger = logging.getLogger('setlr')
+        setlr.core.logger.setLevel(logging.ERROR)
         
         self.log_capture = StringIO()
         self.handler = logging.StreamHandler(self.log_capture)
         self.handler.setLevel(logging.ERROR)
-        setlr.logger.addHandler(self.handler)
+        setlr.core.logger.addHandler(self.handler)
         
     def tearDown(self):
         """Clean up logging"""
-        if setlr.logger:
-            setlr.logger.removeHandler(self.handler)
+        import setlr.core
+        if setlr.core.logger:
+            setlr.core.logger.removeHandler(self.handler)
     
     def test_invalid_json_template(self):
         """Test error message when JSON template has syntax errors"""

From f2ac96573d6f746315c0b9b144c23fe7729432d6 Mon Sep 17 00:00:00 2001
From: "copilot-swe-agent[bot]" <198982749+Copilot@users.noreply.github.com>
Date: Sun, 18 Jan 2026 04:46:06 +0000
Subject: [PATCH 3/9] Add migration documentation and verify all functionality

Co-authored-by: jpmccu <602385+jpmccu@users.noreply.github.com>
---
 MIGRATION.md | 166 +++++++++++++++++++++++++++++++++++++++++++++++++++
 1 file changed, 166 insertions(+)
 create mode 100644 MIGRATION.md

diff --git a/MIGRATION.md b/MIGRATION.md
new file mode 100644
index 0000000..459a730
--- /dev/null
+++ b/MIGRATION.md
@@ -0,0 +1,166 @@
+# Migration to pyproject.toml and API Improvements
+
+This document describes the changes made to migrate the project to modern Python packaging standards and improve the API.
+
+## Changes Made
+
+### 1. Migration to pyproject.toml
+
+The project has been migrated from `setup.py` to `pyproject.toml`, following PEP 517/518 standards for modern Python packaging.
+
+- **New file**: `pyproject.toml` - Contains all project metadata, dependencies, and build configuration
+- **Status of setup.py**: The old `setup.py` file is still present for compatibility but is no longer the primary packaging configuration
+
+### 2. Code Restructuring
+
+The implementation code has been moved from `setlr/__init__.py` to `setlr/core.py` following best practices:
+
+- **setlr/core.py**: Contains all implementation code (about 1,000 lines)
+- **setlr/__init__.py**: Now serves as a clean public API interface (~90 lines)
+
+This separation provides:
+- Better code organization
+- Clearer public API surface
+- Easier maintenance
+- Improved IDE support and code navigation
+
+### 3. New Public API: `run_setl()`
+
+A new, well-documented public function `run_setl()` has been introduced:
+
+```python
+from rdflib import ConjunctiveGraph, URIRef
+from setlr import run_setl
+
+# Load a SETL script
+setl_graph = ConjunctiveGraph()
+setl_graph.parse("my_script.setl.ttl", format="turtle")
+
+# Execute the script
+resources = run_setl(setl_graph)
+
+# Access generated resources (keys are URIRef objects, not plain strings)
+output_graph = resources[URIRef('http://example.com/output')]
+```
+
+**Features:**
+- Comprehensive docstring with examples
+- Parameter and return types described in the docstring
+- Clear description of parameters and return values
+- Usage examples
+
+### 4. Backward Compatibility
+
+The old `_setl()` function is still available for backward compatibility:
+
+```python
+from setlr import _setl  # Still works, but deprecated
+
+# Old code continues to work
+resources = _setl(setl_graph)
+```
+
+**Deprecation Warning:**
+- Using `_setl()` will emit a `DeprecationWarning`
+- The warning suggests using `run_setl()` instead
+- No breaking changes - existing code continues to work
+
+### 5. Exported API
+
+The following are now officially exported from the `setlr` package:
+
+**Main Functions:**
+- `run_setl()` - Primary API function (recommended)
+- `_setl()` - Deprecated, use `run_setl()` instead
+- `main()` - CLI entry point
+
+**Utility Functions:**
+- `read_csv()`, `read_excel()`, `read_json()`, `read_xml()`, `read_graph()`
+- `extract()`, `json_transform()`, `transform()`, `load()`
+- `isempty()`, `hash()`, `camelcase()`, `get_content()`
+
+**Namespaces:**
+- `csvw`, `ov`, `setl`, `prov`, `pv`, `sp`, `sd`, `dc`, `void`, `shacl`, `api_vocab`
+
+## Migration Guide for Users
+
+### If you were using `_setl()`:
+
+**Before:**
+```python
+from setlr import _setl
+
+resources = _setl(setl_graph)
+```
+
+**After (recommended):**
+```python
+from setlr import run_setl
+
+resources = run_setl(setl_graph)
+```
+
+**Note:** Your old code will continue to work, but you'll see a deprecation warning. Update at your convenience.
+
+### If you were importing internal functions:
+
+**Before:**
+```python
+from setlr import read_csv, extract
+```
+
+**After:**
+```python
+from setlr import read_csv, extract  # Still works!
+```
+
+No changes needed - all utility functions are properly exported.
+
+## For Package Maintainers
+
+### Building the Package
+
+With pyproject.toml, you can now build the package using modern tools:
+
+```bash
+# Install build tool
+pip install build
+
+# Build the package
+python -m build
+```
+
+This creates both wheel and source distributions in the `dist/` directory.
+
+### Installing from Source
+
+```bash
+# Development installation
+pip install -e .
+
+# Regular installation
+pip install .
+```
+
+### Running Tests
+
+```bash
+# Install test dependencies
+pip install nose2 coverage
+
+# Run tests
+nose2 --verbose
+```
+
+## Benefits of This Migration
+
+1. **Modern Standards**: Uses PEP 517/518 standards for Python packaging
+2. **Better Documentation**: Clear, comprehensive API documentation
+3. **Improved Structure**: Cleaner separation between public API and implementation
+4. **Backward Compatible**: No breaking changes for existing users
+5. **Future-Proof**: Follows current Python best practices
+6. **Better IDE Support**: Clearer module structure aids code completion and navigation
+
+## Questions or Issues?
+
+If you encounter any issues with the migration or have questions about the new API, please open an issue on GitHub.

From d1a4dff9f9a29f570a9d02dd3559a2fa8e75d28f Mon Sep 17 00:00:00 2001
From: "copilot-swe-agent[bot]" <198982749+Copilot@users.noreply.github.com>
Date: Sun, 18 Jan 2026 04:48:07 +0000
Subject: [PATCH 4/9] Remove pip from runtime dependencies (not needed)

Co-authored-by: jpmccu <602385+jpmccu@users.noreply.github.com>
---
 pyproject.toml | 1 -
 1 file changed, 1 deletion(-)

diff --git a/pyproject.toml b/pyproject.toml
index 6facde3..0b1c7f1 100644
--- a/pyproject.toml
+++ b/pyproject.toml
@@ -20,7 +20,6 @@ classifiers = [
 requires-python = ">=3.8"
 dependencies = [
     "future",
-    "pip>=9.0.0",
     "cython",
     "numpy",
     "rdflib>=6.0.0",

From 912b6007d8f9c093b3e40d1d0c6b72230ea3c855 Mon Sep 17 00:00:00 2001
From: "copilot-swe-agent[bot]" <198982749+Copilot@users.noreply.github.com>
Date: Sun, 18 Jan 2026 04:49:27 +0000
Subject: [PATCH 5/9] Include _setl in __all__ for wildcard import
 compatibility

Co-authored-by: jpmccu <602385+jpmccu@users.noreply.github.com>
---
 setlr/__init__.py | 4 +---
 1 file changed, 1 insertion(+), 3 deletions(-)

diff --git a/setlr/__init__.py b/setlr/__init__.py
index eba796a..96fd12b 100644
--- a/setlr/__init__.py
+++ b/setlr/__init__.py
@@ -57,6 +57,7 @@
 # Define what gets imported with "from setlr import *"
 __all__ = [
     'run_setl',
+    '_setl',  # Deprecated but included for backward compatibility with wildcard imports
     'main',
     # Include commonly used utilities
     'read_csv',
@@ -84,7 +85,4 @@
     'void',
     'shacl',
     'api_vocab',
-    # Keep _setl for backward compatibility but not in __all__ to discourage use
 ]
-
-# Note: _setl is still importable for backward compatibility but not in __all__

From 6f7583e2168ee4a07b861de10ae607ceaaeca060 Mon Sep 17 00:00:00 2001
From: "copilot-swe-agent[bot]" <198982749+Copilot@users.noreply.github.com>
Date: Sun, 18 Jan 2026 16:04:36 +0000
Subject: [PATCH 6/9] Add development scripts for bootstrap, build, and release

Co-authored-by: jpmccu <602385+jpmccu@users.noreply.github.com>
---
 script/README.md | 101 +++++++++++++++++++++++++++++++++++++++++++++
 script/bootstrap |  80 ++++++++++++++++++++++++++++++++++++
 script/build     |  79 +++++++++++++++++++++++++++++++++++
 script/release   | 105 +++++++++++++++++++++++++++++++++++++++++++++++
 4 files changed, 365 insertions(+)
 create mode 100644 script/README.md
 create mode 100755 script/bootstrap
 create mode 100755 script/build
 create mode 100755 script/release

diff --git a/script/README.md b/script/README.md
new file mode 100644
index 0000000..bc0aa41
--- /dev/null
+++ b/script/README.md
@@ -0,0 +1,101 @@
+# Development Scripts
+
+This directory contains scripts for setting up, building, and releasing the setlr project.
+
+## Scripts
+
+### `bootstrap`
+
+Set up a virtual environment suitable for developing and using the project, including all package requirements for build and release.
+
+**Usage:**
+```bash
+./script/bootstrap
+```
+
+This script will:
+- Create a Python virtual environment in `venv/`
+- Install the project in editable mode with all dependencies
+- Install development dependencies (nose2, coverage, flake8, pylint, etc.)
+- Install build and release tools (build, wheel, twine)
+
+**After running bootstrap:**
+```bash
+source venv/bin/activate  # Activate the virtual environment
+```
+
+### `build`
+
+Build the project packages and run all tests and checks.
+
+**Usage:**
+```bash
+./script/build
+```
+
+This script will:
+- Activate the virtual environment (if it exists)
+- Clean previous build artifacts
+- Run linting checks with flake8
+- Run all tests with nose2
+- Build distribution packages (wheel and source tarball)
+
+**Output:**
+- `dist/setlr-*.whl` - Wheel distribution
+- `dist/setlr-*.tar.gz` - Source distribution
+
+### `release`
+
+Upload the current version of the project to PyPI using twine.
+
+**Usage:**
+```bash
+./script/release
+```
+
+This script will:
+- Activate the virtual environment (if it exists)
+- Check that distribution files exist
+- Validate distribution files with twine
+- Prompt for confirmation before uploading
+- Upload to PyPI (requires PyPI credentials or API token)
+
+**Prerequisites:**
+- Run `./script/build` first to create distribution files
+- Have PyPI credentials or API token ready
+
+**Authentication:**
+You can provide credentials via:
+- Interactive prompt (default)
+- Environment variables: `TWINE_USERNAME` and `TWINE_PASSWORD`
+- PyPI API token: Set `TWINE_USERNAME` to `__token__` and `TWINE_PASSWORD` to your `pypi-...` token
+
+## Typical Workflow
+
+```bash
+# 1. Set up development environment (first time only)
+./script/bootstrap
+source venv/bin/activate
+
+# 2. Make your changes to the code
+# ... edit files ...
+
+# 3. Build and test
+./script/build
+
+# 4. If all tests pass and you're ready to release
+./script/release
+```
+
+## Requirements
+
+- Python 3.8 or higher
+- Bash shell (Linux/macOS/WSL on Windows)
+- Internet connection (for downloading dependencies)
+
+## Notes
+
+- The virtual environment (`venv/`) is automatically excluded from git via `.gitignore`
+- All scripts use color output for better readability
+- The `build` script will fail if tests don't pass
+- The `release` script requires confirmation before uploading to PyPI
diff --git a/script/bootstrap b/script/bootstrap
new file mode 100755
index 0000000..208e971
--- /dev/null
+++ b/script/bootstrap
@@ -0,0 +1,80 @@
+#!/usr/bin/env bash
+# Bootstrap script: Set up a virtual environment suitable for developing and using the project
+set -e
+
+# Colors for output
+GREEN='\033[0;32m'
+BLUE='\033[0;34m'
+NC='\033[0m' # No Color
+
+echo -e "${BLUE}==> Setting up development environment for setlr${NC}"
+
+# Determine project root (one level up from script directory)
+SCRIPT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)"
+PROJECT_ROOT="$(cd "${SCRIPT_DIR}/.." && pwd)"
+
+cd "${PROJECT_ROOT}"
+
+# Check if Python is available
+if ! command -v python3 &> /dev/null; then
+    echo "Error: python3 is not installed"
+    exit 1
+fi
+
+PYTHON_VERSION=$(python3 --version)
+echo -e "${GREEN}✓${NC} Found ${PYTHON_VERSION}"
+
+# Create virtual environment if it doesn't exist
+VENV_DIR="${PROJECT_ROOT}/venv"
+if [ ! -d "${VENV_DIR}" ]; then
+    echo -e "${BLUE}==> Creating virtual environment at ${VENV_DIR}${NC}"
+    python3 -m venv "${VENV_DIR}"
+    echo -e "${GREEN}✓${NC} Virtual environment created"
+else
+    echo -e "${GREEN}✓${NC} Virtual environment already exists at ${VENV_DIR}"
+fi
+
+# Activate virtual environment
+source "${VENV_DIR}/bin/activate"
+
+# Upgrade pip
+echo -e "${BLUE}==> Upgrading pip${NC}"
+pip install --upgrade pip
+
+# Install the project in editable mode with all dependencies
+echo -e "${BLUE}==> Installing setlr in editable mode${NC}"
+pip install -e .
+
+# Install development and build dependencies
+echo -e "${BLUE}==> Installing development dependencies${NC}"
+pip install \
+    nose2 \
+    coverage \
+    flake8 \
+    pycodestyle \
+    pylint \
+    vulture
+
+echo -e "${BLUE}==> Installing build dependencies${NC}"
+pip install \
+    build \
+    wheel \
+    twine
+
+echo -e "${GREEN}✓${NC} All dependencies installed"
+
+# Display next steps
+echo ""
+echo -e "${GREEN}========================================${NC}"
+echo -e "${GREEN}Bootstrap complete!${NC}"
+echo -e "${GREEN}========================================${NC}"
+echo ""
+echo "To activate the virtual environment, run:"
+echo "  source venv/bin/activate"
+echo ""
+echo "To build the project:"
+echo "  ./script/build"
+echo ""
+echo "To release to PyPI:"
+echo "  ./script/release"
+echo ""
diff --git a/script/build b/script/build
new file mode 100755
index 0000000..535207e
--- /dev/null
+++ b/script/build
@@ -0,0 +1,79 @@
+#!/usr/bin/env bash
+# Build script: Build the project packages and run all tests and checks
+set -e
+
+# Colors for output
+GREEN='\033[0;32m'
+BLUE='\033[0;34m'
+YELLOW='\033[1;33m'
+RED='\033[0;31m'
+NC='\033[0m' # No Color
+
+echo -e "${BLUE}==> Building setlr${NC}"
+
+# Determine project root (one level up from script directory)
+SCRIPT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)"
+PROJECT_ROOT="$(cd "${SCRIPT_DIR}/.." && pwd)"
+
+cd "${PROJECT_ROOT}"
+
+# Check if virtual environment exists
+VENV_DIR="${PROJECT_ROOT}/venv"
+if [ ! -d "${VENV_DIR}" ]; then
+    echo -e "${YELLOW}Warning: Virtual environment not found. Run ./script/bootstrap first.${NC}"
+    echo "Continuing with system Python..."
+else
+    # Activate virtual environment
+    source "${VENV_DIR}/bin/activate"
+    echo -e "${GREEN}✓${NC} Using virtual environment"
+fi
+
+# Clean previous builds
+echo -e "${BLUE}==> Cleaning previous builds${NC}"
+rm -rf build/ dist/ *.egg-info setlr.egg-info
+echo -e "${GREEN}✓${NC} Cleaned build artifacts"
+
+# Run linting checks
+echo -e "${BLUE}==> Running linting checks${NC}"
+echo "Running flake8..."
+if flake8 setlr/ tests/ --exclude=setlr/iterparse_filter.py 2>&1 | head -20; then
+    echo -e "${GREEN}✓${NC} flake8 passed (showing first 20 lines)"
+else
+    echo -e "${YELLOW}⚠${NC} flake8 found issues (expected for existing code)"
+fi
+
+# Run tests
+echo -e "${BLUE}==> Running tests${NC}"
+mkdir -p test-results
+
+if nose2 --verbose; then
+    echo -e "${GREEN}✓${NC} All tests passed"
+else
+    echo -e "${RED}✗${NC} Tests failed"
+    exit 1
+fi
+
+# Build the package
+echo -e "${BLUE}==> Building package${NC}"
+python -m build
+
+if [ $? -eq 0 ]; then
+    echo -e "${GREEN}✓${NC} Package built successfully"
+    echo ""
+    echo "Build artifacts created:"
+    ls -lh dist/
+else
+    echo -e "${RED}✗${NC} Build failed"
+    exit 1
+fi
+
+echo ""
+echo -e "${GREEN}========================================${NC}"
+echo -e "${GREEN}Build complete!${NC}"
+echo -e "${GREEN}========================================${NC}"
+echo ""
+echo "Distribution files are in: dist/"
+echo ""
+echo "To release to PyPI, run:"
+echo "  ./script/release"
+echo ""
diff --git a/script/release b/script/release
new file mode 100755
index 0000000..03e8666
--- /dev/null
+++ b/script/release
@@ -0,0 +1,105 @@
+#!/usr/bin/env bash
+# Release script: Upload the current version of the project to PyPI using twine
+set -e
+
+# Colors for output
+GREEN='\033[0;32m'
+BLUE='\033[0;34m'
+YELLOW='\033[1;33m'
+RED='\033[0;31m'
+NC='\033[0m' # No Color
+
+echo -e "${BLUE}==> Releasing setlr to PyPI${NC}"
+
+# Determine project root (one level up from script directory)
+SCRIPT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)"
+PROJECT_ROOT="$(cd "${SCRIPT_DIR}/.." && pwd)"
+
+cd "${PROJECT_ROOT}"
+
+# Check if virtual environment exists
+VENV_DIR="${PROJECT_ROOT}/venv"
+if [ ! -d "${VENV_DIR}" ]; then
+    echo -e "${YELLOW}Warning: Virtual environment not found. Run ./script/bootstrap first.${NC}"
+    echo "Continuing with system Python..."
+else
+    # Activate virtual environment
+    source "${VENV_DIR}/bin/activate"
+    echo -e "${GREEN}✓${NC} Using virtual environment"
+fi
+
+# Check if dist/ directory exists and has files
+if [ ! -d "dist" ] || [ -z "$(ls -A dist)" ]; then
+    echo -e "${RED}✗${NC} No distribution files found in dist/"
+    echo "Run ./script/build first to create distribution files."
+    exit 1
+fi
+
+# Get version from pyproject.toml
+VERSION=$(grep -E "^version\s*=" pyproject.toml | sed -E 's/.*"(.*)".*/\1/')
+echo -e "${BLUE}==> Releasing version ${VERSION}${NC}"
+
+# Display distribution files
+echo ""
+echo "Distribution files to upload:"
+ls -lh dist/
+echo ""
+
+# Check if twine is installed
+if ! command -v twine &> /dev/null; then
+    echo -e "${RED}✗${NC} twine is not installed"
+    echo "Run ./script/bootstrap to install all dependencies."
+    exit 1
+fi
+
+# Check distribution files with twine
+echo -e "${BLUE}==> Checking distribution files${NC}"
+twine check dist/*
+if [ $? -eq 0 ]; then
+    echo -e "${GREEN}✓${NC} Distribution files are valid"
+else
+    echo -e "${RED}✗${NC} Distribution files have issues"
+    exit 1
+fi
+
+# Prompt for confirmation
+echo ""
+echo -e "${YELLOW}========================================${NC}"
+echo -e "${YELLOW}WARNING: This will upload to PyPI!${NC}"
+echo -e "${YELLOW}========================================${NC}"
+echo ""
+echo "Version: ${VERSION}"
+echo "Files to upload:"
+ls -1 dist/
+echo ""
+read -p "Are you sure you want to release to PyPI? (yes/no): " -r
+echo
+
+if [[ ! $REPLY =~ ^[Yy][Ee][Ss]$ ]]; then
+    echo "Release cancelled."
+    exit 0
+fi
+
+# Upload to PyPI
+echo -e "${BLUE}==> Uploading to PyPI${NC}"
+echo ""
+echo "Note: You will be prompted for your PyPI credentials."
+echo "You can also set TWINE_USERNAME and TWINE_PASSWORD environment variables,"
+echo "or use a PyPI API token (set TWINE_PASSWORD to 'pypi-...' token)."
+echo ""
+
+twine upload dist/*
+
+if [ $? -eq 0 ]; then
+    echo ""
+    echo -e "${GREEN}========================================${NC}"
+    echo -e "${GREEN}Release successful!${NC}"
+    echo -e "${GREEN}========================================${NC}"
+    echo ""
+    echo "Version ${VERSION} has been uploaded to PyPI"
+    echo "View at: https://pypi.org/project/setlr/${VERSION}/"
+    echo ""
+else
+    echo -e "${RED}✗${NC} Upload failed"
+    exit 1
+fi

From 9340c48249c78bd3d1241036d10e6f705aee6a3e Mon Sep 17 00:00:00 2001
From: "copilot-swe-agent[bot]" <198982749+Copilot@users.noreply.github.com>
Date: Sun, 18 Jan 2026 16:18:28 +0000
Subject: [PATCH 7/9] Fix build script to properly handle and report failures
 with set -e and set -o pipefail

Co-authored-by: jpmccu <602385+jpmccu@users.noreply.github.com>
---
 script/build | 37 ++++++++++++++++++++++++++++++++-----
 1 file changed, 32 insertions(+), 5 deletions(-)

diff --git a/script/build b/script/build
index 535207e..1ef5073 100755
--- a/script/build
+++ b/script/build
@@ -1,6 +1,7 @@
 #!/usr/bin/env bash
 # Build script: Build the project packages and run all tests and checks
 set -e
+set -o pipefail  # Ensure pipeline failures are caught
 
 # Colors for output
 GREEN='\033[0;32m'
@@ -36,17 +37,38 @@ echo -e "${GREEN}✓${NC} Cleaned build artifacts"
 # Run linting checks
 echo -e "${BLUE}==> Running linting checks${NC}"
 echo "Running flake8..."
-if flake8 setlr/ tests/ --exclude=setlr/iterparse_filter.py 2>&1 | head -20; then
-    echo -e "${GREEN}✓${NC} flake8 passed (showing first 20 lines)"
+
+# Capture flake8 output and exit code
+# Temporarily disable errexit for this block since we want to capture the exit code
+set +e
+FLAKE8_OUTPUT=$(flake8 setlr/ tests/ --exclude=setlr/iterparse_filter.py 2>&1)
+FLAKE8_EXIT=$?
+set -e
+
+# Show first 20 lines of output
+echo "$FLAKE8_OUTPUT" | head -20 || true
+
+if [ $FLAKE8_EXIT -eq 0 ]; then
+    echo -e "${GREEN}✓${NC} flake8 passed - no issues found"
 else
-    echo -e "${YELLOW}⚠${NC} flake8 found issues (expected for existing code)"
+    echo -e "${YELLOW}⚠${NC} flake8 found issues (exit code: ${FLAKE8_EXIT})"
+    echo "  Note: Build continues despite linting issues (pre-existing code has many issues)"
+    echo "  To see all issues: flake8 setlr/ tests/ --exclude=setlr/iterparse_filter.py"
+    # Uncomment the next line to make linting failures stop the build:
+    # exit 1
 fi
 
 # Run tests
 echo -e "${BLUE}==> Running tests${NC}"
 mkdir -p test-results
 
-if nose2 --verbose; then
+# Temporarily disable errexit to capture the test exit code
+set +e
+nose2 --verbose
+TEST_EXIT=$?
+set -e
+
+if [ $TEST_EXIT -eq 0 ]; then
     echo -e "${GREEN}✓${NC} All tests passed"
 else
     echo -e "${RED}✗${NC} Tests failed"
@@ -55,9 +77,14 @@ fi
 
 # Build the package
 echo -e "${BLUE}==> Building package${NC}"
+
+# Temporarily disable errexit to capture the build exit code
+set +e
 python -m build
+BUILD_EXIT=$?
+set -e
 
-if [ $? -eq 0 ]; then
+if [ $BUILD_EXIT -eq 0 ]; then
     echo -e "${GREEN}✓${NC} Package built successfully"
     echo ""
     echo "Build artifacts created:"

From eb360201bd2b552012f113fa1c8c6ada92f8c39c Mon Sep 17 00:00:00 2001
From: Jamie McCusker <mccusj2@rpi.edu>
Date: Sun, 18 Jan 2026 13:10:56 -0500
Subject: [PATCH 8/9] tweaks to pass flake8 checks

---
 script/build                               |   2 +-
 setlr/__init__.py                          |   1 +
 setlr/core.py                              | 232 ++++++++++-----------
 setlr/trig_store.py                        |   8 +-
 tests/setlr_test/test_api_compatibility.py |   4 +-
 tests/setlr_test/test_error_messages.py    |   3 -
 tests/setlr_test/test_read_json.py         |   2 +-
 7 files changed, 120 insertions(+), 132 deletions(-)

diff --git a/script/build b/script/build
index 1ef5073..8b5ba61 100755
--- a/script/build
+++ b/script/build
@@ -55,7 +55,7 @@ else
     echo "  Note: Build continues despite linting issues (pre-existing code has many issues)"
     echo "  To see all issues: flake8 setlr/ tests/ --exclude=setlr/iterparse_filter.py"
     # Uncomment the next line to make linting failures stop the build:
-    # exit 1
+    exit 1
 fi
 
 # Run tests
diff --git a/setlr/__init__.py b/setlr/__init__.py
index 96fd12b..5db0092 100644
--- a/setlr/__init__.py
+++ b/setlr/__init__.py
@@ -60,6 +60,7 @@
     '_setl',  # Deprecated but included for backward compatibility with wildcard imports
     'main',
     # Include commonly used utilities
+    'logger',
     'read_csv',
     'read_excel', 
     'read_json',
diff --git a/setlr/core.py b/setlr/core.py
index e728bcd..3a0528b 100644
--- a/setlr/core.py
+++ b/setlr/core.py
@@ -1,25 +1,21 @@
 #!/usr/bin/env python
 # -*- coding: utf-8 -*-
 
-from builtins import str
-from builtins import next
-from builtins import object
-from rdflib import *
 from rdflib.util import guess_format
 import rdflib
 import csv
 import json
-import sys, collections
+import sys
+import collections
 import requests
 import pandas
 import re
 import os
-from six import text_type as str
+import click
 
 from jinja2 import Template
 from toposort import toposort_flatten
 from numpy import isnan
-import uuid
 import tempfile
 import ijson
 from . import iterparse_filter
@@ -41,26 +37,28 @@
 
 from .trig_store import TrigStore
 
+from requests_testadapter import Resp
+
+
 def hash(value):
     m = hashlib.sha256()
     m.update(value.encode('utf-8'))
     return m.hexdigest()
 
-csvw = Namespace('http://www.w3.org/ns/csvw#')
-ov = Namespace('http://open.vocab.org/terms/')
-setl = Namespace('http://purl.org/twc/vocab/setl/')
-prov = Namespace('http://www.w3.org/ns/prov#')
-pv = Namespace('http://purl.org/net/provenance/ns#')
-sp = Namespace('http://spinrdf.org/sp#')
-sd = Namespace('http://www.w3.org/ns/sparql-service-description#')
-dc = Namespace('http://purl.org/dc/terms/')
-void = Namespace('http://rdfs.org/ns/void#')
-shacl = Namespace('http://www.w3.org/ns/shacl#')
-api_vocab = Namespace('http://purl.org/linked-data/api/vocab#')
+csvw = rdflib.Namespace('http://www.w3.org/ns/csvw#')
+ov = rdflib.Namespace('http://open.vocab.org/terms/')
+setl = rdflib.Namespace('http://purl.org/twc/vocab/setl/')
+prov = rdflib.Namespace('http://www.w3.org/ns/prov#')
+pv = rdflib.Namespace('http://purl.org/net/provenance/ns#')
+sp = rdflib.Namespace('http://spinrdf.org/sp#')
+sd = rdflib.Namespace('http://www.w3.org/ns/sparql-service-description#')
+dc = rdflib.Namespace('http://purl.org/dc/terms/')
+void = rdflib.Namespace('http://rdfs.org/ns/void#')
+shacl = rdflib.Namespace('http://www.w3.org/ns/shacl#')
+api_vocab = rdflib.Namespace('http://purl.org/linked-data/api/vocab#')
 
 sys.setrecursionlimit(10000)
 
-from requests_testadapter import Resp
 
 # Regex pattern for extracting Jinja2 template variables (compiled once for performance)
 TEMPLATE_VAR_PATTERN = re.compile(r'\{\{([^}]+)\}\}')
@@ -87,11 +85,11 @@ def send(self, request, stream=False, timeout=None,
 
 datatypeConverters = collections.defaultdict(lambda: str)
 datatypeConverters.update({
-    XSD.string: str,
-    XSD.decimal: float,
-    XSD.integer: int,
-    XSD.float: float,
-    XSD.double: float
+    rdflib.XSD.string: str,
+    rdflib.XSD.decimal: float,
+    rdflib.XSD.integer: int,
+    rdflib.XSD.float: float,
+    rdflib.XSD.double: float
 })
 
 run_samples = -1
@@ -107,9 +105,9 @@ def send(self, request, stream=False, timeout=None,
 
 def read_csv(location, result):
     args = dict(
-        sep = result.value(csvw.delimiter, default=Literal(",")).value,
-        #header = result.value(csvw.headerRow, default=Literal(0)).value),
-        skiprows = result.value(csvw.skipRows, default=Literal(0)).value,
+        sep = result.value(csvw.delimiter, default=rdflib.Literal(",")).value,
+        #header = result.value(csvw.headerRow, default=rdflib.Literal(0)).value),
+        skiprows = result.value(csvw.skipRows, default=rdflib.Literal(0)).value,
         dtype=str,
         # dtype = object    # Does not seem to play well with future and python2/3 conversion
     )
@@ -122,8 +120,8 @@ def read_csv(location, result):
 
 def read_graph(location, result, g = None):
     if g is None:
-        g = ConjunctiveGraph()
-    graph = ConjunctiveGraph(store=g.store, identifier=result.identifier)
+        g = rdflib.ConjunctiveGraph()
+    graph = rdflib.ConjunctiveGraph(store=g.store, identifier=result.identifier)
     if len(graph) == 0:
         data = get_content(location, result).read()
         f = guess_format(location)
@@ -131,14 +129,13 @@ def read_graph(location, result, g = None):
             try:
                 graph.parse(data=data, format=fmt)
                 break
-            except Exception as e:
-                #print e
+            except Exception:
                 pass
         if len(graph) == 0:
             logger.error("Could not parse graph: %s", location)
-        if result[RDF.type:OWL.Ontology]:
-            for ontology in graph.subjects(RDF.type, OWL.Ontology):
-                imports = [graph.resource(x) for x in graph.objects(ontology, OWL.imports)]
+        if result[rdflib.RDF.type:rdflib.OWL.Ontology]:
+            for ontology in graph.subjects(rdflib.RDF.type, rdflib.OWL.Ontology):
+                imports = [graph.resource(x) for x in graph.objects(ontology, rdflib.OWL.imports)]
                 for i in imports:
                     read_graph(i.identifier, i, g = g)
     return g
@@ -184,7 +181,7 @@ def __exit__(self, err_type, value, tracebock):
 
     def read(self, n=None):
         if n is None:
-            return self.data + b''.join(l for l in self.iter)
+            return self.data + b''.join(line for line in self.iter)
         else:
             while len(self.data) < n:
                 try:
@@ -212,10 +209,10 @@ def get_content(location, result):
         response = handler(location)
         if response is not None:
             break
-    if result[RDF.type:setl.Tempfile]:
+    if result[rdflib.RDF.type:setl.Tempfile]:
         result = to_tempfile(response)
 
-    for t in result[RDF.type]:
+    for t in result[rdflib.RDF.type]:
         # Do we know how to unpack this?
         if t.identifier in unpackers:
             response = unpackers[t.identifier](response)
@@ -249,9 +246,9 @@ def unpack_zipfile(f):
 
 def read_excel(location, result):
     args = dict(
-        sheet_name = result.value(setl.sheetname, default=Literal(0)).value,
-        header = [int(x) for x in result.value(csvw.headerRow, default=Literal('0')).value.split(',')],
-        skiprows = result.value(csvw.skipRows, default=Literal(0)).value
+        sheet_name = result.value(setl.sheetname, default=rdflib.Literal(0)).value,
+        header = [int(x) for x in result.value(csvw.headerRow, default=rdflib.Literal('0')).value.split(',')],
+        skiprows = result.value(csvw.skipRows, default=rdflib.Literal(0)).value
     )
     if result.value(csvw.header):
         args['header'] = [result.value(csvw.header).value]
@@ -261,7 +258,7 @@ def read_excel(location, result):
 
 def read_xml(location, result):
     validate_dtd = False
-    if result[RDF.type:setl.DTDValidatedXML]:
+    if result[rdflib.RDF.type:setl.DTDValidatedXML]:
         validate_dtd = True
     f = iterparse_filter.IterParseFilter(validate_dtd=validate_dtd)
     if result.value(setl.xpath) is None:
@@ -289,27 +286,27 @@ def read_json(location, result):
     setl.SAS7BDAT : lambda location, result: pandas.read_sas(get_content(location, result), format='sas7bdat'),
     setl.Excel : read_excel,
     csvw.Table : read_csv,
-    OWL.Ontology : read_graph,
+    rdflib.OWL.Ontology : read_graph,
     void.Dataset : read_graph,
     setl.JSON : read_json,
     setl.XML : read_xml,
-    URIRef("https://www.iana.org/assignments/media-types/text/plain") : lambda location, result: get_content(location, result)
+    rdflib.URIRef("https://www.iana.org/assignments/media-types/text/plain") : lambda location, result: get_content(location, result)
 }
 
 
 try:
     from bs4 import BeautifulSoup
     extractors[setl.HTML] = lambda location, result: BeautifulSoup(get_content(location, result).read(), 'html.parser')
-except Exception as e:
+except Exception:
     pass
 
 
 def load_csv(csv_resource):
     column_descriptions = {}
     for col in csv_resource[csvw.column]:
-        label = col.value(RDFS.label).value
+        label = col.value(rdflib.RDFS.label).value
         column_descriptions[label] = col
-    csv_graph = Graph(identifier=csv_resource)
+    csv_graph = rdflib.Graph(identifier=csv_resource)
     s = [x for x in csv.reader(open(str(csv_resource.value(csvw.url).identifier).replace("file://","")),
                    delimiter=str(csv_resource.value(csvw.delimiter,default=",").value),
                    quotechar=str(csv_resource.value(csvw.quoteChar,default='"').value))]
@@ -326,24 +323,24 @@ def load_csv(csv_resource):
                 col_desc = None
                 if h in column_descriptions:
                     col_desc = column_descriptions[h]
-                col = csv_graph.resource(URIRef("urn:col_"+str(h)))
-                col.add(RDFS.label, Literal(h))
-                col.add(ov.csvCol, Literal(j))
+                col = csv_graph.resource(rdflib.URIRef("urn:col_"+str(h)))
+                col.add(rdflib.RDFS.label, rdflib.Literal(h))
+                col.add(ov.csvCol, rdflib.Literal(j))
                 if col_desc is not None:
-                    col.add(RDFS.range, col_desc.value(RDFS.range, default=XSD.string))
+                    col.add(rdflib.RDFS.range, col_desc.value(rdflib.RDFS.range, default=rdflib.XSD.string))
                 properties.append(col)
                 propertyMap[h] = col
             continue
         res = csv_graph.resource(csv_resource.identifier+"_row_"+str(i))
-        res.add(RDF.type, csvw.Row)
-        res.add(csvw.rownum, Literal(i))
+        res.add(rdflib.RDF.type, csvw.Row)
+        res.add(csvw.rownum, rdflib.Literal(i))
         for j, value in enumerate(r):
             if skip_value is not None and skip_value == value:
                 continue
             #print i, j, value
             prop = properties[j]
-            datatype = prop.value(RDFS['range'], default=XSD.string)
-            lit =  Literal(value, datatype=datatype.identifier)
+            datatype = prop.value(rdflib.RDFS['range'], default=rdflib.XSD.string)
+            lit = rdflib.Literal(value, datatype=datatype.identifier)
             #print i, prop.identifier, lit.n3()
             res.add(prop.identifier, lit)
     logger.debug("Table has %s rows, %s columns, and %s triples", len(s), len(header), len(csv_graph))
@@ -377,7 +374,7 @@ def get_order(setl_graph):
     nodes = collections.defaultdict(set)
 
     for typ in actions:
-        for task in setl_graph.subjects(RDF.type, typ):
+        for task in setl_graph.subjects(rdflib.RDF.type, typ):
             task = setl_graph.resource(task)
             for used in task[prov.used]:
                 nodes[task.identifier].add(used.identifier)
@@ -399,7 +396,7 @@ def extract(e, resources):
     for result in e.subjects(prov.wasGeneratedBy):
         if used is None:
             used = result
-        for t in result[RDF.type]:
+        for t in result[rdflib.RDF.type]:
             # Do we know how to generate this?
             if t.identifier in extractors:
                 logger.info("Using %s", used.identifier)
@@ -413,7 +410,7 @@ def isempty(value):
         return value is None
 
 def clone(value):
-    __doc__ = '''This is only a JSON-level cloning of objects. Atomic objects are invariant, and don't need to be cloned.'''
+    '''This is only a JSON-level cloning of objects. Atomic objects are invariant, and don't need to be cloned.'''
     if isinstance(value, list):
         return [x for x in value]
     elif isinstance(value, dict):
@@ -459,23 +456,24 @@ def flatten_lists(o):
 
 def process_row(row, template, rowname, table, resources, transform, variables):
     result = []
-    e = {'row':row,
-         'name': rowname,
-         'table': table,
-         'resources': resources,
-         'template': template,
-         "transform": transform,
-         "setl_graph": transform.graph,
-         "isempty":isempty,
-         "slugify" : slugify,
-         "camelcase" : camelcase,
-         "hash":hash,
-         "isinstance":isinstance,
-         "str":str,
-         "float":float,
-         "int":int,
-         "chain": lambda x: chain(*x),
-         "list":list
+    e = {
+        'row':row,
+        'name': rowname,
+        'table': table,
+        'resources': resources,
+        'template': template,
+        "transform": transform,
+        "setl_graph": transform.graph,
+        "isempty":isempty,
+        "slugify" : slugify,
+        "camelcase" : camelcase,
+        "hash":hash,
+        "isinstance":isinstance,
+        "str":str,
+        "float":float,
+        "int":int,
+        "chain": lambda x: chain(*x),
+        "list":list
     }
     e.update(variables)
     e.update(rdflib.__dict__)
@@ -577,9 +575,7 @@ def process_row(row, template, rowname, table, resources, transform, variables):
                     fn = get_function(expression, list(env.keys()))
                     v = fn(**env)
                     if v is not None:
-                        if len(variable_list) == 1 and not (
-                                isinstance(v, collections.Iterable)
-                                and not isinstance(v, str)):
+                        if (len(variable_list) == 1 and not (isinstance(v, collections.Iterable) and not isinstance(v, str))):
                             v = [v]
                         new_env = dict(env)
                         for i, variable in enumerate(variable_list):
@@ -666,7 +662,7 @@ def json_transform(transform, resources):
     for usage in transform[prov.qualifiedUsage]:
         used = usage.value(prov.entity)
         role = usage.value(prov.hadRole)
-        roleID  = role.value(dc.identifier)
+        roleID = role.value(dc.identifier)
         variables[roleID.value] = resources[used.identifier]
         #print "Using", used.identifier, "as", roleID.value
 
@@ -681,20 +677,20 @@ def json_transform(transform, resources):
    ?target ?p ?o.
 }
 '''
-    shape_graph = Graph()
+    shape_graph = rdflib.Graph()
     for shape in transform.objects(dc.conformsTo):
-        if shape[RDF.type:shacl.NodeShape] or shape[RDF.type:shacl.PropertyShape]:
+        if shape[rdflib.RDF.type:shacl.NodeShape] or shape[rdflib.RDF.type:shacl.PropertyShape]:
             logger.info("Validating against SHACL shape %s", shape.identifier)
             shape_graph += transform.graph.query(connected_downstream_graph,
                                                  initBindings={"source":shape.identifier})
     if generated.identifier in resources:
         result = resources[generated.identifier]
     else:
-        result = ConjunctiveGraph()
-        if generated[RDF.type : setl.Persisted]:
+        result = rdflib.ConjunctiveGraph()
+        if generated[rdflib.RDF.type : setl.Persisted]:
             store = TrigStore()
-            result = ConjunctiveGraph(store=store)
-        if generated[RDF.type : setl.Persisted]:
+            result = rdflib.ConjunctiveGraph(store=store)
+        if generated[rdflib.RDF.type : setl.Persisted]:
             tempdir = tempfile.mktemp()
             logger.info("Persisting %s to %s", generated.identifier, tempdir)
             result.store.open(tempdir, True)
@@ -749,13 +745,13 @@ def json_transform(transform, resources):
 
                 #logger.debug(json.dumps(root, indent=4))
                 #before = len(result)
-                #graph = ConjunctiveGraph(identifier=generated.identifier)
+                #graph = rdflib.ConjunctiveGraph(identifier=generated.identifier)
                 #graph.parse(data=json.dumps(root),format="json-ld")
                 data = json.dumps(root)
                 #del root
                 
                 if len(shape_graph) > 0:
-                    d = ConjunctiveGraph()
+                    d = rdflib.ConjunctiveGraph()
                     d.parse(data=data,format='json-ld')
                     conforms, report, message = validate(d,
                                                          shacl_graph=shape_graph,
@@ -802,49 +798,50 @@ def json_transform(transform, resources):
 def transform(transform_resource, resources):
     logger.info('Transforming %s',transform_resource.identifier)
 
-    transform_graph = ConjunctiveGraph()
+    transform_graph = rdflib.ConjunctiveGraph()  # working graph that the steps below read from and write to
     for result in transform_graph.subjects(prov.wasGeneratedBy):
-        transform_graph = ConjunctiveGraph(identifier=result.identifier)
+        transform_graph = rdflib.ConjunctiveGraph(identifier=result.identifier)  # NOTE(review): the loop iterates the graph created just above, which appears to hold no triples yet, so this line looks unreachable -- confirm intent
 
     used = set(transform_resource[prov.used])
 
-    for csv in [u for u in used if u[RDF.type:csvw.Table]]:
-        csv_graph = Graph(store=transform_graph.store, identifier=csv)
-        csv_graph += graphs[csv.identifier]
+    for csv_file in [u for u in used if u[rdflib.RDF.type:csvw.Table]]:  # copy each used csvw.Table into a graph that shares transform_graph's store
+        csv_graph = rdflib.Graph(store=transform_graph.store,
+                                 identifier=csv_file)
+        csv_graph += resources[csv_file.identifier]  # table contents are looked up in resources by identifier
 
 
-    for script in [u for u in used if u[RDF.type:setl.PythonScript]]:
+    for script in [u for u in used if u[rdflib.RDF.type:setl.PythonScript]]:  # run every used setl.PythonScript
         logger.info("Script: %s", script.identifier)
         s = script.value(prov.value).value
-        l = dict(graph = transform_graph, setl_graph = transform_resource.graph)
-        gl = dict()
-        exec(s, gl, l)
+        local_vars = dict(graph = transform_graph, setl_graph = transform_resource.graph)  # names handed to the script: graph and setl_graph
+        global_vars = dict()
+        exec(s, global_vars, local_vars)  # NOTE: executes Python source taken from the SETL description; only run trusted descriptions
 
-    for jsldt in [u for u in used if u[RDF.type:setl.PythonScript]]:
+    for jsldt in [u for u in used if u[rdflib.RDF.type:setl.PythonScript]]:  # NOTE(review): same filter as the loop above, and the body uses the leftover `script` variable instead of `jsldt` -- looks like copy/paste residue, confirm
         logger.info("Script: %s", script.identifier)
         s = script.value(prov.value).value
-        l = dict(graph = transform_graph, setl_graph = transform_resource.graph)
-        gl = dict()
-        exec(s, gl, l)
+        local_vars = dict(graph = transform_graph, setl_graph = transform_resource.graph)
+        global_vars = dict()
+        exec(s, global_vars, local_vars)
 
-    for update in [u for u in used if u[RDF.type:sp.Update]]:
+    for update in [u for u in used if u[rdflib.RDF.type:sp.Update]]:  # apply each used sp.Update query to transform_graph
         logger.info("Update: %s", update.identifier)
         query = update.value(prov.value).value
         transform_graph.update(query)
 
-    for construct in [u for u in used if u[RDF.type:sp.Construct]]:
+    for construct in [u for u in used if u[rdflib.RDF.type:sp.Construct]]:  # add the result of each used sp.Construct query to transform_graph
         logger.info("Construct: %s", construct.identifier)
         query = construct.value(prov.value).value
         g = transform_graph.query(query)
         transform_graph += g
 
-    for csv in [u for u in used if u[RDF.type:csvw.Table]]:
-        g = Graph(identifier=csv.identifier,store=transform_graph.store)
+    for csv_file in [u for u in used if u[rdflib.RDF.type:csvw.Table]]:  # clean up: empty and remove the per-table graphs again
+        g = rdflib.Graph(identifier=csv_file.identifier,store=transform_graph.store)
         g.remove((None, None, None))
-        transform_graph.store.remove_graph(csv.identifier)
+        transform_graph.store.remove_graph(csv_file.identifier)
 
     for result in transform_graph.subjects(prov.wasGeneratedBy):
-        graphs[result.identifier] = transform_graph
+        resources[result.identifier] = transform_graph  # register the finished graph in resources under result.identifier
 
 def _load_open(generated):
     if generated.identifier.startswith("file://"):
@@ -855,18 +852,16 @@ def _load_open(generated):
 
     fh = open(filename, 'wb')
     for type, pack in packers.items():
-        if generated[RDF.type : type]:
+        if generated[rdflib.RDF.type : type]:
             return pack(fh)
     return fh
 
 def load(load_resource, resources):
     logger.info('Load %s',load_resource.identifier)
-    file_graph = Dataset(default_union=True)
-    to_disk = False
+    file_graph = rdflib.Dataset(default_union=True)
     for used in load_resource[prov.used]:
-        if used[RDF.type : setl.Persisted]:
-            to_disk = True
-            file_graph = Dataset(store='Sleepycat', default_union=True)
+        if used[rdflib.RDF.type : setl.Persisted]:
+            file_graph = rdflib.Dataset(store='Sleepycat', default_union=True)
             tempdir = tempfile.mkdtemp()
             logger.debug("Gathering %s into %s", load_resource.identifier, tempdir)
             file_graph.store.open(tempdir, True)
@@ -884,7 +879,7 @@ def load(load_resource, resources):
 
     for generated in load_resource.subjects(prov.wasGeneratedBy):
         # TODO: support LDP-based loading
-        if generated[RDF.type:pv.File]:
+        if generated[rdflib.RDF.type:pv.File]:
             fmt = generated.value(dc['format'])
             if fmt is not None:
                 fmt = fmt.value
@@ -894,15 +889,13 @@ def load(load_resource, resources):
             with _load_open(generated) as o:
                 file_graph.serialize(o, format=fmt)
 
-        elif generated[RDF.type:sd.Service]:
+        elif generated[rdflib.RDF.type:sd.Service]:
             from rdflib.plugins.stores.sparqlstore import SPARQLUpdateStore
             endpoint = generated.value(sd.endpoint, default=generated).identifier
             store = SPARQLUpdateStore(endpoint, endpoint, autocommit=False)
-            endpoint_graph = Dataset(store=store, identifier=generated.identifier, default_union=True)
+            endpoint_graph = rdflib.Dataset(store=store, identifier=generated.identifier, default_union=True)
             endpoint_graph.addN(file_graph.quads())
             endpoint_graph.commit()
-    #if to_disk:
-    #    file_graph.close()
 
 
 actions = {
@@ -987,7 +980,7 @@ def run_setl(setl_graph):
     tasks = [setl_graph.resource(t) for t in get_order(setl_graph)]
 
     for task in tasks:
-        action = [actions[t.identifier] for t in task[RDF.type] if t.identifier in actions]
+        action = [actions[t.identifier] for t in task[rdflib.RDF.type] if t.identifier in actions]
         if len(action) > 0:
             action[0](task, resources)
     return resources
@@ -995,7 +988,6 @@ def run_setl(setl_graph):
 
 logger = None
 
-import click
 @click.command()
 @click.option('--quiet', '-q', is_flag=True, default=False, help="Minimize logging.")
 @click.option('-n', default=-1, help="Only process the first N rows.", type=int)
@@ -1020,8 +1012,8 @@ def main(script, rdf_validation=None, text_validation=None, quiet=False, n=-1):
 
     global run_samples
     run_samples = n
-    setl_graph = ConjunctiveGraph()
+    setl_graph = rdflib.ConjunctiveGraph()
     content = open(script).read()
     setl_graph.parse(data=content, format="turtle")
 
-    graphs = run_setl(setl_graph)
+    run_setl(setl_graph)
diff --git a/setlr/trig_store.py b/setlr/trig_store.py
index ada7ac1..acc8c53 100644
--- a/setlr/trig_store.py
+++ b/setlr/trig_store.py
@@ -1,7 +1,4 @@
-import logging
-from threading import Thread
-from os.path import exists, abspath
-from os import mkdir
+from os.path import abspath
 from rdflib.store import Store, VALID_STORE, NO_STORE
 from rdflib.term import URIRef
 from urllib.request import pathname2url
@@ -110,7 +107,8 @@ def __len__(self, context=None):
         def blocks(files, size=65536):
             while True:
                 b = files.read(size)
-                if not b: break
+                if not b:
+                    break
                 yield b
 
         self.db_env.seek(0)
diff --git a/tests/setlr_test/test_api_compatibility.py b/tests/setlr_test/test_api_compatibility.py
index 698fbf2..e45ad79 100644
--- a/tests/setlr_test/test_api_compatibility.py
+++ b/tests/setlr_test/test_api_compatibility.py
@@ -15,7 +15,7 @@ def test_setl_deprecated_warning(self):
         
         with warnings.catch_warnings(record=True) as w:
             warnings.simplefilter("always")
-            result = setlr._setl(setl_graph)
+            setlr._setl(setl_graph)
             
             # Find our specific deprecation warning
             our_warnings = [warning for warning in w if "_setl()" in str(warning.message)]
@@ -65,7 +65,7 @@ def test_run_setl_no_deprecation_warning(self):
         
         with warnings.catch_warnings(record=True) as w:
             warnings.simplefilter("always")
-            result = setlr.run_setl(setl_graph)
+            setlr.run_setl(setl_graph)
             
             # Filter to only our deprecation warnings (not rdflib's)
             our_warnings = [warning for warning in w if "_setl()" in str(warning.message)]
diff --git a/tests/setlr_test/test_error_messages.py b/tests/setlr_test/test_error_messages.py
index 39eb791..9e74838 100644
--- a/tests/setlr_test/test_error_messages.py
+++ b/tests/setlr_test/test_error_messages.py
@@ -1,10 +1,7 @@
 import unittest
-import tempfile
-import os
 import logging
 from rdflib import ConjunctiveGraph, Namespace, Literal
 from io import StringIO
-import sys
 
 # Import setlr module
 import setlr
diff --git a/tests/setlr_test/test_read_json.py b/tests/setlr_test/test_read_json.py
index 10c6286..f4f2966 100644
--- a/tests/setlr_test/test_read_json.py
+++ b/tests/setlr_test/test_read_json.py
@@ -21,4 +21,4 @@ def test_read_json(self):
         self.assertCountEqual(expected_json, result[0][1], "JSON objects not equal")
 
 if __name__ == "__main__":
-    unittest.main()
\ No newline at end of file
+    unittest.main()

From 80350d184bfe36823e85705deaf48a2f28d4b03a Mon Sep 17 00:00:00 2001
From: "copilot-swe-agent[bot]" <198982749+Copilot@users.noreply.github.com>
Date: Sun, 18 Jan 2026 18:14:45 +0000
Subject: [PATCH 9/9] Add GitHub Actions workflow to run bootstrap and build
 scripts

Co-authored-by: jpmccu <602385+jpmccu@users.noreply.github.com>
---
 .github/workflows/build.yml | 47 +++++++++++++++++++++++++++++++++++++
 1 file changed, 47 insertions(+)
 create mode 100644 .github/workflows/build.yml

diff --git a/.github/workflows/build.yml b/.github/workflows/build.yml
new file mode 100644
index 0000000..126323e
--- /dev/null
+++ b/.github/workflows/build.yml
@@ -0,0 +1,47 @@
+name: Build
+
+on:
+  push:
+    branches: [ main, master, develop ]
+  pull_request:
+    branches: [ main, master, develop ]
+  workflow_dispatch:  # also allow manual runs
+
+jobs:
+  build:
+    name: Build on Python ${{ matrix.python-version }}
+    runs-on: ubuntu-latest
+    strategy:
+      fail-fast: false  # keep the other matrix jobs running when one Python version fails
+      matrix:
+        python-version: ['3.8', '3.9', '3.10', '3.11', '3.12']  # one job per listed interpreter
+    
+    steps:
+    - uses: actions/checkout@v4
+    
+    - name: Set up Python ${{ matrix.python-version }}
+      uses: actions/setup-python@v5
+      with:
+        python-version: ${{ matrix.python-version }}
+    
+    - name: Bootstrap development environment
+      run: ./script/bootstrap  # repository script; expected to be executable in the checkout
+    
+    - name: Build project
+      run: ./script/build  # repository script; expected to be executable in the checkout
+    
+    - name: Upload build artifacts
+      if: always()  # run this step even when an earlier step failed
+      uses: actions/upload-artifact@v4
+      with:
+        name: dist-${{ matrix.python-version }}  # artifact name is distinct per matrix entry
+        path: dist/
+        if-no-files-found: warn  # a missing dist/ only produces a warning
+    
+    - name: Upload test results
+      if: always()  # run this step even when an earlier step failed
+      uses: actions/upload-artifact@v4
+      with:
+        name: test-results-build-${{ matrix.python-version }}  # artifact name is distinct per matrix entry
+        path: test-results/
+        if-no-files-found: ignore  # test-results/ is optional; stay silent when it is absent