From d163917821e5cc3445c7e957a2481093a13e87d3 Mon Sep 17 00:00:00 2001
From: "copilot-swe-agent[bot]" <198982749+Copilot@users.noreply.github.com>
Date: Mon, 19 Jan 2026 04:46:29 +0000
Subject: [PATCH 1/2] Initial plan
From f9dacd2aea75dd7cb4e186aa17703fd92f3b6f12 Mon Sep 17 00:00:00 2001
From: "copilot-swe-agent[bot]" <198982749+Copilot@users.noreply.github.com>
Date: Mon, 19 Jan 2026 04:53:58 +0000
Subject: [PATCH 2/2] Add community health files, GitHub templates, and
documentation
Co-authored-by: jpmccu <602385+jpmccu@users.noreply.github.com>
---
.github/ISSUE_TEMPLATE/bug_report.yml | 140 +++++
.github/ISSUE_TEMPLATE/feature_request.yml | 148 +++++
.github/PULL_REQUEST_TEMPLATE.md | 61 ++
.github/dependabot.yml | 74 +++
.github/workflows/build.yml | 6 +
.github/workflows/release.yml | 91 +++
.pre-commit-config.yaml | 47 ++
CODE_OF_CONDUCT.md | 133 +++++
CONTRIBUTING.md | 285 +++++++++
README.md | 15 +-
SECURITY.md | 140 +++++
docs/advanced.md | 472 +++++++++++++++
docs/shacl.md | 638 +++++++++++++++++++++
docs/sparql.md | 506 ++++++++++++++++
requirements-dev.txt | 16 +
requirements.txt | 18 +
16 files changed, 2784 insertions(+), 6 deletions(-)
create mode 100644 .github/ISSUE_TEMPLATE/bug_report.yml
create mode 100644 .github/ISSUE_TEMPLATE/feature_request.yml
create mode 100644 .github/PULL_REQUEST_TEMPLATE.md
create mode 100644 .github/dependabot.yml
create mode 100644 .github/workflows/release.yml
create mode 100644 .pre-commit-config.yaml
create mode 100644 CODE_OF_CONDUCT.md
create mode 100644 CONTRIBUTING.md
create mode 100644 SECURITY.md
create mode 100644 docs/advanced.md
create mode 100644 docs/shacl.md
create mode 100644 docs/sparql.md
create mode 100644 requirements-dev.txt
create mode 100644 requirements.txt
diff --git a/.github/ISSUE_TEMPLATE/bug_report.yml b/.github/ISSUE_TEMPLATE/bug_report.yml
new file mode 100644
index 0000000..0a1eb55
--- /dev/null
+++ b/.github/ISSUE_TEMPLATE/bug_report.yml
@@ -0,0 +1,140 @@
+name: Bug Report
+description: Report a bug or unexpected behavior in SETLr
+title: "[Bug]: "
+labels: ["bug"]
+body:
+ - type: markdown
+ attributes:
+ value: |
+ Thanks for taking the time to report a bug! Please fill out the form below with as much detail as possible.
+
+ - type: textarea
+ id: description
+ attributes:
+ label: Bug Description
+ description: A clear and concise description of what the bug is.
+ placeholder: Describe the bug you encountered
+ validations:
+ required: true
+
+ - type: textarea
+ id: steps
+ attributes:
+ label: Steps to Reproduce
+ description: Detailed steps to reproduce the issue
+ placeholder: |
+ 1. Create a SETL script with...
+ 2. Run command...
+ 3. Observe error...
+ value: |
+ 1.
+ 2.
+ 3.
+ validations:
+ required: true
+
+ - type: textarea
+ id: expected
+ attributes:
+ label: Expected Behavior
+ description: What did you expect to happen?
+ placeholder: Describe what you expected to happen
+ validations:
+ required: true
+
+ - type: textarea
+ id: actual
+ attributes:
+ label: Actual Behavior
+ description: What actually happened? Include error messages and stack traces if available.
+ placeholder: Describe what actually happened
+ render: shell
+ validations:
+ required: true
+
+ - type: input
+ id: setlr-version
+ attributes:
+ label: SETLr Version
+ description: What version of SETLr are you using?
+ placeholder: "e.g., 1.0.2 (run: pip show setlr)"
+ validations:
+ required: true
+
+ - type: input
+ id: python-version
+ attributes:
+ label: Python Version
+ description: What version of Python are you using?
+ placeholder: "e.g., 3.11.5 (run: python --version)"
+ validations:
+ required: true
+
+ - type: input
+ id: os
+ attributes:
+ label: Operating System
+ description: What operating system are you using?
+ placeholder: "e.g., Ubuntu 22.04, macOS 14.0, Windows 11"
+ validations:
+ required: true
+
+ - type: textarea
+ id: environment
+ attributes:
+ label: Additional Environment Details
+ description: Any other relevant environment information
+ placeholder: |
+ - Installation method (pip, source)
+ - Virtual environment (venv, conda)
+ - Other relevant dependencies
+ render: markdown
+
+ - type: textarea
+ id: sample-data
+ attributes:
+ label: Sample Data/Configuration
+ description: If applicable, provide a minimal example that reproduces the issue
+ placeholder: |
+ Include:
+ - Sample input data (CSV, JSON, etc.)
+ - SETL script (or relevant portion)
+ - Command used to run SETLr
+ render: turtle
+
+ - type: textarea
+ id: logs
+ attributes:
+ label: Logs and Error Output
+ description: Paste any relevant logs or error output
+ render: shell
+
+ - type: textarea
+ id: workarounds
+ attributes:
+ label: Workarounds
+ description: Have you found any workarounds for this issue?
+ placeholder: Describe any workarounds you've found
+
+ - type: textarea
+ id: context
+ attributes:
+ label: Additional Context
+ description: Add any other context about the problem here
+ placeholder: |
+ - Does this issue occur consistently or intermittently?
+ - Did this work in a previous version?
+ - Any other relevant information
+
+ - type: checkboxes
+ id: checklist
+ attributes:
+ label: Checklist
+ description: Please confirm the following
+ options:
+ - label: I have searched existing issues to ensure this is not a duplicate
+ required: true
+ - label: I have provided all required information above
+ required: true
+ - label: I am using a supported version of SETLr (1.0.x)
+ required: false
diff --git a/.github/ISSUE_TEMPLATE/feature_request.yml b/.github/ISSUE_TEMPLATE/feature_request.yml
new file mode 100644
index 0000000..587fbee
--- /dev/null
+++ b/.github/ISSUE_TEMPLATE/feature_request.yml
@@ -0,0 +1,148 @@
+name: Feature Request
+description: Suggest a new feature or enhancement for SETLr
+title: "[Feature]: "
+labels: ["enhancement"]
+body:
+ - type: markdown
+ attributes:
+ value: |
+ Thanks for suggesting a new feature! Please provide as much detail as possible about your idea.
+
+ - type: textarea
+ id: problem
+ attributes:
+ label: Problem Description
+ description: Is your feature request related to a problem? Please describe what you're trying to accomplish.
+ placeholder: |
+ A clear and concise description of the problem or limitation you're facing.
+ Example: "I'm always frustrated when..."
+ validations:
+ required: true
+
+ - type: textarea
+ id: solution
+ attributes:
+ label: Proposed Solution
+ description: Describe the solution you'd like to see
+ placeholder: |
+ A clear and concise description of what you want to happen.
+ Include:
+ - How it would work
+ - Example usage (code, commands, or configuration)
+ - Expected output or behavior
+ validations:
+ required: true
+
+ - type: textarea
+ id: alternatives
+ attributes:
+ label: Alternatives Considered
+ description: Describe any alternative solutions or features you've considered
+ placeholder: |
+ What other approaches have you thought about?
+ Why would this solution be better than alternatives?
+ validations:
+ required: false
+
+ - type: textarea
+ id: use-case
+ attributes:
+ label: Use Case
+ description: Describe your specific use case for this feature
+ placeholder: |
+ Help us understand how you would use this feature:
+ - What kind of data are you working with?
+ - What is your workflow?
+ - How would this feature improve your experience?
+ validations:
+ required: false
+
+ - type: dropdown
+ id: feature-area
+ attributes:
+ label: Feature Area
+ description: Which area of SETLr does this feature relate to?
+ options:
+ - Data Sources (CSV, Excel, JSON, XML, RDF)
+ - Transformations (JSLDT, Python functions)
+ - SPARQL Integration
+ - CLI Tool
+ - Python API
+ - Documentation
+ - Performance
+ - Validation (SHACL)
+ - Other
+ validations:
+ required: true
+
+  - type: textarea
+    id: example
+    attributes:
+      label: Example Code/Configuration
+      description: If applicable, provide example code or configuration showing how the feature would be used
+      # No Markdown fences in the placeholder: `render` already wraps the
+      # submitted text in a python code block, so typed fences would nest.
+      placeholder: |
+        # Example usage of the proposed feature
+        from setlr import new_feature
+
+        result = new_feature(...)
+      render: python
+
+ - type: textarea
+ id: documentation
+ attributes:
+ label: Documentation Impact
+ description: What documentation would need to be added or updated?
+ placeholder: |
+ - New tutorial sections
+ - API documentation updates
+ - Example additions
+
+ - type: dropdown
+ id: priority
+ attributes:
+ label: Priority
+ description: How important is this feature to you?
+ options:
+ - Critical (blocking my work)
+ - High (would significantly improve my workflow)
+ - Medium (nice to have)
+ - Low (minor improvement)
+ validations:
+ required: false
+
+ - type: textarea
+ id: context
+ attributes:
+ label: Additional Context
+ description: Add any other context, screenshots, or examples about the feature request
+ placeholder: |
+ - Links to similar features in other tools
+ - Screenshots or diagrams
+ - Research or references
+ - Any other relevant information
+
+ - type: checkboxes
+ id: checklist
+ attributes:
+ label: Checklist
+ description: Please confirm the following
+ options:
+ - label: I have searched existing issues and feature requests to ensure this is not a duplicate
+ required: true
+ - label: I have provided a clear description of the problem and proposed solution
+ required: true
+ - label: This feature aligns with SETLr's purpose (RDF generation from tabular data)
+ required: true
+
+ - type: textarea
+ id: contribution
+ attributes:
+ label: Contribution
+ description: Would you be willing to contribute to implementing this feature?
+ placeholder: |
+ - I would like to implement this feature myself
+ - I can help test the implementation
+ - I can help with documentation
+ - I can provide use case examples
diff --git a/.github/PULL_REQUEST_TEMPLATE.md b/.github/PULL_REQUEST_TEMPLATE.md
new file mode 100644
index 0000000..653f47f
--- /dev/null
+++ b/.github/PULL_REQUEST_TEMPLATE.md
@@ -0,0 +1,61 @@
+# Pull Request
+
+## Description
+
+Please include a summary of the changes and the related issue. Explain the motivation and context for these changes.
+
+Fixes #(issue number)
+
+## Type of Change
+
+Please delete options that are not relevant:
+
+- [ ] Bug fix (non-breaking change which fixes an issue)
+- [ ] New feature (non-breaking change which adds functionality)
+- [ ] Breaking change (fix or feature that would cause existing functionality to not work as expected)
+- [ ] Documentation update
+- [ ] Code refactoring (no functional changes)
+- [ ] Performance improvement
+- [ ] Dependency update
+
+## Motivation and Context
+
+Why is this change required? What problem does it solve?
+
+## Testing Performed
+
+Please describe the tests that you ran to verify your changes:
+
+- [ ] Existing tests pass (`./script/build` or `nose2`)
+- [ ] Added new tests for new functionality
+- [ ] Manual testing performed (please describe below)
+
+**Test Configuration:**
+- Python version:
+- Operating System:
+
+**Manual Test Details:**
+```
+Describe any manual testing you performed
+```
+
+## Checklist
+
+Before submitting this PR, please ensure:
+
+- [ ] My code follows the code style of this project (PEP 8)
+- [ ] I have added docstrings to new functions/classes
+- [ ] I have updated the documentation (if applicable)
+- [ ] I have added tests that prove my fix is effective or that my feature works
+- [ ] All new and existing tests pass locally (`./script/build`)
+- [ ] I have updated the CHANGELOG.md file in the `[Unreleased]` section
+- [ ] I have checked that no large files or build artifacts are included
+- [ ] I have reviewed my own code for potential issues
+
+## Screenshots (if applicable)
+
+Add screenshots to help explain your changes (especially for UI changes or output format changes).
+
+## Additional Notes
+
+Add any other context about the pull request here.
diff --git a/.github/dependabot.yml b/.github/dependabot.yml
new file mode 100644
index 0000000..de719ea
--- /dev/null
+++ b/.github/dependabot.yml
@@ -0,0 +1,74 @@
+version: 2
+
+updates:
+  # pip ecosystem: Python packages declared at the repository root
+  - package-ecosystem: pip
+    directory: '/'
+    schedule:
+      interval: weekly
+      day: monday
+    open-pull-requests-limit: 5
+    labels:
+      - dependencies
+      - automated
+    groups:
+      # Tooling used for testing, linting, building and publishing
+      development-dependencies:
+        patterns:
+          - nose2
+          - coverage
+          - flake8
+          - pycodestyle
+          - pylint
+          - vulture
+          - build
+          - wheel
+          - twine
+          - pre-commit
+        update-types:
+          - minor
+          - patch
+
+      # Libraries listed as runtime requirements
+      production-dependencies:
+        patterns:
+          - rdflib
+          - pandas
+          - jinja2
+          - numpy
+          - cython
+          - requests
+          - toposort
+          - beautifulsoup4
+          - lxml
+          - six
+          - xlrd
+          - ijson
+          - click
+          - tqdm
+          - requests-testadapter
+          - python-slugify
+          - pyshacl
+          - future
+        update-types:
+          - minor
+          - patch
+
+  # github-actions ecosystem: actions referenced by the workflows
+  - package-ecosystem: github-actions
+    directory: '/'
+    schedule:
+      interval: weekly
+      day: monday
+    open-pull-requests-limit: 5
+    labels:
+      - dependencies
+      - automated
+      - github-actions
+    groups:
+      github-actions:
+        patterns:
+          - '*'
+        update-types:
+          - minor
+          - patch
diff --git a/.github/workflows/build.yml b/.github/workflows/build.yml
index 126323e..3e2b3d0 100644
--- a/.github/workflows/build.yml
+++ b/.github/workflows/build.yml
@@ -30,6 +30,12 @@ jobs:
- name: Build project
run: ./script/build
+ - name: Check distribution packages
+ if: matrix.python-version == '3.11'  # runs only for the 3.11 matrix entry
+ run: |  # NOTE(review): assumes ./script/build leaves packages in dist/ - confirm
+ pip install twine
+ twine check dist/*
+
- name: Upload build artifacts
if: always()
uses: actions/upload-artifact@v4
diff --git a/.github/workflows/release.yml b/.github/workflows/release.yml
new file mode 100644
index 0000000..e2e94a3
--- /dev/null
+++ b/.github/workflows/release.yml
@@ -0,0 +1,91 @@
+name: Release
+
+on:
+ push:
+ tags:
+ - 'v*'
+
+jobs:
+  release:
+    name: Create GitHub Release
+    runs-on: ubuntu-latest
+    permissions:
+      contents: write
+
+    steps:
+      - name: Checkout code
+        uses: actions/checkout@v4
+        with:
+          fetch-depth: 0
+
+      - name: Get version from tag
+        id: get_version
+        run: |
+          # Remove 'v' prefix from tag
+          VERSION="${GITHUB_REF#refs/tags/v}"
+          echo "version=$VERSION" >> "$GITHUB_OUTPUT"
+          echo "Version: $VERSION"
+
+      # Build and validate before releasing so a failed build cannot leave an empty release.
+      - name: Set up Python
+        uses: actions/setup-python@v5
+        with:
+          python-version: '3.11'
+
+      - name: Install build dependencies
+        run: |
+          python -m pip install --upgrade pip
+          pip install build twine
+
+      - name: Build distribution packages
+        run: python -m build
+
+      - name: Check distribution packages
+        run: twine check dist/*
+
+      - name: Extract changelog for this version
+        id: changelog
+        env:
+          # Passed through env rather than inline interpolation so the tag text is never parsed as shell.
+          VERSION: ${{ steps.get_version.outputs.version }}
+        run: |
+          # Copy the "## [VERSION]" section, stopping before the next "## [" heading.
+          # index() matches literally, so dots in the version are not regex wildcards.
+          : > release_notes.md
+          if [ -f CHANGELOG.md ]; then
+            awk -v ver="$VERSION" '
+              index($0, "## [" ver "]") == 1 { found = 1; print; next }
+              found && /^## \[/ { exit }
+              found { print }
+            ' CHANGELOG.md > release_notes.md
+          fi
+
+          # If nothing was extracted, fall back to a default message
+          if [ ! -s release_notes.md ]; then
+            printf '## Release %s\n\nSee [CHANGELOG.md](CHANGELOG.md) for details.\n' "$VERSION" > release_notes.md
+          fi
+
+          echo "Extracted changelog:"
+          cat release_notes.md
+
+      - name: Create GitHub Release
+        uses: softprops/action-gh-release@v1
+        with:
+          name: Release v${{ steps.get_version.outputs.version }}
+          body_path: release_notes.md
+          draft: false
+          prerelease: false
+          generate_release_notes: false
+          files: |
+            dist/*.tar.gz
+            dist/*.whl
+        env:
+          GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }}
+
+      - name: Publish to PyPI
+        if: startsWith(github.ref, 'refs/tags/v')
+        env:
+          TWINE_USERNAME: __token__
+          TWINE_PASSWORD: ${{ secrets.PYPI_API_TOKEN }}
+        run: |
+          twine upload dist/*
diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml
new file mode 100644
index 0000000..e9754b8
--- /dev/null
+++ b/.pre-commit-config.yaml
@@ -0,0 +1,47 @@
+# Hook configuration for the pre-commit framework: https://pre-commit.com
+# Catalogue of additional hooks: https://pre-commit.com/hooks.html
+repos:
+  - repo: https://github.com/pre-commit/pre-commit-hooks
+    rev: v4.5.0
+    hooks:
+      - id: trailing-whitespace
+        name: Trim trailing whitespace
+      - id: end-of-file-fixer
+        name: Fix end of files
+      - id: check-yaml
+        name: Check YAML files
+        exclude: '^\.circleci/'
+      - id: check-added-large-files
+        name: Check for large files
+        args: ["--maxkb=1000"]
+      - id: check-merge-conflict
+        name: Check for merge conflicts
+      - id: check-toml
+        name: Check TOML files
+      - id: check-json
+        name: Check JSON files
+      - id: mixed-line-ending
+        name: Check line endings
+
+  - repo: https://github.com/psf/black
+    rev: "24.1.1"
+    hooks:
+      - id: black
+        name: Format code with Black
+        language_version: python3
+        args: ["--line-length=120"]
+
+  - repo: https://github.com/pycqa/flake8
+    rev: "7.0.0"
+    hooks:
+      - id: flake8
+        name: Lint with Flake8
+        args: ["--config=setup.cfg"]
+        exclude: '^(venv|\.git|build|dist|\.eggs|\.tox)/'
+
+  - repo: https://github.com/pycqa/isort
+    rev: "5.13.2"
+    hooks:
+      - id: isort
+        name: Sort imports with isort
+        args: ["--profile", "black", "--line-length", "120"]
diff --git a/CODE_OF_CONDUCT.md b/CODE_OF_CONDUCT.md
new file mode 100644
index 0000000..8827db6
--- /dev/null
+++ b/CODE_OF_CONDUCT.md
@@ -0,0 +1,133 @@
+# Contributor Covenant Code of Conduct
+
+## Our Pledge
+
+We as members, contributors, and leaders pledge to make participation in our
+community a harassment-free experience for everyone, regardless of age, body
+size, visible or invisible disability, ethnicity, sex characteristics, gender
+identity and expression, level of experience, education, socio-economic status,
+nationality, personal appearance, race, caste, color, religion, or sexual
+identity and orientation.
+
+We pledge to act and interact in ways that contribute to an open, welcoming,
+diverse, inclusive, and healthy community.
+
+## Our Standards
+
+Examples of behavior that contributes to a positive environment for our
+community include:
+
+* Demonstrating empathy and kindness toward other people
+* Being respectful of differing opinions, viewpoints, and experiences
+* Giving and gracefully accepting constructive feedback
+* Accepting responsibility and apologizing to those affected by our mistakes,
+ and learning from the experience
+* Focusing on what is best not just for us as individuals, but for the overall
+ community
+
+Examples of unacceptable behavior include:
+
+* The use of sexualized language or imagery, and sexual attention or advances of
+ any kind
+* Trolling, insulting or derogatory comments, and personal or political attacks
+* Public or private harassment
+* Publishing others' private information, such as a physical or email address,
+ without their explicit permission
+* Other conduct which could reasonably be considered inappropriate in a
+ professional setting
+
+## Enforcement Responsibilities
+
+Community leaders are responsible for clarifying and enforcing our standards of
+acceptable behavior and will take appropriate and fair corrective action in
+response to any behavior that they deem inappropriate, threatening, offensive,
+or harmful.
+
+Community leaders have the right and responsibility to remove, edit, or reject
+comments, commits, code, wiki edits, issues, and other contributions that are
+not aligned to this Code of Conduct, and will communicate reasons for moderation
+decisions when appropriate.
+
+## Scope
+
+This Code of Conduct applies within all community spaces, and also applies when
+an individual is officially representing the community in public spaces.
+Examples of representing our community include using an official e-mail address,
+posting via an official social media account, or acting as an appointed
+representative at an online or offline event.
+
+## Enforcement
+
+Instances of abusive, harassing, or otherwise unacceptable behavior may be
+reported to the community leaders responsible for enforcement at
+[mccusj@cs.rpi.edu](mailto:mccusj@cs.rpi.edu).
+
+All complaints will be reviewed and investigated promptly and fairly.
+
+All community leaders are obligated to respect the privacy and security of the
+reporter of any incident.
+
+## Enforcement Guidelines
+
+Community leaders will follow these Community Impact Guidelines in determining
+the consequences for any action they deem in violation of this Code of Conduct:
+
+### 1. Correction
+
+**Community Impact**: Use of inappropriate language or other behavior deemed
+unprofessional or unwelcome in the community.
+
+**Consequence**: A private, written warning from community leaders, providing
+clarity around the nature of the violation and an explanation of why the
+behavior was inappropriate. A public apology may be requested.
+
+### 2. Warning
+
+**Community Impact**: A violation through a single incident or series of
+actions.
+
+**Consequence**: A warning with consequences for continued behavior. No
+interaction with the people involved, including unsolicited interaction with
+those enforcing the Code of Conduct, for a specified period of time. This
+includes avoiding interactions in community spaces as well as external channels
+like social media. Violating these terms may lead to a temporary or permanent
+ban.
+
+### 3. Temporary Ban
+
+**Community Impact**: A serious violation of community standards, including
+sustained inappropriate behavior.
+
+**Consequence**: A temporary ban from any sort of interaction or public
+communication with the community for a specified period of time. No public or
+private interaction with the people involved, including unsolicited interaction
+with those enforcing the Code of Conduct, is allowed during this period.
+Violating these terms may lead to a permanent ban.
+
+### 4. Permanent Ban
+
+**Community Impact**: Demonstrating a pattern of violation of community
+standards, including sustained inappropriate behavior, harassment of an
+individual, or aggression toward or disparagement of classes of individuals.
+
+**Consequence**: A permanent ban from any sort of public interaction within the
+community.
+
+## Attribution
+
+This Code of Conduct is adapted from the [Contributor Covenant][homepage],
+version 2.1, available at
+[https://www.contributor-covenant.org/version/2/1/code_of_conduct.html][v2.1].
+
+Community Impact Guidelines were inspired by
+[Mozilla's code of conduct enforcement ladder][Mozilla CoC].
+
+For answers to common questions about this code of conduct, see the FAQ at
+[https://www.contributor-covenant.org/faq][FAQ]. Translations are available at
+[https://www.contributor-covenant.org/translations][translations].
+
+[homepage]: https://www.contributor-covenant.org
+[v2.1]: https://www.contributor-covenant.org/version/2/1/code_of_conduct.html
+[Mozilla CoC]: https://github.com/mozilla/diversity
+[FAQ]: https://www.contributor-covenant.org/faq
+[translations]: https://www.contributor-covenant.org/translations
diff --git a/CONTRIBUTING.md b/CONTRIBUTING.md
new file mode 100644
index 0000000..ec33a1f
--- /dev/null
+++ b/CONTRIBUTING.md
@@ -0,0 +1,285 @@
+# Contributing to SETLr
+
+Thank you for your interest in contributing to SETLr! We welcome contributions from the community.
+
+## Table of Contents
+
+- [Code of Conduct](#code-of-conduct)
+- [Getting Started](#getting-started)
+- [Development Setup](#development-setup)
+- [Code Standards](#code-standards)
+- [Testing Guidelines](#testing-guidelines)
+- [Commit Message Conventions](#commit-message-conventions)
+- [Pull Request Process](#pull-request-process)
+- [Pre-commit Hooks](#pre-commit-hooks)
+
+## Code of Conduct
+
+This project and everyone participating in it is governed by our [Code of Conduct](CODE_OF_CONDUCT.md). By participating, you are expected to uphold this code. Please report unacceptable behavior to [mccusj@cs.rpi.edu](mailto:mccusj@cs.rpi.edu).
+
+## Getting Started
+
+1. Fork the repository on GitHub
+2. Clone your fork locally
+3. Create a new branch for your feature or bug fix
+4. Make your changes
+5. Test your changes
+6. Submit a pull request
+
+## Development Setup
+
+SETLr includes scripts to help you set up your development environment quickly.
+
+### Prerequisites
+
+- Python 3.8 or higher
+- Git
+
+### Bootstrap Your Environment
+
+The `bootstrap` script will create a virtual environment and install all dependencies:
+
+```bash
+# Clone the repository
+git clone https://github.com/YOUR_USERNAME/setlr.git
+cd setlr
+
+# Run the bootstrap script
+./script/bootstrap
+
+# Activate the virtual environment
+source venv/bin/activate
+```
+
+The bootstrap script will:
+- Create a Python virtual environment in `venv/`
+- Install setlr in editable mode
+- Install all development dependencies (nose2, coverage, flake8, pylint, vulture)
+- Install build tools (build, wheel, twine)
+
+### Building and Testing
+
+Use the provided scripts for common development tasks:
+
+```bash
+# Run linting checks and tests (full build)
+./script/build
+
+# Run tests only
+nose2 --verbose
+
+# Run tests with coverage
+nose2 --with-coverage --coverage-report html
+
+# Run linting
+flake8 setlr/ tests/
+```
+
+## Code Standards
+
+SETLr follows Python best practices and PEP 8 style guidelines (with some project-specific exceptions defined in `setup.cfg`).
+
+### Style Guidelines
+
+- **PEP 8**: Follow [PEP 8](https://pep8.org/) style guidelines for Python code
+- **Docstrings**: Use [PEP 257](https://peps.python.org/pep-0257/) docstring conventions
+ - All public modules, functions, classes, and methods should have docstrings
+ - Use triple quotes (`"""`) for docstrings
+ - Start with a one-line summary, followed by a blank line if more detail is needed
+- **Type Hints**: Use type hints for function parameters and return values where appropriate
+- **Import Order**: Organize imports in the following order:
+ 1. Standard library imports
+ 2. Related third-party imports
+ 3. Local application/library specific imports
+- **Line Length**: Keep lines under 120 characters when practical (some flexibility given)
+
+### Naming Conventions
+
+- **Variables and Functions**: `lowercase_with_underscores`
+- **Classes**: `CapitalizedWords` (PascalCase)
+- **Constants**: `UPPERCASE_WITH_UNDERSCORES`
+- **Private**: Prefix with single underscore `_private_function`
+
+### Example Code Style
+
+```python
+from typing import Dict, Optional
+import pandas as pd
+from rdflib import Graph
+
+def process_data(input_file: str, options: Optional[Dict] = None) -> Graph:
+ """
+ Process tabular data and generate an RDF graph.
+
+ Args:
+ input_file: Path to the input data file
+ options: Optional configuration dictionary
+
+ Returns:
+ An RDFlib Graph containing the processed data
+
+ Raises:
+ ValueError: If input_file does not exist
+ """
+ if options is None:
+ options = {}
+
+ # Implementation here
+ pass
+```
+
+## Testing Guidelines
+
+SETLr uses `nose2` as the test runner. All new features and bug fixes should include tests.
+
+### Writing Tests
+
+- Place tests in the `tests/` directory
+- Test files should be named `test_*.py`
+- Test functions should be named `test_*`
+- Use descriptive test names that explain what is being tested
+- Include both positive and negative test cases
+- Mock external dependencies when appropriate
+
+### Running Tests
+
+```bash
+# Run all tests
+nose2 --verbose
+
+# Run specific test file
+nose2 tests.test_module
+
+# Run specific test
+nose2 tests.test_module.TestClass.test_method
+
+# Run with coverage
+nose2 --with-coverage --coverage setlr/
+```
+
+### Test Coverage
+
+- Aim for high test coverage of new code
+- The project uses `coverage` to track test coverage
+- Run `nose2 --with-coverage` to generate coverage reports
+
+## Commit Message Conventions
+
+Write clear and descriptive commit messages:
+
+### Format
+
+```
+Short summary (50 chars or less)
+
+More detailed explanation if necessary. Wrap at 72 characters.
+Explain the problem that this commit is solving. Focus on why you
+are making this change rather than how.
+
+- Bullet points are okay
+- Use present tense ("Add feature" not "Added feature")
+- Reference issues and pull requests
+
+Fixes #123
+```
+
+### Examples
+
+Good commit messages:
+- `Add support for streaming JSON parsing`
+- `Fix memory leak in XML processing`
+- `Update documentation for JSLDT templates`
+- `Refactor SPARQL query builder for clarity`
+
+## Pull Request Process
+
+1. **Create a Branch**: Create a feature branch from `develop` (or `main` if no develop branch exists)
+ ```bash
+ git checkout -b feature/my-new-feature
+ ```
+
+2. **Make Your Changes**: Implement your feature or bug fix
+
+3. **Test Your Changes**: Ensure all tests pass
+ ```bash
+ ./script/build
+ ```
+
+4. **Update Documentation**: Update relevant documentation in the `docs/` directory
+
+5. **Update CHANGELOG**: Add an entry to the `[Unreleased]` section of `CHANGELOG.md`
+
+6. **Commit Your Changes**: Use clear commit messages
+
+7. **Push to Your Fork**:
+ ```bash
+ git push origin feature/my-new-feature
+ ```
+
+8. **Open a Pull Request**: Open a PR against the main repository
+ - Use the PR template
+ - Provide a clear description of the changes
+ - Link to any related issues
+ - Ensure CI checks pass
+
+9. **Code Review**: Address any feedback from maintainers
+
+10. **Merge**: Once approved, a maintainer will merge your PR
+
+### Pull Request Checklist
+
+Before submitting your pull request, ensure:
+
+- [ ] Code follows the project's style guidelines
+- [ ] All tests pass (`./script/build`)
+- [ ] New tests are added for new features
+- [ ] Documentation is updated
+- [ ] CHANGELOG.md is updated
+- [ ] Commit messages are clear and descriptive
+- [ ] No unnecessary files are included (check `.gitignore`)
+
+## Pre-commit Hooks
+
+SETLr uses pre-commit hooks to automatically check code quality before commits.
+
+### Setup Pre-commit
+
+```bash
+# Install pre-commit (if not already installed)
+pip install pre-commit
+
+# Install the git hooks
+pre-commit install
+```
+
+### Using Pre-commit
+
+Once installed, pre-commit will run automatically on `git commit`. You can also run it manually:
+
+```bash
+# Run on all files
+pre-commit run --all-files
+
+# Run on staged files only
+pre-commit run
+```
+
+The pre-commit hooks will:
+- Remove trailing whitespace
+- Fix end of file
+- Check YAML syntax
+- Check for large files
+- Check for merge conflicts
+- Format code with Black
+- Lint code with Flake8
+- Sort imports with isort
+
+## Questions?
+
+If you have questions about contributing, please:
+
+- Check the [documentation](docs/README.md)
+- Open a [discussion](https://github.com/tetherless-world/setlr/discussions)
+- Contact the maintainer at [mccusj@cs.rpi.edu](mailto:mccusj@cs.rpi.edu)
+
+Thank you for contributing to SETLr!
diff --git a/README.md b/README.md
index 87b1fce..d4d480e 100644
--- a/README.md
+++ b/README.md
@@ -2,6 +2,7 @@
[](https://github.com/tetherless-world/setlr/actions/workflows/test.yml)
[](https://github.com/tetherless-world/setlr/actions/workflows/lint.yml)
+[![codecov](https://codecov.io/gh/tetherless-world/setlr/graph/badge.svg)](https://codecov.io/gh/tetherless-world/setlr)
**SETLr** is a powerful Python tool for generating RDF graphs from tabular data using declarative SETL (Semantic Extract, Transform, Load) scripts.
@@ -143,12 +144,13 @@ flake8 setlr/
## Contributing
-Contributions are welcome! Please:
-1. Fork the repository
-2. Create a feature branch
-3. Add tests for new functionality
-4. Ensure all tests pass
-5. Submit a pull request
+Contributions are welcome! Please see our [Contributing Guide](CONTRIBUTING.md) for details on:
+- Development setup and workflow
+- Code standards and style guidelines
+- Testing requirements
+- Pull request process
+
+Please note that this project follows a [Code of Conduct](CODE_OF_CONDUCT.md). By participating, you are expected to uphold this code.
## License
@@ -172,3 +174,4 @@ If you use SETLr in your research, please cite:
- 📖 [Documentation](docs/README.md)
- 🐛 [Issue Tracker](https://github.com/tetherless-world/setlr/issues)
- 💬 [Discussions](https://github.com/tetherless-world/setlr/discussions)
+- 🔒 [Security Policy](SECURITY.md) - Report security vulnerabilities
diff --git a/SECURITY.md b/SECURITY.md
new file mode 100644
index 0000000..10201f1
--- /dev/null
+++ b/SECURITY.md
@@ -0,0 +1,140 @@
+# Security Policy
+
+## Supported Versions
+
+The following versions of SETLr are currently supported with security updates:
+
+| Version | Supported |
+| ------- | ------------------ |
+| 1.0.x | :white_check_mark: |
+| < 1.0 | :x: |
+
+## Reporting a Vulnerability
+
+The SETLr team takes security vulnerabilities seriously. We appreciate your efforts to responsibly disclose your findings.
+
+### How to Report
+
+**Please do not report security vulnerabilities through public GitHub issues.**
+
+Instead, please report security vulnerabilities by email to:
+
+**[mccusj@cs.rpi.edu](mailto:mccusj@cs.rpi.edu)**
+
+Include the following information in your report:
+
+- Type of vulnerability
+- Full paths of source file(s) related to the vulnerability
+- Location of the affected source code (tag/branch/commit or direct URL)
+- Any special configuration required to reproduce the issue
+- Step-by-step instructions to reproduce the issue
+- Proof-of-concept or exploit code (if possible)
+- Impact of the issue, including how an attacker might exploit it
+
+### Response Timeline
+
+- **Initial Response**: We will acknowledge receipt of your vulnerability report within 48 hours
+- **Status Update**: We will send an initial status update within 7 days and keep you regularly informed of our progress after that
+- **Resolution**: We aim to resolve critical vulnerabilities within 30 days
+
+### What to Expect
+
+After you submit a report, we will:
+
+1. Confirm receipt of your vulnerability report
+2. Investigate and validate the vulnerability
+3. Work on a fix and determine a release timeline
+4. Keep you informed of our progress
+5. Credit you in the security advisory (if you wish)
+
+### Disclosure Policy
+
+- We ask that you do not publicly disclose the vulnerability until we have released a fix
+- Once the vulnerability is fixed, we will publish a security advisory
+- We will credit you as the reporter (unless you prefer to remain anonymous)
+
+## Security Best Practices for Using SETLr
+
+### Input Validation
+
+When using SETLr to process data:
+
+1. **Validate Input Sources**: Ensure data sources come from trusted origins
+2. **Sanitize File Paths**: Be careful with user-provided file paths to prevent path traversal attacks
+3. **Limit File Sizes**: Implement size limits for input files to prevent denial-of-service attacks
+4. **Validate Templates**: Review JSLDT templates for potential code injection vulnerabilities
+
+### Template Security
+
+JSLDT templates use Jinja2 templating. To prevent template injection attacks:
+
+1. **Avoid User-Provided Templates**: Do not allow untrusted users to provide arbitrary templates
+2. **Use Autoescape**: Enable autoescaping when generating output formats that interpret special characters
+3. **Limit Template Features**: Disable dangerous Jinja2 features if not needed
+4. **Review Python Functions**: Carefully review any custom Python functions used in transforms
+
+### RDF/SPARQL Security
+
+When working with RDF data and SPARQL endpoints:
+
+1. **Validate SPARQL Queries**: Sanitize any dynamic SPARQL queries to prevent SPARQL injection
+2. **Authenticate Endpoints**: Use authentication for SPARQL Update endpoints
+3. **Limit Endpoint Access**: Restrict network access to sensitive SPARQL endpoints
+4. **Validate RDF Input**: Parse RDF data from untrusted sources with caution
+
+### Python Security
+
+When using SETLr's Python API:
+
+1. **Pin Dependencies**: Use specific version numbers for production deployments
+2. **Update Regularly**: Keep SETLr and its dependencies up to date
+3. **Isolate Execution**: Run SETLr in isolated environments (containers, virtual machines)
+4. **Limit Permissions**: Run with minimum required filesystem and network permissions
+
+### XML Processing
+
+When processing XML files:
+
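+1. **Prevent XXE Attacks**: SETLr parses XML with lxml, which blocks network access for external entities by default; lxml 5.0+ also refuses to load external entities from local files, while older versions need `resolve_entities=False` for untrusted XML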
+2. **Limit Entity Expansion**: Be aware of XML bomb attacks with deeply nested entities
+3. **Validate XML Sources**: Only process XML from trusted sources
+
+## Security Features
+
+### Built-in Protections
+
+SETLr includes several security features:
+
+- **Safe XML Parsing**: Uses lxml, whose default parser blocks network access for external entities (use lxml 5.0+ or `resolve_entities=False` for full XXE protection)
+- **Template Sandboxing**: Jinja2 templates run in a sandboxed environment
+- **Input Validation**: Validates input formats and structures
+- **Error Handling**: Provides detailed error messages without exposing sensitive information
+
+## Known Limitations
+
+- **Python Code Execution**: Custom Python transform functions execute with the permissions of the Python process
+- **File System Access**: SETLr can read and write files based on the provided configuration
+- **Network Access**: Can make HTTP requests and connect to SPARQL endpoints as configured
+
+## Security Updates
+
+Security updates will be released as patch versions (e.g., 1.0.3) and announced through:
+
+- GitHub Security Advisories
+- Release notes in CHANGELOG.md
+- GitHub Releases
+
+Subscribe to repository notifications to stay informed about security updates.
+
+## Questions?
+
+If you have questions about security that are not sensitive in nature, please:
+
+- Open a [GitHub Discussion](https://github.com/tetherless-world/setlr/discussions)
+- Check the [documentation](docs/README.md)
+
+For sensitive security matters, please email [mccusj@cs.rpi.edu](mailto:mccusj@cs.rpi.edu).
+
+---
+
+Thank you for helping keep SETLr and its users safe!
diff --git a/docs/advanced.md b/docs/advanced.md
new file mode 100644
index 0000000..86099cc
--- /dev/null
+++ b/docs/advanced.md
@@ -0,0 +1,472 @@
+# Advanced Features
+
+SETLr provides powerful advanced capabilities for complex data transformation workflows, large-scale processing, and production deployments.
+
+## Overview
+
+This guide covers advanced topics including:
+
+- Multi-source transforms
+- Conditional loading and filtering
+- Performance optimization
+- Error handling and debugging
+- Integration patterns
+
+For specific advanced features, see:
+- [Streaming XML with XPath](streaming-xml.md) - Efficient large file processing
+- [Python Functions in Transforms](python-functions.md) - Custom Python code
+- [SPARQL Support](sparql.md) - Query and update endpoints
+- [SHACL Validation](shacl.md) - Validate your RDF output
+
+## Multi-Source Transforms
+
+SETLr can combine data from multiple sources in a single transform.
+
+### Combining Multiple Tables
+
+```turtle
+@prefix setl: <http://purl.org/twc/vocab/setl/> .
+@prefix prov: <http://www.w3.org/ns/prov#> .
+@prefix csvw: <http://www.w3.org/ns/csvw#> .
+
+# Load first table
+:users a csvw:Table, setl:Table ;
+ prov:wasGeneratedBy [
+ a setl:Extract ;
+ prov:used <users.csv>
+ ] .
+
+# Load second table
+:orders a csvw:Table, setl:Table ;
+ prov:wasGeneratedBy [
+ a setl:Extract ;
+ prov:used <orders.csv>
+ ] .
+
+# Transform using both tables
+:output prov:wasGeneratedBy [
+ a setl:Transform, setl:JSLDT ;
+ prov:used :users, :orders ;
+ prov:value '''
+ [{
+ "@for": "user in users",
+ "@do": {
+ "@id": "http://example.com/user/{{user.ID}}",
+ "@type": "Person",
+ "name": "{{user.Name}}",
+ "orders": [{
+ "@for": "order in orders",
+ "@if": "order.UserID == user.ID",
+ "@do": {
+ "@id": "http://example.com/order/{{order.OrderID}}"
+ }
+ }]
+ }
+ }]
+ '''
+] .
+```
+
+### Loading from Different Formats
+
+```turtle
+# CSV data
+:csv_table a csvw:Table, setl:Table ;
+ prov:wasGeneratedBy [
+ a setl:Extract ;
+ prov:used <data.csv>
+ ] .
+
+# JSON data
+:json_data a setl:Table ;
+ prov:wasGeneratedBy [
+ a setl:Extract ;
+ prov:used <data.json> ;
+ setl:hasJSONSelector "$.items[*]"
+ ] .
+
+# XML data
+:xml_data a setl:Table ;
+ prov:wasGeneratedBy [
+ a setl:Extract ;
+ prov:used <data.xml> ;
+ setl:hasXPathSelector "//item"
+ ] .
+```
+
+## Conditional Loading
+
+Use conditional logic to selectively process data based on runtime conditions.
+
+### Filtering with @if
+
+```json
+[{
+ "@for": "row in table",
+ "@if": "row.Status == 'active' and row.Score > 50",
+ "@do": {
+ "@id": "http://example.com/entity/{{row.ID}}",
+ "@type": "ActiveEntity",
+ "score": "{{row.Score}}"
+ }
+}]
+```
+
+### Conditional Fields
+
+```json
+{
+ "@id": "http://example.com/person/{{row.ID}}",
+ "@type": "Person",
+ "name": "{{row.Name}}",
+ "email": {
+ "@if": "row.Email",
+ "@do": "mailto:{{row.Email}}"
+ },
+ "phone": {
+ "@if": "row.Phone and row.PhoneVerified",
+ "@do": "{{row.Phone}}"
+ }
+}
+```
+
+## Performance Optimization
+
+### Streaming Processing
+
+For large XML files, use streaming to reduce memory usage:
+
+```turtle
+:big_xml a setl:Table ;
+ prov:wasGeneratedBy [
+ a setl:Extract ;
+ prov:used <large_file.xml> ;
+ setl:hasXPathSelector "//record" ;
+ setl:streaming true # Enable streaming
+ ] .
+```
+
+See [Streaming XML documentation](streaming-xml.md) for details.
+
+### Batch Processing
+
+Process data in batches to control memory usage:
+
+```python
+from rdflib import Graph, Namespace, URIRef
+import setlr
+
+# For very large datasets, process in chunks
+chunk_size = 10000
+offset = 0
+
+output_graph = Graph()
+
+while True:
+ # Create SETL script for this batch
+ setl_graph = create_batch_setl(offset, chunk_size)
+
+ # Process batch
+ resources = setlr.run_setl(setl_graph)
+
+ # Accumulate results
+ batch_output = resources[URIRef('http://example.com/output')]
+ output_graph += batch_output
+
+ # Stop once a batch yields no triples (len() counts triples, not source rows)
+ if len(batch_output) == 0:
+ break
+
+ offset += chunk_size
+
+# Save final results
+output_graph.serialize('output.ttl', format='turtle')
+```
+
+### Pandas Optimization
+
+For CSV/Excel files, pandas is used automatically. Optimize with:
+
+```python
+# Use appropriate dtypes to reduce memory
+# Specify in your data loading if possible
+
+# For very wide tables, select only needed columns
+# by processing the source data first
+```
+
+## Error Handling and Debugging
+
+### Verbose Logging
+
+Enable detailed logging to diagnose issues:
+
+```python
+import logging
+import setlr
+
+# Enable debug logging
+logging.basicConfig(level=logging.DEBUG)
+logger = logging.getLogger('setlr')
+logger.setLevel(logging.DEBUG)
+
+# Now run SETL
+resources = setlr.run_setl(setl_graph)
+```
+
+### Progress Tracking
+
+Use tqdm for progress tracking on large datasets:
+
+```python
+from tqdm import tqdm
+import setlr
+
+# Progress bars are automatically shown for:
+# - Large file processing
+# - Batch operations
+# - Network transfers
+
+resources = setlr.run_setl(setl_graph)
+```
+
+### Validation During Development
+
+Validate intermediate results to catch issues early:
+
+```python
+from rdflib import Graph, RDF, URIRef
+import setlr
+
+# Process data
+resources = setlr.run_setl(setl_graph)
+output = resources[URIRef('http://example.com/output')]
+
+# Validate results
+print(f"Generated {len(output)} triples")
+print(f"Subjects: {len(set(output.subjects()))}")
+print(f"Predicates: {len(set(output.predicates()))}")
+print(f"Objects: {len(set(output.objects()))}")
+
+# Check for specific patterns
+for s, p, o in output.triples((None, RDF.type, None)):
+ print(f"Type: {o}")
+```
+
+### Error Recovery
+
+Handle errors gracefully in production:
+
+```python
+import setlr
+from rdflib import Graph
+
+try:
+ setl_graph = Graph()
+ setl_graph.parse('transform.setl.ttl', format='turtle')
+ resources = setlr.run_setl(setl_graph)
+
+except setlr.SetlrError as e:
+ print(f"SETL processing error: {e}")
+ # Handle gracefully
+
+except Exception as e:
+ print(f"Unexpected error: {e}")
+ # Log and notify
+```
+
+## Integration Patterns
+
+### CI/CD Integration
+
+Integrate SETLr into your CI/CD pipeline:
+
+```yaml
+# GitHub Actions example
+name: Generate RDF
+
+on: [push]
+
+jobs:
+ generate:
+ runs-on: ubuntu-latest
+ steps:
+ - uses: actions/checkout@v4
+
+ - name: Set up Python
+ uses: actions/setup-python@v5
+ with:
+ python-version: '3.11'
+
+ - name: Install SETLr
+ run: pip install setlr
+
+ - name: Generate RDF
+ run: setlr transform.setl.ttl -o output.ttl
+
+ - name: Upload artifact
+ uses: actions/upload-artifact@v4
+ with:
+ name: rdf-output
+ path: output.ttl
+```
+
+### Docker Integration
+
+Use SETLr in containerized environments:
+
+```dockerfile
+FROM python:3.11-slim
+
+# Install SETLr
+RUN pip install setlr
+
+# Copy your SETL scripts and data
+COPY transform.setl.ttl /app/
+COPY data/ /app/data/
+
+WORKDIR /app
+
+# Run transformation
+CMD ["setlr", "transform.setl.ttl", "-o", "/output/result.ttl"]
+```
+
+```bash
+# Build and run
+docker build -t my-setl-transform .
+docker run -v $(pwd)/output:/output my-setl-transform
+```
+
+### Scheduled Processing
+
+Run SETLr transformations on a schedule:
+
+```python
+# scheduled_transform.py
+import schedule
+import time
+from rdflib import Graph, URIRef
+import setlr
+
+def run_transform():
+ """Run the SETL transformation"""
+ print("Starting transformation...")
+
+ setl_graph = Graph()
+ setl_graph.parse('transform.setl.ttl', format='turtle')
+
+ resources = setlr.run_setl(setl_graph)
+
+ # Save output with timestamp
+ timestamp = time.strftime('%Y%m%d_%H%M%S')
+ output_file = f'output_{timestamp}.ttl'
+
+ output = resources[URIRef('http://example.com/output')]
+ output.serialize(output_file, format='turtle')
+
+ print(f"Transformation complete: {output_file}")
+
+# Schedule to run every day at 2 AM
+schedule.every().day.at("02:00").do(run_transform)
+
+while True:
+ schedule.run_pending()
+ time.sleep(60)
+```
+
+### REST API Wrapper
+
+Expose SETLr as a REST API:
+
+```python
+from flask import Flask, request, jsonify
+from rdflib import Graph, URIRef
+import setlr
+import tempfile
+
+app = Flask(__name__)
+
+@app.route('/transform', methods=['POST'])
+def transform():
+ """Accept CSV data and SETL script, return RDF"""
+
+ # Get input
+ csv_data = request.files['data']
+ setl_script = request.form['setl']
+
+ # Save to temp files
+ with tempfile.NamedTemporaryFile(mode='w', suffix='.csv') as csv_file:
+ csv_data.save(csv_file.name)
+
+ # Update SETL script with temp file path
+ setl_graph = Graph()
+ setl_graph.parse(data=setl_script, format='turtle')
+
+ # Run transformation
+ resources = setlr.run_setl(setl_graph)
+
+ # Return RDF
+ output = resources[URIRef('http://example.com/output')]
+ return output.serialize(format='turtle'), 200, {
+ 'Content-Type': 'text/turtle'
+ }
+
+if __name__ == '__main__':
+ app.run()  # never enable debug=True outside local development: the debugger allows arbitrary code execution
+```
+
+## Best Practices
+
+### 1. Modular SETL Scripts
+
+Break complex transformations into modules:
+
+```turtle
+# common.setl.ttl - shared definitions
+@prefix : <http://example.com/> .
+@prefix setl: <http://purl.org/twc/vocab/setl/> .
+
+# users.setl.ttl - user-specific transforms
+@prefix : <http://example.com/> .
+<> owl:imports <common.setl.ttl> .
+
+# Main script imports both
+```
+
+### 2. Version Control
+
+- Store SETL scripts in version control
+- Track changes to transforms with your data processing pipeline
+- Use branches for experimental transforms
+
+### 3. Testing
+
+- Test SETL scripts with sample data before production use
+- Validate output with SHACL shapes
+- Compare output to expected results
+
+### 4. Documentation
+
+- Document complex transforms with comments (use rdfs:comment)
+- Maintain README files for transform collections
+- Include example data with your scripts
+
+### 5. Monitoring
+
+- Log transformation results (record counts, errors)
+- Monitor resource usage for large datasets
+- Set up alerts for transformation failures
+
+## Next Steps
+
+- Explore [Streaming XML](streaming-xml.md) for large file processing
+- Learn about [Python Functions](python-functions.md) for custom logic
+- Set up [SPARQL endpoints](sparql.md) for data loading
+- Implement [SHACL validation](shacl.md) for quality control
+
+## Support
+
+For questions about advanced features:
+- Check the [documentation](README.md)
+- Open a [discussion](https://github.com/tetherless-world/setlr/discussions)
+- Report issues on [GitHub](https://github.com/tetherless-world/setlr/issues)
diff --git a/docs/shacl.md b/docs/shacl.md
new file mode 100644
index 0000000..7dd789a
--- /dev/null
+++ b/docs/shacl.md
@@ -0,0 +1,638 @@
+# SHACL Validation
+
+SETLr includes built-in support for validating RDF output using SHACL (Shapes Constraint Language), ensuring your generated data meets quality requirements.
+
+## Overview
+
+SHACL validation enables:
+- Schema validation of generated RDF
+- Data quality checks
+- Constraint enforcement
+- Automated validation in workflows
+- Validation reports
+
+## Basic SHACL Validation
+
+Define shapes to validate your RDF output.
+
+### Simple Shape Example
+
+```turtle
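+@prefix setl: <http://purl.org/twc/vocab/setl/> .
+@prefix prov: <http://www.w3.org/ns/prov#> .
+@prefix sh: <http://www.w3.org/ns/shacl#> .
+@prefix ex: <http://example.com/> .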
+
+# Data extraction and transformation
+:input_table a csvw:Table ;
+ prov:wasGeneratedBy [ a setl:Extract ; prov:used <people.csv> ] .
+
+:output a void:Dataset ;
+ prov:wasGeneratedBy [
+ a setl:Transform, setl:JSLDT ;
+ prov:used :input_table ;
+ prov:value '''[{
+ "@for": "row in input_table",
+ "@do": {
+ "@id": "http://example.com/person/{{row.ID}}",
+ "@type": "http://xmlns.com/foaf/0.1/Person",
+ "http://xmlns.com/foaf/0.1/name": "{{row.Name}}",
+ "http://xmlns.com/foaf/0.1/mbox": "mailto:{{row.Email}}"
+ }
+ }]'''
+ ] .
+
+# SHACL validation shape
+:PersonShape a sh:NodeShape ;
+ sh:targetClass foaf:Person ;
+ sh:property [
+ sh:path foaf:name ;
+ sh:minCount 1 ;
+ sh:maxCount 1 ;
+ sh:datatype xsd:string ;
+ sh:minLength 1 ;
+ ] ;
+ sh:property [
+ sh:path foaf:mbox ;
+ sh:minCount 1 ;
+ sh:pattern "^mailto:" ;
+ ] .
+
+# Apply validation
+:output setl:hasShapesGraph :shapes_graph .
+
+:shapes_graph prov:wasGeneratedBy [
+ a setl:Extract ;
+ prov:used <shapes.ttl>
+] .
+```
+
+### Validation in SETL Scripts
+
+```turtle
+@prefix setl: <http://purl.org/twc/vocab/setl/> .
+@prefix prov: <http://www.w3.org/ns/prov#> .
+
+# Transform data
+:output a void:Dataset ;
+ prov:wasGeneratedBy [
+ a setl:Transform, setl:JSLDT ;
+ prov:used :input ;
+ prov:value '''[...]'''
+ ] ;
+ # Enable SHACL validation
+ setl:validateWithShapes :shapes .
+
+# Load shapes from file
+:shapes prov:wasGeneratedBy [
+ a setl:Extract ;
+ prov:used <shapes.ttl>
+] .
+```
+
+## SHACL Constraints
+
+### Required Properties
+
+Ensure properties exist:
+
+```turtle
+:PersonShape a sh:NodeShape ;
+ sh:targetClass ex:Person ;
+ sh:property [
+ sh:path ex:name ;
+ sh:minCount 1 ; # Required
+ sh:message "Person must have a name"
+ ] .
+```
+
+### Cardinality Constraints
+
+Control how many values:
+
+```turtle
+sh:property [
+ sh:path ex:email ;
+ sh:minCount 1 ; # At least one
+ sh:maxCount 1 ; # At most one
+] .
+
+sh:property [
+ sh:path ex:phoneNumber ;
+ sh:minCount 0 ; # Optional
+ sh:maxCount 5 ; # Up to 5
+] .
+```
+
+### Datatype Constraints
+
+Validate datatypes:
+
+```turtle
+sh:property [
+ sh:path ex:age ;
+ sh:datatype xsd:integer ;
+ sh:minInclusive 0 ;
+ sh:maxInclusive 150 ;
+] .
+
+sh:property [
+ sh:path ex:email ;
+ sh:datatype xsd:string ;
+ sh:pattern "^[a-zA-Z0-9._%+-]+@[a-zA-Z0-9.-]+\\.[a-zA-Z]{2,}$" ;
+] .
+
+sh:property [
+ sh:path ex:website ;
+ sh:nodeKind sh:IRI ; # Must be a URI
+] .
+```
+
+### Value Constraints
+
+Restrict allowed values:
+
+```turtle
+sh:property [
+ sh:path ex:status ;
+ sh:in ( "active" "inactive" "pending" ) ;
+] .
+
+sh:property [
+ sh:path ex:priority ;
+ sh:minInclusive 1 ;
+ sh:maxInclusive 10 ;
+] .
+```
+
+### String Constraints
+
+Validate string patterns:
+
+```turtle
+sh:property [
+ sh:path ex:zipCode ;
+ sh:pattern "^\\d{5}(-\\d{4})?$" ;
+ sh:flags "i" ; # Case insensitive
+] .
+
+sh:property [
+ sh:path ex:name ;
+ sh:minLength 2 ;
+ sh:maxLength 100 ;
+] .
+```
+
+### Class Constraints
+
+Ensure correct types:
+
+```turtle
+sh:property [
+ sh:path ex:creator ;
+ sh:class ex:Person ; # Must be a Person
+] .
+
+sh:property [
+ sh:path ex:organization ;
+ sh:or (
+ [ sh:class ex:Company ]
+ [ sh:class ex:Institution ]
+ ) ;
+] .
+```
+
+## Validation Reports
+
+### Interpreting Reports
+
+When validation fails, SETLr generates a detailed report:
+
+```python
+from rdflib import Graph
+import setlr
+
+# Run SETL with validation
+setl_graph = Graph()
+setl_graph.parse('transform.setl.ttl', format='turtle')
+
+try:
+ resources = setlr.run_setl(setl_graph)
+ print("Validation passed!")
+
+except setlr.ValidationError as e:
+ print("Validation failed!")
+ print(e.report) # Access validation report
+
+ # Report contains:
+ # - sh:result - Individual violations
+ # - sh:focusNode - Node that failed
+ # - sh:resultPath - Property that failed
+ # - sh:resultMessage - Error message
+```
+
+### Example Validation Report
+
+```turtle
+@prefix sh: <http://www.w3.org/ns/shacl#> .
+
+[ a sh:ValidationReport ;
+ sh:conforms false ;
+ sh:result [
+ a sh:ValidationResult ;
+ sh:focusNode <http://example.com/person/123> ;
+ sh:resultPath <http://xmlns.com/foaf/0.1/name> ;
+ sh:resultSeverity sh:Violation ;
+ sh:resultMessage "Person must have a name" ;
+ sh:sourceConstraintComponent sh:MinCountConstraintComponent ;
+ sh:sourceShape :PersonShape ;
+ ]
+] .
+```
+
+## Advanced SHACL Features
+
+### Conditional Constraints
+
+Use `sh:or`, `sh:and`, `sh:not`:
+
+```turtle
+:UserShape a sh:NodeShape ;
+ sh:targetClass ex:User ;
+ # Must have email OR phone
+ sh:or (
+ [ sh:property [ sh:path ex:email ; sh:minCount 1 ] ]
+ [ sh:property [ sh:path ex:phone ; sh:minCount 1 ] ]
+ ) .
+```
+
+### Qualified Cardinality
+
+Count specific types:
+
+```turtle
+sh:property [
+ sh:path ex:author ;
+ sh:qualifiedValueShape [ sh:class ex:Person ] ;
+ sh:qualifiedMinCount 1 ; # At least one author that is a Person
+] .
+```
+
+### Property Pair Constraints
+
+Compare two properties:
+
+```turtle
+:DateRangeShape a sh:NodeShape ;
+ sh:targetClass ex:Event ;
+ sh:property [
+ sh:path ex:startDate ;
+ sh:lessThan ex:endDate ; # Start before end
+ ] .
+```
+
+### Closed Shapes
+
+Restrict to only defined properties:
+
+```turtle
+:PersonShape a sh:NodeShape ;
+ sh:targetClass ex:Person ;
+ sh:closed true ;
+ sh:ignoredProperties ( rdf:type ) ;
+ sh:property [ sh:path ex:name ] ;
+ sh:property [ sh:path ex:email ] ;
+ # Any other property is invalid
+.
+```
+
+### Custom Messages
+
+Provide helpful error messages:
+
+```turtle
+sh:property [
+ sh:path ex:age ;
+ sh:datatype xsd:integer ;
+ sh:minInclusive 0 ;
+ sh:message "Age must be a non-negative integer"@en ;
+] .
+```
+
+### Severity Levels
+
+Set constraint severity:
+
+```turtle
+sh:property [
+ sh:path ex:email ;
+ sh:minCount 1 ;
+ sh:severity sh:Violation ; # Hard failure
+] .
+
+sh:property [
+ sh:path ex:phone ;
+ sh:minCount 1 ;
+ sh:severity sh:Warning ; # Warning only
+] .
+
+sh:property [
+ sh:path ex:fax ;
+ sh:maxCount 1 ;
+ sh:severity sh:Info ; # Information
+] .
+```
+
+## Validation in Python
+
+### Manual Validation
+
+```python
+from rdflib import Graph
+from pyshacl import validate
+
+# Load data graph
+data_graph = Graph()
+data_graph.parse('data.ttl', format='turtle')
+
+# Load shapes graph
+shapes_graph = Graph()
+shapes_graph.parse('shapes.ttl', format='turtle')
+
+# Validate
+conforms, report_graph, report_text = validate(
+ data_graph,
+ shacl_graph=shapes_graph,
+ inference='rdfs', # Enable RDFS inference
+ abort_on_first=False, # Find all violations
+ meta_shacl=False, # Don't validate shapes themselves
+ debug=False
+)
+
+if conforms:
+ print("✓ Validation passed")
+else:
+ print("✗ Validation failed")
+ print(report_text)
+```
+
+### Validation with Inference
+
+Enable reasoning during validation:
+
+```python
+from pyshacl import validate
+
+conforms, report_graph, report_text = validate(
+ data_graph,
+ shacl_graph=shapes_graph,
+ inference='rdfs', # RDFS inference
+ # inference='owlrl', # OWL RL inference
+ # inference='both', # Both RDFS and OWL RL
+)
+```
+
+### Advanced Validation Options
+
+```python
+from pyshacl import validate
+
+conforms, report_graph, report_text = validate(
+ data_graph,
+ shacl_graph=shapes_graph,
+ ont_graph=ontology_graph, # Separate ontology
+ inference='rdfs',
+ abort_on_first=False, # Continue after first violation
+ allow_infos=True, # Include info-level results
+ allow_warnings=True, # Include warnings
+ meta_shacl=True, # Validate the shapes themselves
+ advanced=True, # Enable SHACL-AF features
+ js=True, # Enable JavaScript constraints
+ debug=True # Verbose output
+)
+```
+
+## Practical Examples
+
+### Example 1: Person Validation
+
+```turtle
+@prefix sh: <http://www.w3.org/ns/shacl#> .
+@prefix ex: <http://example.com/> .
+@prefix foaf: <http://xmlns.com/foaf/0.1/> .
+
+ex:PersonShape a sh:NodeShape ;
+ sh:targetClass foaf:Person ;
+ sh:property [
+ sh:path foaf:name ;
+ sh:minCount 1 ;
+ sh:datatype xsd:string ;
+ sh:minLength 1 ;
+ sh:message "Every person must have a non-empty name"
+ ] ;
+ sh:property [
+ sh:path foaf:mbox ;
+ sh:maxCount 1 ;
+ sh:pattern "^mailto:[a-zA-Z0-9._%+-]+@[a-zA-Z0-9.-]+\\.[a-zA-Z]{2,}$" ;
+ sh:message "Email must be valid mailto: URI"
+ ] ;
+ sh:property [
+ sh:path foaf:age ;
+ sh:maxCount 1 ;
+ sh:datatype xsd:integer ;
+ sh:minInclusive 0 ;
+ sh:maxInclusive 150 ;
+ ] .
+```
+
+### Example 2: Organizational Hierarchy
+
+```turtle
+ex:OrganizationShape a sh:NodeShape ;
+ sh:targetClass ex:Organization ;
+ sh:property [
+ sh:path ex:hasEmployee ;
+ sh:class ex:Person ;
+ ] ;
+ sh:property [
+ sh:path ex:parentOrganization ;
+ sh:maxCount 1 ;
+ sh:class ex:Organization ;
+ ] ;
+ sh:property [
+ sh:path ex:legalName ;
+ sh:minCount 1 ;
+ sh:maxCount 1 ;
+ sh:datatype xsd:string ;
+ ] .
+
+ex:PersonShape a sh:NodeShape ;
+ sh:targetClass ex:Person ;
+ sh:property [
+ sh:path ex:worksFor ;
+ sh:maxCount 1 ;
+ sh:class ex:Organization ;
+ ] .
+```
+
+### Example 3: Temporal Data
+
+```turtle
+ex:EventShape a sh:NodeShape ;
+ sh:targetClass ex:Event ;
+ sh:property [
+ sh:path ex:startDate ;
+ sh:minCount 1 ;
+ sh:maxCount 1 ;
+ sh:datatype xsd:dateTime ;
+ ] ;
+ sh:property [
+ sh:path ex:endDate ;
+ sh:maxCount 1 ;
+ sh:datatype xsd:dateTime ;
+ ] ;
+ # End date must be after start date
+ sh:sparql [
+ sh:message "End date must be after start date" ;
+ sh:prefixes ex: ;
+ sh:select """
+ SELECT $this
+ WHERE {
+ $this ex:startDate ?start ;
+ ex:endDate ?end .
+ FILTER (?end <= ?start)
+ }
+ """
+ ] .
+```
+
+## Best Practices
+
+### 1. Design Shapes Early
+
+Define validation requirements before generating data:
+- Document expected schema
+- Create shapes alongside SETL scripts
+- Test with sample data
+
+### 2. Use Meaningful Messages
+
+```turtle
+sh:message "Email is required and must be valid"@en ;
+```
+
+### 3. Layer Validations
+
+```turtle
+# Core validation (hard requirements)
+:CorePersonShape sh:severity sh:Violation .
+
+# Quality checks (warnings)
+:QualityPersonShape sh:severity sh:Warning .
+
+# Recommendations (info)
+:OptimalPersonShape sh:severity sh:Info .
+```
+
+### 4. Test Shapes Independently
+
+```python
+# Test shapes separately from SETL
+shapes_graph = Graph()
+shapes_graph.parse('shapes.ttl')
+
+test_data = Graph()
+test_data.parse('test-data.ttl')
+
+conforms, _, report = validate(test_data, shacl_graph=shapes_graph)
+assert conforms, f"Validation failed: {report}"
+```
+
+### 5. Version Control Shapes
+
+- Store shapes with your SETL scripts
+- Version them together
+- Document changes
+
+## Integration Patterns
+
+### CI/CD Validation
+
+```yaml
+# GitHub Actions
+- name: Validate RDF Output
+ run: |
+ python -c "
+ from rdflib import Graph
+ from pyshacl import validate
+
+ data = Graph()
+ data.parse('output.ttl')
+
+ shapes = Graph()
+ shapes.parse('shapes.ttl')
+
+ conforms, _, report = validate(data, shacl_graph=shapes)
+
+ if not conforms:
+ print(report)
+ exit(1)
+ "
+```
+
+### Pre-Production Checks
+
+```python
+def validate_before_load(data_graph, shapes_graph, endpoint):
+ """Validate data before loading to production"""
+ conforms, _, report = validate(data_graph, shacl_graph=shapes_graph)
+
+ if not conforms:
+ raise ValueError(f"Validation failed:\n{report}")
+
+ # Load to production endpoint
+ load_to_sparql(data_graph, endpoint)
+```
+
+## Troubleshooting
+
+### Common Issues
+
+**Issue**: Shapes not found
+```python
+# Ensure shapes are loaded correctly
+shapes_graph = Graph().parse('shapes.ttl', format='turtle')
+print(f"Loaded {len(shapes_graph)} triples from shapes file")
+```
+
+**Issue**: Validation too strict
+```turtle
+# Use warnings for optional checks
+sh:severity sh:Warning ;
+```
+
+**Issue**: Performance problems
+```python
+# Validate in batches for large datasets; batches() is your own helper and must keep all triples of a focus node in the same batch
+batch_size = 10000
+for batch in batches(data_graph, batch_size):
+ validate(batch, shacl_graph=shapes_graph)
+```
+
+## Related Documentation
+
+- [Advanced Features](advanced.md) - Integration patterns
+- [Python API](python-api.md) - Programmatic usage
+- [Examples](examples.md) - Complete examples
+
+## External Resources
+
+- [SHACL Specification](https://www.w3.org/TR/shacl/)
+- [pyshacl Documentation](https://github.com/RDFLib/pySHACL)
+- [SHACL Playground](https://shacl.org/playground/)
+
+## Support
+
+For SHACL-related questions:
+- Open a [discussion](https://github.com/tetherless-world/setlr/discussions)
+- Report issues on [GitHub](https://github.com/tetherless-world/setlr/issues)
diff --git a/docs/sparql.md b/docs/sparql.md
new file mode 100644
index 0000000..afb7b48
--- /dev/null
+++ b/docs/sparql.md
@@ -0,0 +1,506 @@
+# SPARQL Support
+
+SETLr provides comprehensive support for SPARQL, allowing you to query RDF data sources and load results to SPARQL Update endpoints.
+
+## Overview
+
+SPARQL integration enables:
+- Querying SPARQL endpoints as data sources
+- Loading transformed data to SPARQL Update endpoints
+- Executing SPARQL queries within transforms
+- Combining SPARQL with other data sources
+
+## SPARQL Queries as Data Sources
+
+Use SPARQL SELECT queries to extract data from RDF sources.
+
+### Basic SPARQL Query
+
+```turtle
+@prefix setl: <http://purl.org/twc/vocab/setl/> .
+@prefix prov: <http://www.w3.org/ns/prov#> .
+@prefix : <http://example.com/> .
+
+:sparql_data a setl:Table ;
+ prov:wasGeneratedBy [
+ a setl:Extract ;
+ setl:query '''
+ PREFIX foaf: <http://xmlns.com/foaf/0.1/>
+
+ SELECT ?name ?email ?homepage
+ WHERE {
+ ?person a foaf:Person ;
+ foaf:name ?name ;
+ foaf:mbox ?email .
+ OPTIONAL { ?person foaf:homepage ?homepage }
+ }
+ '''
+ ] .
+
+:output prov:wasGeneratedBy [
+ a setl:Transform, setl:JSLDT ;
+ prov:used :sparql_data ;
+ prov:value '''[{
+ "@for": "row in sparql_data",
+ "@do": {
+ "@id": "http://example.com/enriched/{{row.name | slugify}}",
+ "@type": "EnrichedPerson",
+ "originalName": "{{row.name}}",
+ "email": "{{row.email}}",
+ "homepage": {
+ "@if": "row.homepage",
+ "@do": "{{row.homepage}}"
+ }
+ }
+ }]'''
+] .
+```
+
+### Querying Remote Endpoints
+
+Query external SPARQL endpoints:
+
+```turtle
+:dbpedia_data a setl:Table ;
+ prov:wasGeneratedBy [
+ a setl:Extract ;
+ setl:endpoint <http://dbpedia.org/sparql> ;
+ setl:query '''
+ PREFIX dbo: <http://dbpedia.org/ontology/>
+ PREFIX rdfs: <http://www.w3.org/2000/01/rdf-schema#>
+
+ SELECT ?city ?name ?population ?country
+ WHERE {
+ ?city a dbo:City ;
+ rdfs:label ?name ;
+ dbo:populationTotal ?population ;
+ dbo:country ?country .
+ FILTER (lang(?name) = "en")
+ FILTER (?population > 1000000)
+ }
+ LIMIT 100
+ '''
+ ] .
+```
+
+### Authenticated Endpoints
+
+For endpoints requiring authentication:
+
+```turtle
+:protected_data a setl:Table ;
+ prov:wasGeneratedBy [
+ a setl:Extract ;
+ setl:endpoint <http://example.com/sparql> ;
+ setl:username "user" ;
+ setl:password "pass" ;
+ setl:query '''
+ SELECT ?s ?p ?o
+ WHERE { ?s ?p ?o }
+ LIMIT 100
+ '''
+ ] .
+```
+
+**Security Note**: For production use, load credentials from environment variables:
+
+```python
+import os
+from rdflib import Graph, Literal, Namespace, RDF
+import setlr
+setl = Namespace('http://purl.org/twc/vocab/setl/')
+# Load SETL script
+setl_graph = Graph()
+setl_graph.parse('transform.setl.ttl', format='turtle')
+
+# Add credentials from environment
+for extract in setl_graph.subjects(RDF.type, setl.Extract):
+ if (extract, setl.endpoint, None) in setl_graph:
+ username = os.getenv('SPARQL_USERNAME')
+ password = os.getenv('SPARQL_PASSWORD')
+ if username:
+ setl_graph.add((extract, setl.username, Literal(username)))
+ if password:
+ setl_graph.add((extract, setl.password, Literal(password)))
+
+# Run transform
+resources = setlr.run_setl(setl_graph)
+```
+
+## Loading to SPARQL Endpoints
+
+Write transformed data to SPARQL Update endpoints.
+
+### Basic SPARQL Update
+
+```turtle
+:output a void:Dataset ;
+ prov:wasGeneratedBy [
+ a setl:Transform, setl:JSLDT ;
+ prov:used :input_table ;
+ prov:value '''[{
+ "@for": "row in input_table",
+ "@do": {
+ "@id": "http://example.com/entity/{{row.ID}}",
+ "@type": "Entity",
+ "name": "{{row.Name}}"
+ }
+ }]'''
+ ] ;
+ prov:wasAttributedTo [
+ a setl:Load ;
+ setl:endpoint <http://localhost:3030/dataset/update> ;
+ setl:graphName <http://example.com/graph>
+ ] .
+```
+
+### Loading to Named Graphs
+
+Specify which named graph to load data into:
+
+```turtle
+:load_config a setl:Load ;
+ setl:endpoint <http://localhost:3030/dataset/update> ;
+ setl:graphName <http://example.com/graph> ;
+ prov:used :output .
+```
+
+### Update Operations
+
+Perform custom SPARQL Update operations:
+
+```turtle
+:update_operation a setl:Load ;
+ setl:endpoint <http://localhost:3030/dataset/update> ;
+ setl:updateQuery '''
+ PREFIX ex: <http://example.com/>
+
+ DELETE { ?s ex:oldProperty ?o }
+ INSERT { ?s ex:newProperty ?o }
+ WHERE { ?s ex:oldProperty ?o }
+ ''' ;
+ prov:used :output .
+```
+
+### Batch Loading
+
+For large datasets, load in batches:
+
+```python
+from rdflib import Graph, URIRef, Namespace
+import setlr
+
+SETL = Namespace('http://purl.org/twc/vocab/setl/')
+
+# Process in batches
+batch_size = 10000
+output_graph = Graph()
+
+# Load and transform data in batches
+# Then load each batch to endpoint
+for batch_num, batch_data in enumerate(data_batches):
+ # Create batch-specific SETL script
+ setl_graph = create_batch_setl(batch_data, batch_num)
+
+ # Process
+ resources = setlr.run_setl(setl_graph)
+
+ # Batch is automatically loaded to endpoint by SETL script
+ print(f"Loaded batch {batch_num}")
+```
+
+## Combining SPARQL with Other Sources
+
+Mix SPARQL data with CSV, JSON, or other sources.
+
+### Join SPARQL with CSV Data
+
+```turtle
+# Load CSV data
+:csv_table a csvw:Table, setl:Table ;
+ prov:wasGeneratedBy [
+ a setl:Extract ;
+ prov:used <data.csv>
+ ] .
+
+# Query related RDF data
+:rdf_enrichment a setl:Table ;
+ prov:wasGeneratedBy [
+ a setl:Extract ;
+ setl:endpoint <http://example.com/sparql> ;
+ setl:query '''
+ SELECT ?id ?category ?score
+ WHERE {
+ ?entity ex:id ?id ;
+ ex:category ?category ;
+ ex:score ?score .
+ }
+ '''
+ ] .
+
+# Combine in transform
+:output prov:wasGeneratedBy [
+ a setl:Transform, setl:JSLDT ;
+ prov:used :csv_table, :rdf_enrichment ;
+ prov:value '''[{
+ "@for": "person in csv_table",
+ "@do": {
+ "@id": "http://example.com/person/{{person.ID}}",
+ "@type": "Person",
+ "name": "{{person.Name}}",
+ "enrichment": [{
+ "@for": "data in rdf_enrichment",
+ "@if": "data.id == person.ID",
+ "@do": {
+ "category": "{{data.category}}",
+ "score": "{{data.score}}"
+ }
+ }]
+ }
+ }]'''
+] .
+```
+
+## SPARQL in Python API
+
+Use SPARQL programmatically with the Python API.
+
+### Query Execution
+
+```python
+from rdflib import Graph
+from SPARQLWrapper import SPARQLWrapper, JSON
+
+# Query a SPARQL endpoint
+sparql = SPARQLWrapper("http://dbpedia.org/sparql")
+sparql.setQuery("""
+ PREFIX dbo: <http://dbpedia.org/ontology/>
+
+ SELECT ?city ?population
+ WHERE {
+ ?city a dbo:City ;
+ dbo:populationTotal ?population .
+ }
+ LIMIT 10
+""")
+sparql.setReturnFormat(JSON)
+results = sparql.query().convert()
+
+# Process results
+for result in results["results"]["bindings"]:
+ print(f"{result['city']['value']}: {result['population']['value']}")
+```
+
+### Update Execution
+
+```python
+from SPARQLWrapper import SPARQLWrapper, POST, DIGEST
+
+# Configure endpoint
+sparql = SPARQLWrapper("http://localhost:3030/dataset/update")
+sparql.setMethod(POST)
+sparql.setHTTPAuth(DIGEST)
+sparql.setCredentials("user", "password")
+
+# Execute update
+sparql.setQuery("""
+ PREFIX ex: <http://example.com/>
+
+ INSERT DATA {
+ GRAPH <http://example.com/graph> {
+ ex:entity1 ex:property "value" .
+ }
+ }
+""")
+sparql.query()
+```
+
+## Best Practices
+
+### 1. Limit Result Sets
+
+Always use LIMIT in queries to prevent memory issues:
+
+```sparql
+SELECT ?s ?p ?o
+WHERE { ?s ?p ?o }
+LIMIT 10000
+```
+
+### 2. Use Pagination
+
+For large result sets, paginate:
+
+```sparql
+SELECT ?s ?p ?o
+WHERE { ?s ?p ?o }
+LIMIT 1000
+OFFSET 0
+```
+
+```sparql
+SELECT ?s ?p ?o
+WHERE { ?s ?p ?o }
+LIMIT 1000
+OFFSET 1000
+```
+
+### 3. Optimize Queries
+
+- Use specific predicates and types
+- Filter early in the query
+- Use OPTIONAL sparingly
+- Avoid UNION when possible
+
+### 4. Handle Errors
+
+```python
+from SPARQLWrapper import SPARQLWrapper
+from SPARQLWrapper.SPARQLExceptions import QueryBadFormed, EndPointNotFound
+
+try:
+ sparql = SPARQLWrapper("http://example.com/sparql")
+ sparql.setQuery("SELECT * WHERE { ?s ?p ?o } LIMIT 10")
+ results = sparql.query()
+
+except QueryBadFormed as e:
+ print(f"Invalid SPARQL query: {e}")
+
+except EndPointNotFound as e:
+ print(f"Endpoint not found: {e}")
+
+except Exception as e:
+ print(f"SPARQL error: {e}")
+```
+
+### 5. Connection Pooling
+
+Reuse one configured `SPARQLWrapper` instance across queries (this avoids repeated setup; it does not pool HTTP connections):
+
+```python
+# Configure once, reuse many times
+sparql = SPARQLWrapper("http://example.com/sparql")
+sparql.setReturnFormat(JSON)
+
+for query in queries:
+ sparql.setQuery(query)
+ results = sparql.query().convert()
+ process_results(results)
+```
+
+### 6. Timeout Configuration
+
+Set timeouts to prevent hanging:
+
+```python
+sparql = SPARQLWrapper("http://example.com/sparql")
+sparql.setTimeout(30) # 30 second timeout
+```
+
+## Common Use Cases
+
+### 1. Enriching CSV with LOD
+
+Load CSV, enrich with Linked Open Data:
+
+```turtle
+:csv_data a csvw:Table ;
+ prov:wasGeneratedBy [ a setl:Extract ; prov:used <data.csv> ] .
+
+:lod_enrichment a setl:Table ;
+ prov:wasGeneratedBy [
+ a setl:Extract ;
+ setl:endpoint <http://dbpedia.org/sparql> ;
+ setl:query '''SELECT ?person ?abstract WHERE { ... }'''
+ ] .
+
+# Combine in transform...
+```
+
+### 2. Migrating Between Triplestores
+
+Extract from one triplestore, load to another:
+
+```turtle
+:source_data a setl:Table ;
+ prov:wasGeneratedBy [
+ a setl:Extract ;
+ setl:endpoint <http://source.example.com/sparql> ;
+ setl:query '''SELECT * WHERE { ?s ?p ?o }'''
+ ] .
+
+:output prov:wasAttributedTo [
+ a setl:Load ;
+ setl:endpoint <http://target.example.com/sparql/update> ;
+ setl:graphName <http://example.com/graph/migrated>
+] .
+```
+
+### 3. Periodic Updates
+
+Query external data and update local store:
+
+```python
+import schedule
+from rdflib import Graph
+import setlr
+
+def update_from_sparql():
+ setl_graph = Graph()
+ setl_graph.parse('sparql-update.setl.ttl')
+ setlr.run_setl(setl_graph)
+ print("SPARQL update complete")
+
+# Run every hour (jobs only fire while a loop calls schedule.run_pending())
+schedule.every().hour.do(update_from_sparql)
+```
+
+## Troubleshooting
+
+### Connection Issues
+
+```python
+import requests
+
+# Test endpoint connectivity
+try:
+ response = requests.get("http://example.com/sparql", timeout=5)
+ print(f"Endpoint status: {response.status_code}")
+except requests.exceptions.RequestException as e:
+ print(f"Cannot connect: {e}")
+```
+
+### Query Validation
+
+Test queries independently before using in SETL:
+
+```bash
+# Use curl to test
+curl -X POST http://example.com/sparql \
+ -H "Accept: application/sparql-results+json" \
+ --data-urlencode "query=SELECT * WHERE { ?s ?p ?o } LIMIT 10"
+```
+
+### Debug Logging
+
+Enable debug logging:
+
+```python
+import logging
+
+logging.basicConfig(level=logging.DEBUG)
+logger = logging.getLogger('SPARQLWrapper')
+logger.setLevel(logging.DEBUG)
+```
+
+## Related Documentation
+
+- [Advanced Features](advanced.md) - Multi-source transforms
+- [Python API](python-api.md) - Programmatic usage
+- [Examples](examples.md) - Complete examples
+
+## Support
+
+For SPARQL-related questions:
+- Open a [discussion](https://github.com/tetherless-world/setlr/discussions)
+- Report issues on [GitHub](https://github.com/tetherless-world/setlr/issues)
diff --git a/requirements-dev.txt b/requirements-dev.txt
new file mode 100644
index 0000000..2d5cdfd
--- /dev/null
+++ b/requirements-dev.txt
@@ -0,0 +1,16 @@
+-r requirements.txt
+
+# Testing
+nose2>=0.9.0
+coverage>=5.0
+
+# Linting
+flake8>=3.8.0
+pycodestyle>=2.6.0
+pylint>=2.6.0
+vulture>=2.0
+
+# Build tools
+build>=0.7.0
+wheel>=0.36.0
+twine>=3.4.0
diff --git a/requirements.txt b/requirements.txt
new file mode 100644
index 0000000..75548f8
--- /dev/null
+++ b/requirements.txt
@@ -0,0 +1,18 @@
+rdflib>=6.0.0
+pandas>=0.23.0
+jinja2
+numpy
+cython
+requests
+toposort
+beautifulsoup4
+lxml
+six
+xlrd
+ijson
+click
+tqdm
+requests-testadapter
+python-slugify
+pyshacl[js]
+future