Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
38 commits
Select commit Hold shift + click to select a range
073c033
docs: add dependency modernization implementation plan
Feb 13, 2026
1fa2d9f
build: modernize packaging, CI, and zero-skip test policy
Feb 14, 2026
18d506a
build: harden packaging artifacts and update migration notes
Feb 14, 2026
49ab86a
ci: enforce contract model quality gate with baseline metrics
Feb 14, 2026
54be6a3
feat: add contract model re-export workflow
Feb 14, 2026
2bb666d
models: refresh bundled date model artifact
Feb 14, 2026
bedfe1f
ci: authenticate model bootstrap downloads to avoid API rate limits
Feb 14, 2026
fd735b6
ci: bootstrap nltk before model-quality contract download
Feb 14, 2026
fa52427
fix: stabilize German amount delimiter inference without locale packs
Feb 14, 2026
b07b9b6
fix: stabilize paragraph spans and de_DE delimiter fallback
Feb 14, 2026
d84f72e
docs: record final CI stabilization fixes
Feb 14, 2026
d878e42
feat: add contract-model training workflow and tag overrides
Feb 14, 2026
2e0a59c
feat: add runtime contract-type model fallback and CI smoke
Feb 14, 2026
4cde617
ci: fix contract-type smoke env context
Feb 14, 2026
7f0334a
fix: make catalog path resolution robust on fresh NLTK installs
Feb 14, 2026
021d84f
docs: document contract-type runtime model workflow
Feb 14, 2026
25dc9e8
test: drop nose and migrate assertions
Feb 14, 2026
dc84ee8
chore: reexport bundled sklearn models for py3.11
Feb 14, 2026
7f87221
fix: ensure catalog dir exists and add regression tests
Feb 14, 2026
6676b0c
feat: add contract-type quality gate and publish workflow
Feb 14, 2026
f1ea6f3
fix: adjust contract-type baseline metrics for CI
Feb 14, 2026
8a88ea3
docs: note contract-type baseline metrics runner variance
Feb 14, 2026
21cc0a8
ci: replace skip allowlist with inline annotations
Feb 14, 2026
a03f9ff
fix: avoid creating catalog dirs on import
Feb 14, 2026
1678a59
chore: reexport layered definition model for modern sklearn
Feb 14, 2026
62cc2a1
fix: log and narrow contract-type runtime fallback
Feb 14, 2026
b8d1e76
ci/docs: stabilize contract-type quality gate metrics
Feb 14, 2026
c78e154
docs: document skip-audit annotation policy
Feb 14, 2026
c1c0be1
feat: allow overriding models repo for downloads
Feb 14, 2026
443c416
ci: add publish workflow for is-contract model
Feb 14, 2026
a08bf5c
feat: add asset drift checks and modernize contract model defaults
Feb 14, 2026
1302f09
ci: make skip-audit allowlist stable
Feb 14, 2026
0133cee
nlp: avoid pandas->sklearn dtype warnings
Feb 14, 2026
31996d3
scripts: bootstrap is-contract 0.2 from legacy on 404
Feb 14, 2026
f7feec1
build: upgrade pandas/scipy and refresh uv.lock
Feb 14, 2026
5937e71
fix: stabilize runtime contract-type training
Feb 14, 2026
1748350
docs/ci: make publish workflows fork-friendly
Feb 14, 2026
51c07bc
ci: refresh contract-type baseline metrics
Feb 14, 2026
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
65 changes: 65 additions & 0 deletions .github/workflows/asset-drift.yml
Original file line number Diff line number Diff line change
@@ -0,0 +1,65 @@
# Scheduled workflow that re-downloads the pinned release assets and re-runs
# the model quality gates against the recorded baseline metrics.
name: Asset Drift Check

on:
  schedule:
    # Daily at 03:00 (GitHub Actions evaluates cron schedules in UTC).
    - cron: "0 3 * * *"
  # Also allow manual runs from the Actions tab.
  workflow_dispatch:

# The job only needs to read the repository.
permissions:
  contents: read

env:
  # Quoted so YAML keeps the version a string rather than a float.
  PYTHON_VERSION: "3.11"

jobs:
  drift:
    name: Verify Model/Corpus Release Assets
    runs-on: ubuntu-latest
    timeout-minutes: 60
    steps:
      - name: Check out repository
        uses: actions/checkout@v4

      - name: Set up Python
        uses: actions/setup-python@v5
        with:
          python-version: ${{ env.PYTHON_VERSION }}

      - name: Set up uv
        uses: astral-sh/setup-uv@v6

      # --frozen installs exactly what uv.lock records, without re-resolving.
      - name: Install dependencies
        run: |
          uv venv .venv --python "${PYTHON_VERSION}"
          uv sync --frozen --python .venv/bin/python

      # NOTE(review): GITHUB_TOKEN is presumably read by the script to
      # authenticate release-asset downloads -- confirm in the script.
      - name: Download + verify pinned release assets
        env:
          GITHUB_TOKEN: ${{ github.token }}
        run: |
          .venv/bin/python scripts/asset_drift_check.py --force-download

      # Baseline and candidate tags are identical here, and every allowed
      # regression is 0.0.
      - name: Run contract model quality gate
        env:
          GITHUB_TOKEN: ${{ github.token }}
        run: |
          .venv/bin/python scripts/model_quality_gate.py \
            --baseline-tag pipeline/is-contract/0.1 \
            --candidate-tag pipeline/is-contract/0.1 \
            --baseline-metrics-json test_data/model_quality/is_contract_baseline_metrics.json \
            --max-accuracy-regression 0.0 \
            --max-f1-regression 0.0

      # Same arrangement for the contract-type model: identical tags and
      # zero tolerated regression on every listed metric.
      - name: Run contract-type quality gate
        env:
          GITHUB_TOKEN: ${{ github.token }}
        run: |
          .venv/bin/python scripts/contract_type_quality_gate.py \
            --baseline-tag pipeline/contract-type/0.2-runtime \
            --candidate-tag pipeline/contract-type/0.2-runtime \
            --baseline-metrics-json test_data/model_quality/contract_type_baseline_metrics.json \
            --max-accuracy-top1-regression 0.0 \
            --max-accuracy-topn-regression 0.0 \
            --max-f1-macro-regression 0.0 \
            --max-f1-weighted-regression 0.0

258 changes: 258 additions & 0 deletions .github/workflows/ci.yml
Original file line number Diff line number Diff line change
@@ -0,0 +1,258 @@
# Main CI workflow: test suites, model quality gates, and a packaging smoke
# test. Each job builds its own virtualenv from uv.lock.
name: CI

# Runs for every pull request and every push (no branch or path filters).
on:
  pull_request:
  push:

# Jobs only need to read the repository.
permissions:
  contents: read

env:
  # Quoted so YAML keeps the version a string rather than a float.
  PYTHON_VERSION: "3.11"
  # Baseline and candidate point at the same tag.
  CONTRACT_MODEL_BASELINE_TAG: "pipeline/is-contract/0.1"
  CONTRACT_MODEL_CANDIDATE_TAG: "pipeline/is-contract/0.1"
  CONTRACT_MODEL_BASELINE_METRICS: "test_data/model_quality/is_contract_baseline_metrics.json"
  LEXNLP_CONTRACT_TYPE_MODEL_TAG: "pipeline/contract-type/0.2-runtime"
  CONTRACT_TYPE_MODEL_BASELINE_METRICS: "test_data/model_quality/contract_type_baseline_metrics.json"

jobs:
  # Skip-audit check followed by pytest over the lexnlp package.
  base-tests:
    name: Base Tests
    runs-on: ubuntu-latest
    timeout-minutes: 60
    steps:
      - name: Check out repository
        uses: actions/checkout@v4

      - name: Set up Python
        uses: actions/setup-python@v5
        with:
          python-version: ${{ env.PYTHON_VERSION }}

      - name: Set up uv
        uses: astral-sh/setup-uv@v6

      # The exact key includes the uv.lock hash; restore-keys falls back to
      # any cache for the same OS and Python version.
      - name: Cache NLTK data
        uses: actions/cache@v4
        with:
          path: |
            ~/nltk_data
          key: nltk-data-${{ runner.os }}-py${{ env.PYTHON_VERSION }}-${{ hashFiles('uv.lock') }}
          restore-keys: |
            nltk-data-${{ runner.os }}-py${{ env.PYTHON_VERSION }}-

      # --frozen installs exactly what uv.lock records, without re-resolving.
      - name: Install dependencies
        run: |
          uv venv .venv --python "${PYTHON_VERSION}"
          uv sync --frozen --python .venv/bin/python --extra dev --extra test

      # NOTE(review): GITHUB_TOKEN is presumably read by the script to
      # authenticate asset downloads -- confirm in the script.
      - name: Bootstrap required assets
        env:
          GITHUB_TOKEN: ${{ github.token }}
        run: .venv/bin/python scripts/bootstrap_assets.py --nltk --contract-model

      - name: Enforce skip-audit policy
        run: .venv/bin/python ci/skip_audit.py

      - name: Run base suite
        run: .venv/bin/pytest lexnlp

  # Runs only the two Stanford test modules, with Java installed.
  stanford-tests:
    name: Stanford Tests
    runs-on: ubuntu-latest
    timeout-minutes: 60
    steps:
      - name: Check out repository
        uses: actions/checkout@v4

      - name: Set up Python
        uses: actions/setup-python@v5
        with:
          python-version: ${{ env.PYTHON_VERSION }}

      - name: Set up uv
        uses: astral-sh/setup-uv@v6

      - name: Cache NLTK data
        uses: actions/cache@v4
        with:
          path: |
            ~/nltk_data
          key: nltk-data-${{ runner.os }}-py${{ env.PYTHON_VERSION }}-${{ hashFiles('uv.lock') }}
          restore-keys: |
            nltk-data-${{ runner.os }}-py${{ env.PYTHON_VERSION }}-

      # NOTE(review): presumably the Stanford tooling needs a JVM -- confirm.
      - name: Set up Java
        uses: actions/setup-java@v4
        with:
          distribution: temurin
          java-version: "11"

      - name: Install dependencies
        run: |
          uv venv .venv --python "${PYTHON_VERSION}"
          uv sync --frozen --python .venv/bin/python --extra dev --extra test

      - name: Bootstrap required assets (including Stanford)
        env:
          GITHUB_TOKEN: ${{ github.token }}
        run: .venv/bin/python scripts/bootstrap_assets.py --nltk --contract-model --stanford

      - name: Run Stanford suite
        env:
          # Quoted: environment values are strings, not YAML booleans.
          LEXNLP_USE_STANFORD: "true"
        run: |
          .venv/bin/pytest \
            lexnlp/nlp/en/tests/test_stanford.py \
            lexnlp/extract/en/entities/tests/test_stanford_ner.py

  # Runs the is-contract model quality gate and uploads its JSON result.
  model-quality:
    name: Model Quality Gate
    runs-on: ubuntu-latest
    timeout-minutes: 30
    steps:
      - name: Check out repository
        uses: actions/checkout@v4

      - name: Set up Python
        uses: actions/setup-python@v5
        with:
          python-version: ${{ env.PYTHON_VERSION }}

      - name: Set up uv
        uses: astral-sh/setup-uv@v6

      - name: Cache NLTK data
        uses: actions/cache@v4
        with:
          path: |
            ~/nltk_data
          key: nltk-data-${{ runner.os }}-py${{ env.PYTHON_VERSION }}-${{ hashFiles('uv.lock') }}
          restore-keys: |
            nltk-data-${{ runner.os }}-py${{ env.PYTHON_VERSION }}-

      # Only the "test" extra is installed here (no "dev").
      - name: Install dependencies
        run: |
          uv venv .venv --python "${PYTHON_VERSION}"
          uv sync --frozen --python .venv/bin/python --extra test

      - name: Bootstrap contract-model asset
        env:
          GITHUB_TOKEN: ${{ github.token }}
        run: .venv/bin/python scripts/bootstrap_assets.py --nltk --contract-model

      # Tags and the baseline metrics path come from the workflow-level env;
      # every allowed regression is 0.0.
      - name: Run contract model quality gate
        run: |
          .venv/bin/python scripts/model_quality_gate.py \
            --baseline-tag "${CONTRACT_MODEL_BASELINE_TAG}" \
            --candidate-tag "${CONTRACT_MODEL_CANDIDATE_TAG}" \
            --baseline-metrics-json "${CONTRACT_MODEL_BASELINE_METRICS}" \
            --output-json artifacts/model_quality_gate.json \
            --max-f1-regression 0.0 \
            --max-accuracy-regression 0.0

      # Fails the step if the gate produced no result file.
      - name: Upload quality-gate result
        uses: actions/upload-artifact@v4
        with:
          name: model-quality-gate
          path: artifacts/model_quality_gate.json
          if-no-files-found: error

  # Loads the contract-type predictor, makes one prediction, then runs the
  # contract-type quality gate and uploads its JSON result.
  contract-type-smoke:
    name: Contract Type Smoke
    runs-on: ubuntu-latest
    timeout-minutes: 45
    steps:
      - name: Check out repository
        uses: actions/checkout@v4

      - name: Set up Python
        uses: actions/setup-python@v5
        with:
          python-version: ${{ env.PYTHON_VERSION }}

      - name: Set up uv
        uses: astral-sh/setup-uv@v6

      - name: Cache NLTK data
        uses: actions/cache@v4
        with:
          path: |
            ~/nltk_data
          key: nltk-data-${{ runner.os }}-py${{ env.PYTHON_VERSION }}-${{ hashFiles('uv.lock') }}
          restore-keys: |
            nltk-data-${{ runner.os }}-py${{ env.PYTHON_VERSION }}-

      - name: Install dependencies
        run: |
          uv venv .venv --python "${PYTHON_VERSION}"
          uv sync --frozen --python .venv/bin/python --extra test

      - name: Bootstrap runtime contract-type model
        env:
          GITHUB_TOKEN: ${{ github.token }}
        run: .venv/bin/python scripts/bootstrap_assets.py --contract-type-model

      # Inline script fed to Python on stdin; the quoted 'PY' delimiter stops
      # the shell from expanding anything inside the heredoc.
      - name: Run contract-type predictor smoke
        run: |
          .venv/bin/python - <<'PY'
          from lexnlp.extract.en.contracts.predictors import ProbabilityPredictorContractType
          predictor = ProbabilityPredictorContractType()
          predictions = predictor.make_predictions(
              "This Employment Agreement is entered into on January 1, 2024.",
              top_n=3,
          )
          assert len(predictions) > 0
          print(predictions.to_dict())
          PY

      # The same tag is used as both baseline and candidate.
      - name: Run contract-type quality gate
        run: |
          .venv/bin/python scripts/contract_type_quality_gate.py \
            --baseline-tag "${LEXNLP_CONTRACT_TYPE_MODEL_TAG}" \
            --candidate-tag "${LEXNLP_CONTRACT_TYPE_MODEL_TAG}" \
            --baseline-metrics-json "${CONTRACT_TYPE_MODEL_BASELINE_METRICS}" \
            --output-json artifacts/contract_type_quality_gate.json \
            --max-accuracy-top1-regression 0.0 \
            --max-accuracy-topn-regression 0.0 \
            --max-f1-macro-regression 0.0 \
            --max-f1-weighted-regression 0.0

      - name: Upload contract-type quality-gate result
        uses: actions/upload-artifact@v4
        with:
          name: contract-type-quality-gate
          path: artifacts/contract_type_quality_gate.json
          if-no-files-found: error

  # Builds the sdist and wheel, checks their contents, then installs the
  # wheel into a fresh virtualenv and imports the package.
  packaging-smoke:
    name: Packaging Smoke
    runs-on: ubuntu-latest
    timeout-minutes: 30
    steps:
      - name: Check out repository
        uses: actions/checkout@v4

      - name: Set up Python
        uses: actions/setup-python@v5
        with:
          python-version: ${{ env.PYTHON_VERSION }}

      - name: Set up uv
        uses: astral-sh/setup-uv@v6

      - name: Build source and wheel artifacts
        run: |
          uv build

      - name: Validate artifact contents
        run: python3 ci/check_dist_contents.py

      # A separate .venv-smoke environment receives only the built wheel.
      - name: Install wheel in clean env
        run: |
          uv venv .venv-smoke --python "${PYTHON_VERSION}"
          uv pip install --python .venv-smoke/bin/python dist/*.whl
          .venv-smoke/bin/python - <<'PY'
          import lexnlp
          print(getattr(lexnlp, "__version__", "unknown"))
          PY
Loading