diff --git a/.github/workflows/deploy-docs-prod.yaml b/.github/workflows/deploy-docs-prod.yaml index c2e62fb39b..24697c2f30 100644 --- a/.github/workflows/deploy-docs-prod.yaml +++ b/.github/workflows/deploy-docs-prod.yaml @@ -117,7 +117,7 @@ jobs: run: aws configure set aws_access_key_id ${{ secrets.AWS_ACCESS_KEY_ID_PROD }} && aws configure set aws_secret_access_key ${{ secrets.AWS_SECRET_ACCESS_KEY_PROD }} && aws configure set default.region us-east-1 - name: Deploy docs prod site - run: aws s3 sync site/_site s3://validmind-docs-prod/site --delete --exclude "installation/omnibus/*" --exclude "installation/helm-repo/*" --exclude "llm/*" && aws cloudfront create-invalidation --distribution-id E2BGG3USKQTR9W --paths "/*" --no-cli-pager + run: aws s3 sync site/_site s3://validmind-docs-prod/site --delete --exclude "installation/omnibus/*" --exclude "installation/helm-repo/*" --exclude "notebooks/EXECUTED/*" --exclude "llm/*" && aws cloudfront create-invalidation --distribution-id E2BGG3USKQTR9W --paths "/*" --no-cli-pager # Release headroom and shrink before final lightweight steps & post-job - name: Release reserve & shrink diff --git a/.github/workflows/deploy-docs-staging.yaml b/.github/workflows/deploy-docs-staging.yaml index 48e76d348f..4c8f8fb69b 100644 --- a/.github/workflows/deploy-docs-staging.yaml +++ b/.github/workflows/deploy-docs-staging.yaml @@ -120,7 +120,7 @@ jobs: run: aws configure set aws_access_key_id ${{ secrets.AWS_ACCESS_KEY_ID_STAGING }} && aws configure set aws_secret_access_key ${{ secrets.AWS_SECRET_ACCESS_KEY_STAGING }} && aws configure set default.region us-west-2 - name: Deploy docs staging site - run: aws s3 sync site/_site s3://validmind-docs-staging/site --delete --exclude "installation/helm-repo/*" --exclude "pr_previews/*" --exclude "llm/*" && aws cloudfront create-invalidation --distribution-id ESWVTZYFL873V --paths "/*" --no-cli-pager + run: aws s3 sync site/_site s3://validmind-docs-staging/site --delete --exclude 
"installation/helm-repo/*" --exclude "pr_previews/*" --exclude "notebooks/EXECUTED/*" --exclude "llm/*" && aws cloudfront create-invalidation --distribution-id ESWVTZYFL873V --paths "/*" --no-cli-pager # Release headroom and shrink before final lightweight steps & post-job - name: Release reserve & shrink diff --git a/.github/workflows/validate-docs-site.yaml b/.github/workflows/validate-docs-site.yaml index da1d3bc9d1..309a7b302f 100644 --- a/.github/workflows/validate-docs-site.yaml +++ b/.github/workflows/validate-docs-site.yaml @@ -130,7 +130,7 @@ jobs: run: aws configure set aws_access_key_id ${{ secrets.AWS_ACCESS_KEY_ID_STAGING }} && aws configure set aws_secret_access_key ${{ secrets.AWS_SECRET_ACCESS_KEY_STAGING }} && aws configure set default.region us-east-1 - name: Deploy PR preview - run: aws s3 sync site/_site s3://validmind-docs-staging/site/pr_previews/${{ github.head_ref }} --delete && aws cloudfront create-invalidation --distribution-id ESWVTZYFL873V --paths "/*" --no-cli-pager + run: aws s3 sync site/_site s3://validmind-docs-staging/site/pr_previews/${{ github.head_ref }} --delete --exclude "notebooks/EXECUTED/*" && aws cloudfront create-invalidation --distribution-id ESWVTZYFL873V --paths "/*" --no-cli-pager - name: Post comment with preview URL uses: actions/github-script@v6 diff --git a/.gitignore b/.gitignore index 9684da543c..f4de6a8af5 100644 --- a/.gitignore +++ b/.gitignore @@ -36,9 +36,6 @@ site/validmind-docs.yaml # Python API docs are now generated on the fly site/validmind -# Generated template schema documentation -site/guide/templates/_template-schema-generated.qmd - # Cursor rules .cursor/rules/ .cursor/skills/ diff --git a/README.md b/README.md index 1734a7b31e..2c188de20c 100644 --- a/README.md +++ b/README.md @@ -150,7 +150,7 @@ Some documentation content is auto-generated from backend source files. 
These sc #### Template schema documentation -The template schema reference in `site/guide/templates/customize-document-templates.qmd` is auto-generated from the backend JSON Schema. CI workflows generate this automatically, but you can also regenerate locally: +The template schema reference in `site/guide/templates/customize-document-templates.qmd` is auto-generated from the backend JSON Schema, overwriting any baseline output checked into this repo. You can also regenerate locally and commit: ```bash cd site @@ -172,7 +172,7 @@ make template-schema-docs The script reads from: - `backend/src/backend/templates/documentation/model_documentation/mdd_template_schema_v5.json` — template schema definition -Output: `site/guide/templates/_template-schema-generated.qmd` +Output: `site/guide/templates/_template-schema-generated.qmd`, which is checked into this repo and included by `site/guide/templates/customize-document-templates.qmd`. #### Stylesheet organization (IN PROGRESS) diff --git a/site/releases/breaking-changes/breaking-changes.qmd b/internal/releases-archive/breaking-changes/breaking-changes.qmd similarity index 69% rename from site/releases/breaking-changes/breaking-changes.qmd rename to internal/releases-archive/breaking-changes/breaking-changes.qmd index 5436f8f06d..a71687d175 100644 --- a/site/releases/breaking-changes/breaking-changes.qmd +++ b/internal/releases-archive/breaking-changes/breaking-changes.qmd @@ -1,4 +1,7 @@ --- +# Copyright © 2023-2026 ValidMind Inc. All rights reserved. +# Refer to the LICENSE file in the root of this repository for details. 
+# SPDX-License-Identifier: AGPL-3.0 AND ValidMind Commercial title: "Breaking changes and deprecations" date: last-modified categories: ["breaking changes", "deprecations"] diff --git a/internal/releases-archive/release-scripts/__pycache__/generate_release_objects.cpython-310.pyc b/internal/releases-archive/release-scripts/__pycache__/generate_release_objects.cpython-310.pyc new file mode 100644 index 0000000000..2627bac9f2 Binary files /dev/null and b/internal/releases-archive/release-scripts/__pycache__/generate_release_objects.cpython-310.pyc differ diff --git a/internal/releases-archive/release-scripts/__pycache__/yearly_cleanup.cpython-310.pyc b/internal/releases-archive/release-scripts/__pycache__/yearly_cleanup.cpython-310.pyc new file mode 100644 index 0000000000..ad28ed051b Binary files /dev/null and b/internal/releases-archive/release-scripts/__pycache__/yearly_cleanup.cpython-310.pyc differ diff --git a/release-scripts/generate-release-notes.ipynb b/internal/releases-archive/release-scripts/generate-release-notes.ipynb similarity index 100% rename from release-scripts/generate-release-notes.ipynb rename to internal/releases-archive/release-scripts/generate-release-notes.ipynb diff --git a/release-scripts/generate_release_objects.py b/internal/releases-archive/release-scripts/generate_release_objects.py similarity index 100% rename from release-scripts/generate_release_objects.py rename to internal/releases-archive/release-scripts/generate_release_objects.py diff --git a/release-scripts/year-end-cleanup.ipynb b/internal/releases-archive/release-scripts/year-end-cleanup.ipynb similarity index 100% rename from release-scripts/year-end-cleanup.ipynb rename to internal/releases-archive/release-scripts/year-end-cleanup.ipynb diff --git a/release-scripts/yearly_cleanup.py b/internal/releases-archive/release-scripts/yearly_cleanup.py similarity index 100% rename from release-scripts/yearly_cleanup.py rename to 
internal/releases-archive/release-scripts/yearly_cleanup.py diff --git a/scripts/generate_template_schema_docs.py b/scripts/generate_template_schema_docs.py index 3becbac57d..c790489403 100644 --- a/scripts/generate_template_schema_docs.py +++ b/scripts/generate_template_schema_docs.py @@ -30,7 +30,10 @@ BACKEND_ROOT = Path(os.environ.get("BACKEND_ROOT", REPO_ROOT.parent / "backend")) SCHEMA_FILE = BACKEND_ROOT / "src/backend/templates/documentation/model_documentation/mdd_template_schema_v5_ui.json" -OUTPUT_FILE = REPO_ROOT / "site/guide/templates/_template-schema-generated.qmd" +TARGET_FILE = REPO_ROOT / "site/guide/templates/_template-schema-generated.qmd" + +# Minimum expected file size in bytes (sanity check for valid output) +MIN_OUTPUT_SIZE = 40000 # CSS stylesheets to link in the generated HTML STYLESHEETS = [ @@ -62,7 +65,7 @@ def main(): print(f"Generating schema documentation from {SCHEMA_FILE}") # Create temporary output - temp_output = OUTPUT_FILE.with_suffix(".tmp.html") + temp_output = TARGET_FILE.with_suffix(".tmp.html") subprocess.run([ "generate-schema-doc", @@ -98,6 +101,20 @@ def main(): '', html_content ) + # Strip the title tag (not needed when embedded in Quarto page) + html_content = re.sub( + r'\s*[^<]*\s*', + '\n', + html_content + ) + # Strip h1 headings (the page already has a heading from Quarto) + html_content = re.sub( + r'

[^<]*

', + '', + html_content + ) + # Clean up multiple consecutive blank lines + html_content = re.sub(r'\n{3,}', '\n\n', html_content) # Build stylesheet link tags stylesheet_links = '\n'.join( @@ -124,34 +141,45 @@ def main(): '' ) - # Copyright header to place before the raw HTML block - copyright_header = """""" - - # Wrap HTML in Quarto raw HTML block for .qmd file, with comment before - qmd_content = f"""{copyright_header} +Source: {SCHEMA_FILE.relative_to(BACKEND_ROOT.parent)} +--> ```{{=html}} {html_content} ``` """ - # Write final output - OUTPUT_FILE.parent.mkdir(parents=True, exist_ok=True) - with open(OUTPUT_FILE, "w") as f: - f.write(qmd_content) + # Validate output before writing + if len(output_content) < MIN_OUTPUT_SIZE: + print(f"Error: Generated output is too small ({len(output_content)} bytes)") + print("This likely indicates a generation failure.") + temp_output.unlink(missing_ok=True) + sys.exit(1) + + if "" not in html_content: + print("Error: Generated output does not contain valid HTML structure") + temp_output.unlink(missing_ok=True) + sys.exit(1) + + # Write to target file + TARGET_FILE.write_text(output_content) # Clean up temp file temp_output.unlink() - print(f"Generated {OUTPUT_FILE}") + print(f"Generated template schema documentation: {TARGET_FILE}") + print(f"Output size: {len(output_content)} bytes") if __name__ == "__main__": diff --git a/site/Makefile b/site/Makefile index 6773da52d5..d3372f286c 100644 --- a/site/Makefile +++ b/site/Makefile @@ -427,7 +427,7 @@ release-notes: template-schema-docs: @echo "\nGenerating template schema documentation ..." @if [ ! -d "$(SRC_ROOT)/backend" ]; then echo "Error: Backend not cloned. 
Run 'make clone' first."; exit 1; fi - @pip install -q json-schema-for-humans + @python -m pip install -q json-schema-for-humans @BACKEND_ROOT=$(SRC_ROOT)/backend python ../scripts/generate_template_schema_docs.py test-descriptions: diff --git a/site/_extensions/category-filter/_extension.yml b/site/_extensions/category-filter/_extension.yml new file mode 100644 index 0000000000..f12627f1d7 --- /dev/null +++ b/site/_extensions/category-filter/_extension.yml @@ -0,0 +1,6 @@ +title: Category Filter +author: Nik Richers +version: 1.0.0 +contributes: + filters: + - category-filter.lua diff --git a/site/_extensions/category-filter/category-filter.lua b/site/_extensions/category-filter/category-filter.lua new file mode 100644 index 0000000000..62fbdf5ee5 --- /dev/null +++ b/site/_extensions/category-filter/category-filter.lua @@ -0,0 +1,59 @@ +-- Category Filter Lua Extension +-- Reads listing-filter from YAML frontmatter and injects config as a data attribute + +local function meta_to_native(val) + if val == nil then + return nil + end + + local mt = getmetatable(val) + + -- Check for pandoc List type (used for YAML arrays) + if mt and mt.__name == "List" then + local arr = {} + for i, item in ipairs(val) do + arr[i] = meta_to_native(item) + end + return arr + end + + -- Check for pandoc Inlines (YAML string values become Inlines) + if mt and (mt.__name == "Inlines" or mt.__name == "Blocks") then + return pandoc.utils.stringify(val) + end + + -- Plain Lua types + local t = type(val) + if t == "string" or t == "number" or t == "boolean" then + return val + elseif t == "table" then + -- Check if it's an array-like table + if #val > 0 and val[1] ~= nil then + local arr = {} + for i, item in ipairs(val) do + arr[i] = meta_to_native(item) + end + return arr + else + -- It's a map/object + local obj = {} + for k, v in pairs(val) do + obj[k] = meta_to_native(v) + end + return obj + end + end + + -- Fallback: try stringify + return pandoc.utils.stringify(val) +end + +function 
Meta(meta) + if meta['listing-filter'] then + local config = meta_to_native(meta['listing-filter']) + local json = quarto.json.encode(config) + local escaped = json:gsub("'", "'") + quarto.doc.include_text("after-body", + '') + end +end diff --git a/site/_quarto.yml b/site/_quarto.yml index 3431441f74..b2482ebcbd 100644 --- a/site/_quarto.yml +++ b/site/_quarto.yml @@ -4,6 +4,10 @@ project: type: website + render: + - "**/*.qmd" + - "**/*.ipynb" + - "**/*.md" metadata-files: # - _drafts.yaml diff --git a/site/about/glossary/mcp/_mcp-server.qmd b/site/about/glossary/mcp/_mcp-server.qmd new file mode 100644 index 0000000000..d758930933 --- /dev/null +++ b/site/about/glossary/mcp/_mcp-server.qmd @@ -0,0 +1,6 @@ + + +MCP server +: A service that exposes tools and resources to AI assistants via the MCP protocol. The {{< var vm.product >}} MCP server provides access to model inventory operations. diff --git a/site/about/glossary/mcp/_mcp-tool.qmd b/site/about/glossary/mcp/_mcp-tool.qmd new file mode 100644 index 0000000000..739c6cd6e5 --- /dev/null +++ b/site/about/glossary/mcp/_mcp-tool.qmd @@ -0,0 +1,6 @@ + + +MCP tool +: An operation exposed by an MCP server that AI assistants can invoke to perform actions, such as querying models or updating artifacts. diff --git a/site/about/glossary/mcp/_mcp.qmd b/site/about/glossary/mcp/_mcp.qmd new file mode 100644 index 0000000000..0dca829df8 --- /dev/null +++ b/site/about/glossary/mcp/_mcp.qmd @@ -0,0 +1,6 @@ + + +MCP (Model Context Protocol) +: An open standard that enables AI assistants to connect to external data sources and tools through a unified protocol, allowing natural language interactions with structured systems. 
diff --git a/site/developer/model-documentation/install-and-initialize-validmind-for-r.qmd b/site/developer/model-documentation/install-and-initialize-validmind-for-r.qmd index f595c8166d..3398550a75 100644 --- a/site/developer/model-documentation/install-and-initialize-validmind-for-r.qmd +++ b/site/developer/model-documentation/install-and-initialize-validmind-for-r.qmd @@ -12,12 +12,12 @@ listing: fields: [title, description] grid-columns: 2 contents: - - path: https://github.com/validmind/validmind-library/blob/main/notebooks/quickstart/quickstart_model_documentation.Rmd - title: "Quickstart for model documentation" - description: "End-to-end model documentation workflow in R: load data, preprocess, train a GLM model, and run the full documentation test suite." - - path: https://github.com/validmind/validmind-library/blob/main/notebooks/quickstart/quickstart_model_validation.Rmd - title: "Quickstart for model validation" - description: "End-to-end model validation workflow in R: load data, run data quality tests, train a champion GLM model, and run model evaluation tests." + - path: https://github.com/validmind/validmind-library/tree/main/notebooks/quickstart/ + title: "Quickstart for model documentation" + description: "End-to-end model documentation workflow in R: load data, preprocess, train a GLM model, and run the full documentation test suite." + - path: https://github.com/validmind/validmind-library/tree/main/notebooks/quickstart/ + title: "Quickstart for model validation" + description: "End-to-end model validation workflow in R: load data, run data quality tests, train a champion GLM model, and run model evaluation tests." --- Use the ValidMind R package to document and validate models built in R. The package interfaces with the {{< var validmind.developer >}} through `reticulate`, giving you access to the full Python API from R. 
diff --git a/site/developer/supported-models-and-frameworks.qmd b/site/developer/supported-models-and-frameworks.qmd index d1b25d81af..ab9319d529 100644 --- a/site/developer/supported-models-and-frameworks.qmd +++ b/site/developer/supported-models-and-frameworks.qmd @@ -81,6 +81,12 @@ Retrieval-augmented generation pipelines. - Tests: RAGAS integration - Refer to [RAG evaluation](#rag-evaluation) for dataset requirements and available tests + +## Agentic AI +LLM-based agents that use tools and multi-step reasoning. + +- Scorers: DeepEval integration +- Refer to [Agentic AI evaluation](#agentic-ai-evaluation) for available scorers ::: ## Machine learning models @@ -465,6 +471,28 @@ run_test( - **Context Precision** — Measures relevance of retrieved contexts - **Answer Relevancy** — Assesses if the response addresses the query +## Agentic AI evaluation + +For agentic AI systems, the {{< var validmind.developer >}} integrates with [DeepEval](https://docs.confident-ai.com/) for trace-based evaluation of tool usage and reasoning. 
+ +### Available agentic scorers + +- **TaskCompletion** — Assesses whether the agent achieves the requested outcome +- **PlanQuality** — Measures if generated plans are logical, complete, and efficient +- **PlanAdherence** — Evaluates whether the agent follows its plan during execution +- **ToolCorrectness** — Validates that the agent invokes the expected tools +- **ArgumentCorrectness** — Checks if arguments passed to tools are correct + +### Scorer requirements + +| Scorer | Requires model | Parameters | +|--------|----------------|------------| +| TaskCompletion | Yes (`predict_fn`) | `input_column` | +| PlanQuality | Yes (`predict_fn`) | `input_column` | +| PlanAdherence | Yes (`predict_fn`) | `input_column` | +| ToolCorrectness | No | `input_column`, `expected_tools_called_column`, `actual_tools_called_column` | +| ArgumentCorrectness | No | `input_column`, `actual_tools_called_column` | + ## Python and dependency compatibility The {{< var validmind.developer >}} requires: diff --git a/site/guide/_sidebar.yaml b/site/guide/_sidebar.yaml index b5d7d6dcda..fd3b304124 100644 --- a/site/guide/_sidebar.yaml +++ b/site/guide/_sidebar.yaml @@ -42,9 +42,10 @@ website: contents: - guide/integrations/manage-secrets.qmd - guide/integrations/configure-connections.qmd - - guide/integrations/configure-analytics-exports.qmd - guide/integrations/implement-custom-integrations.qmd - guide/integrations/link-external-models.qmd + - guide/mcp/connect-ai-assistants-via-mcp.qmd + - guide/integrations/configure-analytics-exports.qmd - guide/integrations/integrations-examples.qmd - text: "---" - text: "Workflows" diff --git a/site/guide/integrations/managing-integrations.qmd b/site/guide/integrations/managing-integrations.qmd index bfa1708256..41bcce250e 100644 --- a/site/guide/integrations/managing-integrations.qmd +++ b/site/guide/integrations/managing-integrations.qmd @@ -33,6 +33,7 @@ graph TD E[Public REST API] F[Testing & documentation] G[Service integration] + H[AI assistants] 
A <--> B A <--> C @@ -40,6 +41,7 @@ graph TD A <--> E A <--> F A <--> G + A <--> H ``` @@ -194,6 +196,29 @@ Link to tickets in external systems and keep them synchronized. Attach tickets t :::: +--- + +### AI assistants + +Connect AI assistants to {{< var vm.product >}} using the Model Context Protocol (MCP). Query and manage your model inventory through natural language conversations.[^7] + +**Supported** + +:::: {.flex .flex-wrap .justify-around} + +::: {.w-33-ns} +- Claude Desktop +::: + +::: {.w-33-ns} +- Cursor IDE +::: + +::: {.w-33-ns} +  +::: + +:::: ## Key concepts @@ -233,4 +258,5 @@ Link to tickets in external systems and keep them synchronized. Attach tickets t [^3]: [Introduction to workflows](/guide/workflows/introduction-to-workflows.qmd) [^4]: [Public REST API](/reference/validmind-rest-api-vm.qmd) [^5]: [Testing & documentation](/developer/validmind-library.qmd) -[^6]: [Configure analytics exports](/guide/integrations/configure-analytics-exports.qmd) \ No newline at end of file +[^6]: [Configure analytics exports](/guide/integrations/configure-analytics-exports.qmd) +[^7]: [Connect AI assistants](/guide/mcp/connect-ai-assistants-via-mcp.qmd) \ No newline at end of file diff --git a/site/guide/mcp/connect-ai-assistants-via-mcp.qmd b/site/guide/mcp/connect-ai-assistants-via-mcp.qmd new file mode 100644 index 0000000000..a2e66f0717 --- /dev/null +++ b/site/guide/mcp/connect-ai-assistants-via-mcp.qmd @@ -0,0 +1,198 @@ +--- +# Copyright © 2023-2026 ValidMind Inc. All rights reserved. +# Refer to the LICENSE file in the root of this repository for details. +# SPDX-License-Identifier: AGPL-3.0 AND ValidMind Commercial +title: "Connect AI assistants via MCP" +date: last-modified +--- + +The {{< var vm.product >}} MCP (Model Context Protocol) server enables AI assistants to query and manage your model inventory through natural language, providing seamless access to models, artifacts, templates, and custom fields. 
+ + +::: {.attn} + +## Prerequisites + +- [x] {{< var link.login >}} +- [x] You have your API key and secret.[^1] +- [x] You have a compatible AI assistant installed: + - Cursor IDE[^2] + - Claude Code[^3] + +::: + +## Key concepts + +{{< include /about/glossary/mcp/_mcp.qmd >}} + +{{< include /about/glossary/mcp/_mcp-server.qmd >}} + +{{< include /about/glossary/mcp/_mcp-tool.qmd >}} + +### How does {{< var vm.product >}} MCP work? + +```{mermaid} +graph LR + subgraph ai [AI assistants] + Claude[Claude Code] + Cursor[Cursor IDE] + end + + subgraph mcp [MCP layer] + MCPServer[ValidMind MCP Server] + end + + subgraph vm [ValidMind Platform] + Models[Models] + Artifacts[Artifacts] + Templates[Templates] + CustomFields[Custom fields] + end + + Claude --> MCPServer + Cursor --> MCPServer + MCPServer --> Models + MCPServer --> Artifacts + MCPServer --> Templates + MCPServer --> CustomFields +``` + +Your AI assistant sends natural language queries through the MCP protocol. The {{< var vm.product >}} MCP server translates these into API calls, authenticates using your API key, and returns data from the {{< var validmind.platform >}}. + +### What can I do with {{< var vm.product >}} MCP? + +The MCP server exposes tools for working with your model inventory: + +:::: {.flex .flex-wrap .justify-around} + +::: {.w-50-ns .pr3} + +**Model and artifact operations** + +- List and get models +- List and get artifacts +- Filter by risk level, deployment region, or ownership + +**Custom field operations** + +- List custom fields for models and artifacts +- Update custom field values + +::: + +::: {.w-50-ns .pl3} + +**Template operations** + +- List available templates +- Get template details +- Validate templates +- Duplicate and update templates + +::: + +:::: + +**Example queries you can ask:** + +- "Show me all models that are high risk" +- "Find all the models I am an owner for" +- "List my validation artifacts" +- "What templates are available for credit risk models?" 
+ +## Configure your AI assistant + +::: {.panel-tabset} + +### Cursor IDE + +1. In Cursor, open **Settings** > **Cursor Settings** and navigate to the **MCP** section. + +2. Click **Add new global MCP server** to open your `~/.cursor/mcp.json` file. + +3. Add the {{< var vm.product >}} MCP server configuration: + + ```json + { + "mcpServers": { + "validmind": { + "url": "https://api.prod.validmind.ai/mcp", + "headers": { + "x-api-key": "YOUR_API_KEY", + "x-api-secret": "YOUR_API_SECRET" + } + } + } + } + ``` + +4. Replace `YOUR_API_KEY` and `YOUR_API_SECRET` with your ValidMind credentials.[^1] + +5. Save the file and reload Cursor with the **Command/Ctrl + Shift + P** > **Developer: Reload Window** shortcut. + +6. Return to **Cursor Settings > MCP** and verify that the ValidMind server appears. If it shows as disabled, click the toggle to enable it. + +7. Try asking Cursor questions like: + + - "What models do I own?" + - "Show me all tier 1 models" + - "Register a new model called Customer Churn" + +### Claude Code + +1. In Claude Code, add the {{< var vm.product >}} MCP server (this updates your `~/.claude.json` file): + + ```bash + claude mcp add --transport http validmind https://api.prod.validmind.ai/mcp \ + --header "x-api-key: YOUR_API_KEY" \ + --header "x-api-secret: YOUR_API_SECRET" + ``` + +2. Replace `YOUR_API_KEY` and `YOUR_API_SECRET` with your ValidMind credentials.[^1] + +3. Verify the connection: + + ```bash + claude mcp list + ``` + + You should see `validmind` with status `✓ Connected`. + +4. Start Claude Code and try asking questions like: + + - "What models do I own?" + - "Show me all tier 1 models" + - "Register a new model called Customer Churn" + +::: + +## Troubleshooting + +::: {.panel-tabset} + +### Connection refused or timeout errors + +- Verify your network can reach `api.prod.validmind.ai`. +- Check if your organization uses a firewall or proxy that blocks MCP connections. 
+- Ensure you are using the correct MCP URL for your environment. + +### Authentication errors (401 or 403) + +- Confirm your API key and secret are correct. +- Check that your API credentials have not expired. +- Verify your account has the necessary permissions to access the requested resources. + +### MCP server not appearing in Cursor + +- Ensure the configuration file is saved at `~/.cursor/mcp.json`. +- Validate the JSON syntax in your configuration file. +- Reload Cursor after making configuration changes. +- Check **Cursor Settings > MCP** to verify the server appears and is enabled. You may need to manually toggle the server on after adding it to the configuration. + +::: + + + +[^1]: [Get your API key and secret](/reference/validmind-rest-api-vm.qmd#get-your-api-key-and-secret) +[^2]: [Cursor](https://cursor.com) +[^3]: [Claude Code](https://docs.anthropic.com/en/docs/claude-code) diff --git a/site/guide/model-validation/manage-artifact-types.qmd b/site/guide/model-validation/manage-artifact-types.qmd index 0f3b7d35a6..8f3669d081 100644 --- a/site/guide/model-validation/manage-artifact-types.qmd +++ b/site/guide/model-validation/manage-artifact-types.qmd @@ -13,7 +13,7 @@ aliases: #### Artifact types -By default, the {{< var validmind.platform >}} provides three native artifact types: +By default, the {{< var validmind.platform >}} provides three artifact types: Validation Issue : Problems discovered during model validation that reflect errors, inconsistencies, or gaps in data, methodology, implementation, or documentation that need to be addressed before the model is considered reliable. 
@@ -75,7 +75,7 @@ Custom artifact types support the same features as default types, including stat ## Archive and delete artifact types ::: {.callout-important title="Deletion of artifact types is permanent."} -- Native (default) artifact types cannot be deleted, only archived.[^4] When archiving a native artifact type, existing artifacts of that type will be hidden and no new artifacts of that type can be created. +- Default artifact types cannot be deleted, only archived.[^4] When archiving a default artifact type, existing artifacts of that type will be hidden and no new artifacts of that type can be created. - When archiving a custom artifact type, existing artifacts of that type will remain viewable, but no new artifacts of that type can be created. ::: diff --git a/site/guide/templates/_template-schema-generated.qmd b/site/guide/templates/_template-schema-generated.qmd new file mode 100644 index 0000000000..8edc74c349 --- /dev/null +++ b/site/guide/templates/_template-schema-generated.qmd @@ -0,0 +1,1143 @@ + + +```{=html} + + + + + + + + + + + + + + +
+ + +
+ + Type: array
+

Top-level documentation sections shown in the template editor.

+
+ + + + + + No Additional Items

Each item of this array must be:

+
+
+ + + Type: object
+

A section in the document tree.

+
+ + No Additional Properties + + + + + + +
+
+
+

+ +

+
+ +
+
+ + Type: string
+

Unique identifier for the section.

+
+ + + + + + +
+
+
+
+
+
+
+

+ +

+
+ +
+
+ + Type: string
+

Title of the section.

+
+ + + + + + +
+
+
+
+
+
+
+

+ +

+
+ +
+
+ + Type: string
+

Description of the section.

+
+ + + + + + +
+
+
+
+
+
+
+

+ +

+
+ +
+
+ + Type: integer
+

Order of the section in the navigation menu. By default sections are ordered alphabetically. If order is specified, sections will be ordered by the order value, and then alphabetically.

+
+ + + + + + +
+
+
+
+
+
+
+

+ +

+
+ +
+
+ + Type: string
+

Default text for the section. If set, a metadata content row will be created with this text when installing the template on a given project.

+
+ + + + + + +
+
+
+
+
+
+
+

+ +

+
+ +
+
+ + Type: boolean
+

If true, the section will be displayed in the navigation menu, but it will not be accessible via direct link.

+
+ + + + + + +
+
+
+
+
+
+
+

+ +

+
+ +
+
+ + Type: boolean
+

If true, the section will condense all of its subsections into a single section.

+
+ + + + + + +
+
+
+
+
+
+
+

+ +

+
+ +
+
+ + Type: array of string
+

Documentation or validation guidelines for the section.

+
+ + + + + + No Additional Items

Each item of this array must be:

+
+
+ + + Type: string
+ + + + + + + +
+
+
+
+
+
+
+
+
+

+ +

+
+ +
+
+ + Type: array
+

Contents to be displayed on the section.

+
+ + + + + + No Additional Items

Each item of this array must be:

+
+
+ + + Type: object
+

Single content block rendered within a section.

+
+ + + + + + + + +
+
+
+

+ +

+
+ +
+
+ + Type: enum (of string) Default: "text"
+
+

Must be one of:

+
  • "text"
  • "metadata_text"
  • "dynamic"
  • "metric"
  • "test"
  • "guideline"
  • "assessment_summary"
  • "unit_metric"
+
+ + + + + +
+
Examples:
+
"text"
+
+
"test"
+
+
+
+
+
+
+
+
+
+

+ +

+
+ +
+
+ +
+

ID of the content to be displayed for the given content type.

+

+ +

+
+ + + Type: string
+ + + + + + +
+
Example:
+
"sample_text"
+
+
+
+ + + Type: array of string
+ + + + + + + No Additional Items

Each item of this array must be:

+
+
+ + + Type: string
+ + + + + + + +
+

+
Example:
+
[
+    "sample_text",
+    "section_intro"
+]
+
+
+
+ + + + + + +
+
+
+
+
+
+
+

+ +

+
+ +
+
+ + Type: object
+

Options for the content block.

+
+ + + + + +
+
Examples:
+
{
+    "default_text": "This is a sample text block."
+}
+
+
{
+    "metric_id": "metric_1",
+    "title": "Custom Title for Metric 1"
+}
+
+
{
+    "test_id": "adf_test"
+}
+
+
+
+
+
+

+ +

+
+ +
+
+ + Type: string
+

Default text for the content block. Only applicable for text content blocks.

+
+ + + + + + +
+
+
+
+
+
+
+

+ +

+
+ +
+
+ + Type: string
+

Title of the content block. Only applicable for metric and test content blocks.

+
+ + + + + + +
+
+
+
+
+
+
+
+
+
+
+

+ +

+
+ +
+

Additional Properties of any type are allowed.

+ + Type: object
+ + + + + + + +
+
+
+
+
+
+
+
+
+
+
+
+
+

+ +

+
+ +
+
+ + Type: array
+

Nested child sections.

+
+ + + + + + No Additional Items

Each item of this array must be:

+
+
+ + + Type: object
+

A section in the document tree.

+
Same definition as items +
+
+
+
+
+
+
+
+ +
+ +``` diff --git a/site/guide/templates/customize-document-templates.qmd b/site/guide/templates/customize-document-templates.qmd index fcbf2e1876..78c6ab6641 100644 --- a/site/guide/templates/customize-document-templates.qmd +++ b/site/guide/templates/customize-document-templates.qmd @@ -75,12 +75,6 @@ Once saved, your new template version becomes available for use. ### Template schema - - ::: {.column-page-inset-right} {{< include _template-schema-generated.qmd >}} @@ -173,4 +167,3 @@ c. Finish editing your template, then save a new version. [^9]: [Edit YAML templates](#edit-yaml-templates) [^10]: [Template schema](#template-schema) - diff --git a/site/guide/workflows/_conditional-requirements.qmd b/site/guide/workflows/_conditional-requirements.qmd index 85429514b0..95257ad888 100644 --- a/site/guide/workflows/_conditional-requirements.qmd +++ b/site/guide/workflows/_conditional-requirements.qmd @@ -6,6 +6,7 @@ SPDX-License-Identifier: AGPL-3.0 AND ValidMind Commercial --> Conditional requirements are required or optional for the following step types:^[[Workflow step types](/guide/workflows/workflow-step-types.qmd)] ### {{< fa cube >}} Model Stage Change [optional]{.smallercaps .pink} + 1. Under **[when these conditions are met]{.smallcaps}**, set your `AND` and `OR` conditions. 2. Add a single independent rule or a linked condition group. These rules and groups can be nested if desired: @@ -13,7 +14,8 @@ Conditional requirements are required or optional for the following step types:^ - Click **{{< fa plus >}} Add Rule** to add an independent rule. - Click **{{< fa plus-minus >}} Add Group** to add a linked group of rules that all must be true to qualify. 
-### {{< fa arrow-pointer >}} User Action v2 [optional]{.smallercaps .pink} +### {{< fa arrow-pointer >}} User Action [optional]{.smallercaps .pink} + ::: {.panel-tabset} @@ -54,6 +56,7 @@ Conditional requirements are required or optional for the following step types:^ ### {{< fa users >}} Approval [required]{.smallercaps .pink} + 1. Under **[approval group]{.smallcaps}**, click **{{< fa plus >}} Add Individual Approver** to set an individual approver selected from **User Roles**,^[[Manage roles](/guide/configuration/manage-roles.qmd)] **Model Stakeholder Types**,^[[Manage model stakeholder types](/guide/configuration/manage-model-stakeholder-types.qmd)] or **User** type model inventory fields:^[[Manage model inventory fields](/guide/model-inventory/manage-model-inventory-fields.qmd#inventory-field-types)] @@ -72,10 +75,12 @@ Conditional requirements are required or optional for the following step types:^ ### {{< fa bullhorn >}} Broadcast [required]{.smallercaps .pink} + Select from either **Model Stakeholder Types**^[[Manage model stakeholder types](/guide/configuration/manage-model-stakeholder-types.qmd)] or **User Roles**.^[[Manage roles](/guide/configuration/manage-roles.qmd)] ### {{< fa maximize >}} Condition Branch [required]{.smallercaps .pink} + 1. Under each branch's **[conditions]{.smallcaps}**, set your `AND` and `OR` conditions. 2. Add a single independent rule or a linked condition group. These rules and groups can be nested if desired: @@ -84,6 +89,7 @@ Select from either **Model Stakeholder Types**^[[Manage model stakeholder types] - Click **{{< fa plus-minus >}} Add Group** to add a linked group of rules that all must be true to qualify. ### {{< fa clock >}} Wait [required]{.smallercaps .pink} + Select when you would like to [resume workflow]{.smallcaps}: @@ -113,6 +119,7 @@ c. 
Enter in a **[wait duration]{.smallcaps}** in minutes, hours, days, or months ### {{< fa cloud-arrow-up >}} HTTP Request [optional]{.smallercaps .pink} + ::: {.panel-tabset} @@ -154,7 +161,7 @@ Configure conditional requirements for the following step types: - {{< fa cube >}} Model Stage Change **[optional]{.smallercaps}** -- {{< fa arrow-pointer >}} User Action v2 **[optional]{.smallercaps}** +- {{< fa arrow-pointer >}} User Action **[optional]{.smallercaps}** - {{< fa users >}} Approval **[required]{.smallercaps}** diff --git a/site/guide/workflows/_initiate-workflows.qmd b/site/guide/workflows/_initiate-workflows.qmd index 2561b9d105..5053cdfbf0 100644 --- a/site/guide/workflows/_initiate-workflows.qmd +++ b/site/guide/workflows/_initiate-workflows.qmd @@ -7,7 +7,7 @@ SPDX-License-Identifier: AGPL-3.0 AND ValidMind Commercial --> By default, the [{{< fa hand >}} Customer Admin]{.bubble} role has sufficient permissions to manually start workflows. ::: -To manually start workflows[^initiate-workflows]: +To initiate workflows manually:[^initiate-workflows] ::: {.panel-tabset} @@ -43,35 +43,6 @@ To manually start workflows[^initiate-workflows]: ::: -### Resume workflows - -To manually resume a workflow in a {{< fa clock >}} Wait state:^[[Conditional requirements](/guide/workflows/conditional-requirements.qmd)] - -::: {.panel-tabset} - -#### On models - -1. In the left sidebar, click **{{< fa cubes >}} Inventory**. - -1. Select a model or find your model by applying a filter or searching for it.^[[Working with the model inventory](/guide/model-inventory/working-with-model-inventory.qmd#search-filter-and-sort-models)] - -1. On the landing page of your model, locate the [active workflows]{.smallcaps} section. - -1. For the workflow you'd like to manually advance, click **{{< fa arrow-right-arrow-left >}} Resume Workflow** to open up the workflow's step overview. - -1. 
On the **{{< fa clock >}} Wait** step, double-click on **{{< fa arrow-right-arrow-left >}}** to advance the workflow immediately. - -#### On artifacts - -1. On the details page of your artifact,^[[View and filter artifacts](/guide/model-validation/view-filter-artifacts.qmd#view-artifacts)] locate the [active workflows]{.smallcaps} section. - -1. For the workflow you'd like to manually advance, click **{{< fa arrow-right-arrow-left >}} Resume Workflow** to open up the workflow's step overview. - -1. On the **{{< fa clock >}} Wait** step, double-click on **{{< fa arrow-right-arrow-left >}}** to advance the workflow immediately. - -::: - - [^initiate-workflows]: diff --git a/site/guide/workflows/_workflow-step-types.qmd b/site/guide/workflows/_workflow-step-types.qmd index e2abf4c311..dded8cef05 100644 --- a/site/guide/workflows/_workflow-step-types.qmd +++ b/site/guide/workflows/_workflow-step-types.qmd @@ -5,17 +5,20 @@ SPDX-License-Identifier: AGPL-3.0 AND ValidMind Commercial --> :::: {.content-visible unless-format="revealjs"} ### {{< fa cube >}} Model Stage Change + - Transitions a model into another stage. - Requires available model stages associated with the workflow.^[[Manage model stages](/guide/workflows/manage-model-stages.qmd)] | Field | Description | |---:|---| -| [when these conditions are met]{.smallcaps} (optional) | Add conditional requirements to qualify for this step.^[ [Conditional requirements](/guide/workflows/conditional-requirements.qmd#model-stage-change-optional)] | +| [when these conditions are met]{.smallcaps} (optional) | Add conditional requirements to qualify for this step.^[ [Conditional requirements](/guide/workflows/conditional-requirements.qmd#model-stage-change)] | | [set model stage to]{.smallcaps} | Select the model stage to transition to. 
| : **{{< fa cube >}} Model Stage Change** step configuration {.hover tbl-colwidths="[40,60]"} ### {{< fa wifi >}} Workflow State Change + + Transitions the workflow into another workflow state.^[[Workflow states](/guide/workflows/workflow-states.qmd)] | Field | Description | @@ -24,19 +27,21 @@ Transitions the workflow into another workflow state.^[[Workflow states](/guide/ | [progress]{.smallcaps} | Indicate the progress of the workflow at this transition into the next state. | : **{{< fa wifi >}} Workflow State Change** step configuration {.hover tbl-colwidths="[30,70]"} -### {{< fa arrow-pointer >}} User Action v2 +### {{< fa arrow-pointer >}} User Action + Creates a button that performs an action on behalf of the user. | Field | Description | |---:|---| -| [display action button when]{.smallcaps} (optional) | Add conditional requirements for displaying this step.^[[Conditional requirements](/guide/workflows/conditional-requirements.qmd#user-action-v2-optional)] | +| [display action button when]{.smallcaps} (optional) | Add conditional requirements for displaying this step.^[[Conditional requirements](/guide/workflows/conditional-requirements.qmd#user-action)] | | [action button label]{.smallcaps} | Text that displays on your action button. | | [requested fields]{.smallcaps} (optional) | Include inventory fields alongside your action.[^step-types-1] | | [user action deadline]{.smallcaps} (optional) | Include a deadline for when users should complete this action by.^[Toggle **Enable deadline** on to configure a deadline.] | -: **{{< fa arrow-pointer >}} User Action v2** step configuration {.hover tbl-colwidths="[40,60]"} +: **{{< fa arrow-pointer >}} User Action** step configuration {.hover tbl-colwidths="[40,60]"} ### {{< fa users >}} Approval + Display a preconfigured message to users in the selected **[approval group]{.smallcaps}**. 
@@ -51,6 +56,7 @@ Display a preconfigured message to users in the selected **[approval group]{.sma : **{{< fa users >}} Approval** step configuration {.hover tbl-colwidths="[35,65]"} ### {{< fa bullhorn >}} Broadcast + - Send email notifications to users in selected groups. - Select from either **User Roles** or **Model Stakeholder Types**.^[[Conditional requirements](/guide/workflows/conditional-requirements.qmd#broadcast-required)] @@ -63,6 +69,7 @@ Display a preconfigured message to users in the selected **[approval group]{.sma : **{{< fa bullhorn >}} Broadcast** step configuration {.hover tbl-colwidths="[25,75]"} ### {{< fa maximize >}} Condition Branch + - Allows diverging paths based on conditional requirements. - A `default` branch will be automatically included as a catch-all `else` path to ensure that your conditional logic is complete. @@ -83,6 +90,7 @@ To configure a condition branch: Proceed with linking the Condition Branch with your subsequent steps.^[[Link workflow together](/guide/workflows/configure-workflows.qmd#link-workflow-together)] ### {{< fa clock >}} Wait + - Creates a time condition for displaying next available action.^[[Conditional requirements](/guide/workflows/conditional-requirements.qmd#wait-required)] - Used to enforce a time delay or a calendar date milestone. @@ -103,10 +111,12 @@ To configure a wait step, select when you would like to [resume workflow]{.small : **{{< fa clock >}} Wait** step configuration {.hover tbl-colwidths="[30,15,45]"} ### {{< fa arrow-right-arrow-left >}} Run Workflow + Triggers another workflow indicated by **[select workflow to run]{.smallcaps}**, allowing you to chain workflows. ### {{< fa circle-nodes >}} Webhook + When a workflow reaches a webhook step, the workflow will pause. 
Listen for a `POST` request to resume the workflow: @@ -137,8 +147,9 @@ Send your `POST` request to the webhook URL and include these headers with your ::: ### {{< fa cloud-arrow-up >}} HTTP Request + -Sends a HTTP request with optional additional conditions.^[[Conditional requirements](/guide/workflows/conditional-requirements.qmd#http-request-optional)] +Sends an HTTP request with optional additional conditions.^[[Conditional requirements](/guide/workflows/conditional-requirements.qmd#http-request)] | Field | Description | |---:|---| @@ -154,6 +165,7 @@ Sends a HTTP request with optional additional conditions.^[[Conditional requirem ### {{< fa circle-stop >}} End + - Designates the end of your workflow. - Use this step when all of your other steps have been configured and linked together.^[[Link workflow together](/guide/workflows/configure-workflows.qmd#link-workflow-together)] @@ -162,7 +174,7 @@ Sends a HTTP request with optional additional conditions.^[[Conditional requirem [^step-types-1]: - - Select from the drop-down to add it to your User Action v2 step. + - Select from the drop-down to add it to your User Action step. - [Manage model inventory fields](/guide/model-inventory/manage-model-inventory-fields.qmd) Users must have write permissions for the field in order to enter values. @@ -208,7 +220,7 @@ Transitions a model into another stage. Transitions the workflow into another workflow state. -#### {{< fa arrow-pointer >}} User Action v2 +#### {{< fa arrow-pointer >}} User Action Creates a button that performs an action on behalf of the user. 
diff --git a/site/guide/workflows/configure-workflows.qmd b/site/guide/workflows/configure-workflows.qmd index f69884b85d..f5f0f325a6 100644 --- a/site/guide/workflows/configure-workflows.qmd +++ b/site/guide/workflows/configure-workflows.qmd @@ -70,6 +70,13 @@ To create a copy of an existing workflow: ## Edit existing workflows +::: {.callout title="Saving edits to a workflow already executed on models or artifacts will create a new version of the workflow."} + +- Workflows executed on models or artifacts will not be updated to the new version. To apply the latest version of a workflow, abort the existing version of the workflow and manually initiate the workflow again.[^6] +- Older versions of a workflow are read-only and cannot be edited or initiated on models or artifacts. + +::: + You can edit a workflow in several ways: ::: {.panel-tabset} @@ -86,27 +93,30 @@ You can edit a workflow in several ways: 1. In the left sidebar, click **{{< fa cubes >}} Inventory**. -2. Select a model or find your model by applying a filter or searching for it.[^6] +2. Select a model or find your model by applying a filter or searching for it.[^7] 3. On the landing page of your model, locate the [active workflows]{.smallcaps} section. -4. Click on the name of the workflow you'd like to abort to open that specific workflow's details. +4. Click on the name of the workflow you'd like to edit to open that specific workflow's details. 5. On the workflow's detail modal, click on the **{{< fa ellipsis-vertical >}}** in the top-right hand corner and select **{{< fa pencil >}} Edit Workflow**. #### Via workflows applied to artifacts -1. On the details page of your artifact,[^7] locate the [active workflows]{.smallcaps} section. +1. On the details page of your artifact,[^8] locate the [active workflows]{.smallcaps} section. -2. Click on the name of the workflow you'd like to abort to open that specific workflow's details. +2. 
Click on the name of the workflow you'd like to edit to open that specific workflow's details. 3. On the workflow's detail modal, click on the **{{< fa ellipsis-vertical >}}** in the top-right hand corner and select **{{< fa pencil >}} Edit Workflow**. ::: -a. Make your desired changes to step configuration[^8] and step relationships[^9] on your workflow. +a. Make your desired changes to step configuration[^9] and step relationships[^10] on your workflow. + +b. When you are finished, click **Save New Version** to apply your changes. + +c. Enter in your **[version notes]{.smallcaps}** to describe your changes. -b. When you are finished, click **Save Workflow** to apply your changes. ### Delete workflow steps @@ -167,11 +177,13 @@ Deleting workflow steps on workflows active on models may result in malfunctioni [^5]: If a cloned workflow's [workflow start]{.smallcaps} trigger conflicts with an existing workflow, the cloned workflow will be set to manual initiation instead. -[^6]: [Working with the model inventory](/guide/model-inventory/working-with-model-inventory.qmd#search-filter-and-sort-models) +[^6]: [Manage workflows](/guide/workflows/manage-workflows.qmd#update-workflow-versions) + +[^7]: [Working with the model inventory](/guide/model-inventory/working-with-model-inventory.qmd#search-filter-and-sort-models) -[^7]: [View and filter artifacts](/guide/model-validation/view-filter-artifacts.qmd#view-artifacts) +[^8]: [View and filter artifacts](/guide/model-validation/view-filter-artifacts.qmd#view-artifacts) -[^8]: [Configure workflow steps](#configure-workflow-steps) +[^9]: [Configure workflow steps](#configure-workflow-steps) -[^9]: [Link workflow together](#link-workflow-together) +[^10]: [Link workflow together](#link-workflow-together) diff --git a/site/guide/workflows/manage-workflows.qmd b/site/guide/workflows/manage-workflows.qmd index 1ae776e73a..372a02f986 100644 --- a/site/guide/workflows/manage-workflows.qmd +++ 
b/site/guide/workflows/manage-workflows.qmd @@ -35,6 +35,47 @@ Review workflows associated with your models, and initiate or abort workflows co {{< include /guide/workflows/_initiate-workflows.qmd >}} +### Resume workflows + +To manually resume a workflow in a {{< fa clock >}} Wait state:[^4] + +::: {.panel-tabset} + +#### On models + +1. In the left sidebar, click **{{< fa cubes >}} Inventory**. + +1. Select a model or find your model by applying a filter or searching for it.[^5] + +1. On the landing page of your model, locate the [active workflows]{.smallcaps} section. + +1. For the workflow you'd like to manually advance, click **{{< fa arrow-right-arrow-left >}} Resume Workflow** to open up the workflow's step overview. + +1. On the **{{< fa clock >}} Wait** step, double-click on **{{< fa arrow-right-arrow-left >}}** to advance the workflow immediately. + +#### On artifacts + +1. On the details page of your artifact,[^6] locate the [active workflows]{.smallcaps} section. + +1. For the workflow you'd like to manually advance, click **{{< fa arrow-right-arrow-left >}} Resume Workflow** to open up the workflow's step overview. + +1. On the **{{< fa clock >}} Wait** step, double-click on **{{< fa arrow-right-arrow-left >}}** to advance the workflow immediately. + +::: + +## Update workflow versions + +::: {.callout-important} +Applying the latest version of a workflow will reset the workflow to the beginning. + +::: + +If you have updated a workflow's configuration and want to apply the newest version of the workflow to your models or artifacts:[^7] + +1. Abort the existing execution of the workflow.[^8] + +2. 
Manually initiate the workflow again.[^9] + ## Abort workflows {{< include /guide/workflows/_abort-workflows.qmd >}} @@ -46,4 +87,16 @@ Review workflows associated with your models, and initiate or abort workflows co [^2]: [Configure workflows](configure-workflows.qmd) -[^3]: [Manage permissions](/guide/configuration/manage-permissions.qmd) \ No newline at end of file +[^3]: [Manage permissions](/guide/configuration/manage-permissions.qmd) + +[^4]: [Conditional requirements](conditional-requirements.qmd#wait) + +[^5]: [Working with the model inventory](/guide/model-inventory/working-with-model-inventory.qmd#search-filter-and-sort-models) + +[^6]: [View and filter artifacts](/guide/model-validation/view-filter-artifacts.qmd#view-artifacts) + +[^7]: [Configure workflows](configure-workflows.qmd#edit-existing-workflows) + +[^8]: [Abort workflows](/guide/workflows/manage-workflows.qmd#abort-workflows) + +[^9]: [Initiate workflows](/guide/workflows/manage-workflows.qmd#initiate-workflows) \ No newline at end of file diff --git a/site/guide/workflows/workflow-configuration-examples.qmd b/site/guide/workflows/workflow-configuration-examples.qmd index cea959bb99..63d23581f6 100644 --- a/site/guide/workflows/workflow-configuration-examples.qmd +++ b/site/guide/workflows/workflow-configuration-examples.qmd @@ -62,6 +62,7 @@ For more information, refer to [Integrations examples](/guide/integrations/integ Detail views of the demo workflow step types[^11] used in the example workflows: #### {{< fa cube >}} Model Stage Change + In this example, the model transitions into a stage[^12] called [In Documentation]{.bubble}: @@ -83,6 +84,7 @@ In this example, the model transitions into a stage[^12] called [In Documentatio :::: #### {{< fa wifi >}} Workflow State Change + In this example, the workflow transitions into a state[^13] called [Model In Development]{.bubble} and progresses the workflow completion to 25%: @@ -101,18 +103,19 @@ In this example, the workflow transitions into a 
state[^13] called [Model In Dev :::: -#### {{< fa arrow-pointer >}} User Action v2 +#### {{< fa arrow-pointer >}} User Action + In this example, the user is presented with an action button labelled **{{< fa arrow-right >}} Submit Documentation For Review** that requires them to include a model export in a [model export]{.smallcaps} attachment type field, then input the date the model was **Submitted for Initial Validation On**.[^14] :::: {.flex .flex-wrap .justify-around} ::: {.w-60-ns} -![Example {{< fa arrow-pointer >}} User Action v2 configuration modal](configure_user-action.png){width="90%" fig-alt="A screenshot showing an example User Action v2 configuration modal" .screenshot group="user-action"} +![Example {{< fa arrow-pointer >}} User Action configuration modal](configure_user-action.png){width="90%" fig-alt="A screenshot showing an example User Action configuration modal" .screenshot group="user-action"} ::: ::: {.w-40-ns} -![Example {{< fa arrow-pointer >}} User Action v2 step](example_user-action.png){fig-alt="A screenshot showing an example User Action v2 step" .screenshot group="user-action"} +![Example {{< fa arrow-pointer >}} User Action step](example_user-action.png){fig-alt="A screenshot showing an example User Action step" .screenshot group="user-action"} ::: @@ -121,11 +124,11 @@ In this example, the user is presented with an action button labelled **{{< fa a :::: {.flex .flex-wrap .justify-around} ::: {.w-50-ns .pr3} -![Example {{< fa arrow-pointer >}} User Action v2 submission panel requesting upload of a model export](example_user-action-submit1.png){fig-alt="A screenshot showing an example User Action v2 submission panel requesting upload of a model export" .screenshot group="user-action"} +![Example {{< fa arrow-pointer >}} User Action submission panel requesting upload of a model export](example_user-action-submit1.png){fig-alt="A screenshot showing an example User Action submission panel requesting upload of a model export" .screenshot 
group="user-action"} ::: ::: {.w-50-ns} -![Example {{< fa arrow-pointer >}} User Action v2 submission panel requesting input of initial submission date](example_user-action-submit2.png){fig-alt="A screenshot showing an example User Action v2 submission panel requesting input of initial submission date" .screenshot group="user-action"} +![Example {{< fa arrow-pointer >}} User Action submission panel requesting input of initial submission date](example_user-action-submit2.png){fig-alt="A screenshot showing an example User Action submission panel requesting input of initial submission date" .screenshot group="user-action"} ::: @@ -133,6 +136,7 @@ In this example, the user is presented with an action button labelled **{{< fa a #### {{< fa users >}} Approval + In this example, the [peer reviewers]{.smallcaps} model stakeholders[^15] assigned to the model are presented with an approval message and a `Data Sources` field to review: @@ -154,6 +158,7 @@ In this example, the [peer reviewers]{.smallcaps} model stakeholders[^15] assign ![Example {{< fa users >}} Approval message](example_approval-message.png){fig-alt="A screenshot showing an example Approval message" width=90% .screenshot group="approval"} #### {{< fa bullhorn >}} Broadcast + In this example, the [Customer Admin]{.bubble} user role[^16] is sent an email notification after a model is deployed: @@ -175,6 +180,7 @@ In this example, the [Customer Admin]{.bubble} user role[^16] is sent an email n :::: #### {{< fa maximize >}} Condition Branch + In this example, models where the field [genai model]{.smallcaps} is set to true undergo a special [legal review]{.smallcaps} model stakeholder[^17] approval process:[^18] @@ -194,6 +200,7 @@ In this example, models where the field [genai model]{.smallcaps} is set to true :::: #### {{< fa clock >}} Wait + In this example, the next step on the workflow waits until the timestamp indicated in a [deployment scheduled]{.smallcaps} date time field:[^19] @@ -213,6 +220,7 @@ In this 
example, the next step on the workflow waits until the timestamp indicat :::: #### {{< fa arrow-right-arrow-left >}} Run Workflow + In this example, the current workflow initiates another workflow called `On model validation`: @@ -232,6 +240,7 @@ In this example, the current workflow initiates another workflow called `On mode :::: #### {{< fa circle-stop >}} End + :::: {.flex .flex-wrap .justify-around} @@ -255,7 +264,7 @@ In this example, the workflow is designated to stop after running an additional **Workflow steps used:** - - [{{< fa arrow-pointer >}} User Action v2](#user-action-v2) + - [{{< fa arrow-pointer >}} User Action](#user-action) - [{{< fa cube >}} Model Stage Change](#model-stage-change) - [{{< fa wifi >}} Workflow State Change](#workflow-state-change) - [{{< fa users >}} Approval](#approval) @@ -270,7 +279,7 @@ In this example, the workflow is designated to stop after running an additional **Workflow steps used:** - - [{{< fa arrow-pointer >}} User Action v2](#user-action-v2) + - [{{< fa arrow-pointer >}} User Action](#user-action) - [{{< fa cube >}} Model Stage Change](#model-stage-change) - [{{< fa wifi >}} Workflow State Change](#workflow-state-change) - [{{< fa maximize >}} Condition Branch](#condition-branch) @@ -287,7 +296,7 @@ In this example, the workflow is designated to stop after running an additional **Workflow steps used:** - - [{{< fa arrow-pointer >}} User Action v2](#user-action-v2) + - [{{< fa arrow-pointer >}} User Action](#user-action) - [{{< fa cube >}} Model Stage Change](#model-stage-change) - [{{< fa wifi >}} Workflow State Change](#workflow-state-change) - [{{< fa bullhorn >}} Broadcast](#broadcast) diff --git a/site/notebooks.zip b/site/notebooks.zip index 3889c7a656..f82be4b495 100644 Binary files a/site/notebooks.zip and b/site/notebooks.zip differ diff --git a/site/notebooks/EXECUTED/model_development/1-set_up_validmind.ipynb b/site/notebooks/EXECUTED/model_development/1-set_up_validmind.ipynb index f82f57eaa1..4244924b9a 
100644 --- a/site/notebooks/EXECUTED/model_development/1-set_up_validmind.ipynb +++ b/site/notebooks/EXECUTED/model_development/1-set_up_validmind.ipynb @@ -171,7 +171,7 @@ "\n", "
Recommended Python versions\n", "

\n", - "Python 3.8 <= x <= 3.11
\n", + "Python 3.8 <= x <= 3.14\n", "\n", "To install the library:" ] diff --git a/site/notebooks/EXECUTED/model_validation/1-set_up_validmind_for_validation.ipynb b/site/notebooks/EXECUTED/model_validation/1-set_up_validmind_for_validation.ipynb index c5dc1fb394..05ad11c2c3 100644 --- a/site/notebooks/EXECUTED/model_validation/1-set_up_validmind_for_validation.ipynb +++ b/site/notebooks/EXECUTED/model_validation/1-set_up_validmind_for_validation.ipynb @@ -261,7 +261,7 @@ "\n", "
Recommended Python versions\n", "

\n", - "Python 3.8 <= x <= 3.11
\n", + "Python 3.8 <= x <= 3.14\n", "\n", "To install the library:" ] diff --git a/site/notebooks/how_to/tests/explore_tests/explore_test_suites.ipynb b/site/notebooks/how_to/tests/explore_tests/explore_test_suites.ipynb index 6eb1e5ef79..976eaedefe 100644 --- a/site/notebooks/how_to/tests/explore_tests/explore_test_suites.ipynb +++ b/site/notebooks/how_to/tests/explore_tests/explore_test_suites.ipynb @@ -137,7 +137,7 @@ "\n", "
Recommended Python versions\n", "

\n", - "Python 3.8 <= x <= 3.11
\n", + "Python 3.8 <= x <= 3.14\n", "\n", "To install the library:" ] diff --git a/site/notebooks/how_to/tests/explore_tests/explore_tests.ipynb b/site/notebooks/how_to/tests/explore_tests/explore_tests.ipynb index 56c58b62cf..3bfda30329 100644 --- a/site/notebooks/how_to/tests/explore_tests/explore_tests.ipynb +++ b/site/notebooks/how_to/tests/explore_tests/explore_tests.ipynb @@ -107,7 +107,7 @@ "\n", "
Recommended Python versions\n", "

\n", - "Python 3.8 <= x <= 3.11
\n", + "Python 3.8 <= x <= 3.14\n", "\n", "To install the library:" ] diff --git a/site/notebooks/how_to/tests/run_tests/1_run_dataset_based_tests.ipynb b/site/notebooks/how_to/tests/run_tests/1_run_dataset_based_tests.ipynb index 9af05b3b3e..c4937af216 100644 --- a/site/notebooks/how_to/tests/run_tests/1_run_dataset_based_tests.ipynb +++ b/site/notebooks/how_to/tests/run_tests/1_run_dataset_based_tests.ipynb @@ -153,7 +153,7 @@ "\n", "
Recommended Python versions\n", "

\n", - "Python 3.8 <= x <= 3.11
\n", + "Python 3.8 <= x <= 3.14\n", "\n", "To install the library:" ] diff --git a/site/notebooks/how_to/tests/run_tests/2_run_comparison_tests.ipynb b/site/notebooks/how_to/tests/run_tests/2_run_comparison_tests.ipynb index 1ba4627bc8..ffcd999fc2 100644 --- a/site/notebooks/how_to/tests/run_tests/2_run_comparison_tests.ipynb +++ b/site/notebooks/how_to/tests/run_tests/2_run_comparison_tests.ipynb @@ -1,1094 +1,1095 @@ { - "cells": [ - { - "cell_type": "markdown", - "id": "1d29276f", - "metadata": {}, - "source": [ - "# Run comparison tests\n", - "\n", - "Learn how to use the ValidMind Library to run comparison tests that take any datasets or models as inputs. Identify comparison tests to run, initialize ValidMind dataset and model objects in preparation for passing them to tests, and then run tests — generating outputs automatically logged to your model's documentation in the ValidMind Platform.\n", - "\n", - "
We recommend that you first complete our introductory notebook on running tests.\n", - "

\n", - "Run dataset-based tests
" - ] - }, - { - "cell_type": "markdown", - "id": "61065444", - "metadata": {}, - "source": [ - "::: {.content-hidden when-format=\"html\"}\n", - "## Contents \n", - "- [About ValidMind](#toc1__) \n", - " - [Before you begin](#toc1_1__) \n", - " - [New to ValidMind?](#toc1_2__) \n", - " - [Key concepts](#toc1_3__) \n", - "- [Setting up](#toc2__) \n", - " - [Install the ValidMind Library](#toc2_1__) \n", - " - [Initialize the ValidMind Library](#toc2_2__) \n", - " - [Register sample model](#toc2_2_1__) \n", - " - [Apply documentation template](#toc2_2_2__) \n", - " - [Get your code snippet](#toc2_2_3__) \n", - " - [Preview the documentation template](#toc2_3__) \n", - " - [Initialize the Python environment](#toc2_4__) \n", - "- [Explore a ValidMind test](#toc3__) \n", - "- [Working with ValidMind datasets](#toc4__) \n", - " - [Import the sample dataset](#toc4_1__) \n", - " - [Split the dataset](#toc4_2__) \n", - " - [Initialize the ValidMind dataset](#toc4_3__) \n", - "- [Working with ValidMind models](#toc5__) \n", - " - [Train a sample model](#toc5_1__) \n", - " - [Initialize the ValidMind model](#toc5_2__) \n", - " - [Assign predictions](#toc5_3__) \n", - "- [Running ValidMind tests](#toc6__) \n", - " - [Run classifier performance test with one model](#toc6_1__) \n", - " - [Run comparison tests](#toc6_2__) \n", - " - [Run classifier performance test with multiple models](#toc6_2_1__) \n", - " - [Run classifier performance test with multiple parameter values](#toc6_2_2__) \n", - " - [Run comparison test with multiple datasets](#toc6_2_3__) \n", - "- [Work with test results](#toc7__) \n", - "- [Next steps](#toc8__) \n", - " - [Discover more learning resources](#toc8_1__) \n", - "- [Upgrade ValidMind](#toc9__) \n", - "\n", - ":::\n", - "\n", - "" - ] - }, - { - "cell_type": "markdown", - "id": "67a4d9dc", - "metadata": {}, - "source": [ - "\n", - "\n", - "## About ValidMind\n", - "\n", - "ValidMind is a suite of tools for managing model risk, including risk 
associated with AI and statistical models. \n", - "\n", - "You use the ValidMind Library to automate documentation and validation tests, and then use the ValidMind Platform to collaborate on model documentation. Together, these products simplify model risk management, facilitate compliance with regulations and institutional standards, and enhance collaboration between yourself and model validators." - ] - }, - { - "cell_type": "markdown", - "id": "eeb30df8", - "metadata": {}, - "source": [ - "\n", - "\n", - "### Before you begin\n", - "\n", - "This notebook assumes you have basic familiarity with Python, including an understanding of how functions work. If you are new to Python, you can still run the notebook but we recommend further familiarizing yourself with the language. \n", - "\n", - "If you encounter errors due to missing modules in your Python environment, install the modules with `pip install`, and then re-run the notebook. For more help, refer to [Installing Python Modules](https://docs.python.org/3/installing/index.html)." - ] - }, - { - "cell_type": "markdown", - "id": "293c3f98", - "metadata": {}, - "source": [ - "\n", - "\n", - "### New to ValidMind?\n", - "\n", - "If you haven't already seen our documentation on the [ValidMind Library](https://docs.validmind.ai/developer/validmind-library.html), we recommend you begin by exploring the available resources in this section. There, you can learn more about documenting models and running tests, as well as find code samples and our Python Library API reference.\n", - "\n", - "
For access to all features available in this notebook, you'll need access to a ValidMind account.\n", - "

\n", - "Register with ValidMind
" - ] - }, - { - "cell_type": "markdown", - "id": "4fc836d0", - "metadata": {}, - "source": [ - "\n", - "\n", - "### Key concepts\n", - "\n", - "**Model documentation**: A structured and detailed record pertaining to a model, encompassing key components such as its underlying assumptions, methodologies, data sources, inputs, performance metrics, evaluations, limitations, and intended uses. It serves to ensure transparency, adherence to regulatory requirements, and a clear understanding of potential risks associated with the model’s application.\n", - "\n", - "**Documentation template**: Functions as a test suite and lays out the structure of model documentation, segmented into various sections and sub-sections. Documentation templates define the structure of your model documentation, specifying the tests that should be run, and how the results should be displayed.\n", - "\n", - "**Tests**: A function contained in the ValidMind Library, designed to run a specific quantitative test on the dataset or model. Tests are the building blocks of ValidMind, used to evaluate and document models and datasets, and can be run individually or as part of a suite defined by your model documentation template.\n", - "\n", - "**Metrics**: A subset of tests that do not have thresholds. In the context of this notebook, metrics and tests can be thought of as interchangeable concepts.\n", - "\n", - "**Custom metrics**: Custom metrics are functions that you define to evaluate your model or dataset. These functions can be registered with the ValidMind Library to be used in the ValidMind Platform.\n", - "\n", - "**Inputs**: Objects to be evaluated and documented in the ValidMind Library. 
They can be any of the following:\n", - "\n", - " - **model**: A single model that has been initialized in ValidMind with [`vm.init_model()`](https://docs.validmind.ai/validmind/validmind.html#init_model).\n", - " - **dataset**: Single dataset that has been initialized in ValidMind with [`vm.init_dataset()`](https://docs.validmind.ai/validmind/validmind.html#init_dataset).\n", - " - **models**: A list of ValidMind models - usually this is used when you want to compare multiple models in your custom metric.\n", - " - **datasets**: A list of ValidMind datasets - usually this is used when you want to compare multiple datasets in your custom metric. (Learn more: [Run tests with multiple datasets](https://docs.validmind.ai/notebooks/how_to/tests/run_tests/configure_tests/run_tests_that_require_multiple_datasets.html))\n", - "\n", - "**Parameters**: Additional arguments that can be passed when running a ValidMind test, used to pass additional information to a metric, customize its behavior, or provide additional context.\n", - "\n", - "**Outputs**: Custom metrics can return elements like tables or plots. Tables may be a list of dictionaries (each representing a row) or a pandas DataFrame. Plots may be matplotlib or plotly figures.\n", - "\n", - "**Test suites**: Collections of tests designed to run together to automate and generate model documentation end-to-end for specific use-cases.\n", - "\n", - "Example: the [`classifier_full_suite`](https://docs.validmind.ai/validmind/validmind/test_suites/classifier.html#ClassifierFullSuite) test suite runs tests from the [`tabular_dataset`](https://docs.validmind.ai/validmind/validmind/test_suites/tabular_datasets.html) and [`classifier`](https://docs.validmind.ai/validmind/validmind/test_suites/classifier.html) test suites to fully document the data and model sections for binary classification model use-cases." 
- ] - }, - { - "cell_type": "markdown", - "id": "8d52b6e0", - "metadata": {}, - "source": [ - "\n", - "\n", - "## Setting up" - ] - }, - { - "cell_type": "markdown", - "id": "e0d2daaf", - "metadata": {}, - "source": [ - "\n", - "\n", - "### Install the ValidMind Library\n", - "\n", - "
Recommended Python versions\n", - "

\n", - "Python 3.8 <= x <= 3.11
\n", - "\n", - "To install the library:" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "fc97888f", - "metadata": {}, - "outputs": [], - "source": [ - "%pip install -q validmind" - ] - }, - { - "cell_type": "markdown", - "id": "1ff56571", - "metadata": {}, - "source": [ - "\n", - "\n", - "### Initialize the ValidMind Library" - ] - }, - { - "cell_type": "markdown", - "id": "c4d9f164", - "metadata": {}, - "source": [ - "\n", - "\n", - "#### Register sample model\n", - "\n", - "Let's first register a sample model for use with this notebook.\n", - "\n", - "1. In a browser, [log in to ValidMind](https://docs.validmind.ai/guide/configuration/log-in-to-validmind.html).\n", - "\n", - "2. In the left sidebar, navigate to **Inventory** and click **+ Register Model**.\n", - "\n", - "3. Enter the model details and click **Next >** to continue to assignment of model stakeholders. ([Need more help?](https://docs.validmind.ai/guide/model-inventory/register-models-in-inventory.html))\n", - "\n", - "4. Select your own name under the **MODEL OWNER** drop-down.\n", - "\n", - "5. Click **Register Model** to add the model to your inventory." - ] - }, - { - "cell_type": "markdown", - "id": "852392e5", - "metadata": {}, - "source": [ - "\n", - "\n", - "#### Apply documentation template\n", - "\n", - "Once you've registered your model, let's select a documentation template. A template predefines sections for your model documentation and provides a general outline to follow, making the documentation process much easier.\n", - "\n", - "1. In the left sidebar that appears for your model, click **Documents** and select **Development**.\n", - "\n", - "2. Under **TEMPLATE**, select `Binary classification`.\n", - "\n", - "3. Click **Use Template** to apply the template." 
- ] - }, - { - "cell_type": "markdown", - "id": "6490e991", - "metadata": {}, - "source": [ - "\n", - "\n", - "#### Get your code snippet\n", - "\n", - "Initialize the ValidMind Library with the *code snippet* unique to each model per document, ensuring your test results are uploaded to the correct model and automatically populated in the right document in the ValidMind Platform when you run this notebook.\n", - "\n", - "1. On the left sidebar that appears for your model, select **Getting Started** and select `Development` from the **DOCUMENT** drop-down menu.\n", - "2. Click **Copy snippet to clipboard**.\n", - "3. Next, [load your model identifier credentials from an `.env` file](https://docs.validmind.ai/developer/model-documentation/store-credentials-in-env-file.html) or replace the placeholder with your own code snippet::" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "c51ae01c", - "metadata": {}, - "outputs": [], - "source": [ - "# Load your model identifier credentials from an `.env` file\n", - "\n", - "%load_ext dotenv\n", - "%dotenv .env\n", - "\n", - "# Or replace with your code snippet\n", - "\n", - "import validmind as vm\n", - "\n", - "vm.init(\n", - " # api_host=\"...\",\n", - " # api_key=\"...\",\n", - " # api_secret=\"...\",\n", - " # model=\"...\",\n", - " document=\"documentation\",\n", - ")" - ] - }, - { - "cell_type": "markdown", - "id": "99e9d14f", - "metadata": {}, - "source": [ - "\n", - "\n", - "### Preview the documentation template\n", - "\n", - "Let's verify that you have connected the ValidMind Library to the ValidMind Platform and that the appropriate *template* is selected for your model.\n", - "\n", - "You will upload documentation and test results unique to your model based on this template later on. 
For now, **take a look at the default structure that the template provides with [the `vm.preview_template()` function](https://docs.validmind.ai/validmind/validmind.html#preview_template)** from the ValidMind library and note the empty sections:" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "fd332a9d", - "metadata": {}, - "outputs": [], - "source": [ - "vm.preview_template()" - ] - }, - { - "cell_type": "markdown", - "id": "f805ec38", - "metadata": {}, - "source": [ - "\n", - "\n", - "### Initialize the Python environment\n", - "\n", - "Next, let's import the necessary libraries and set up your Python environment for data analysis:" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "8e2127cd", - "metadata": {}, - "outputs": [], - "source": [ - "import xgboost as xgb\n", - "\n", - "%matplotlib inline" - ] - }, - { - "cell_type": "markdown", - "id": "1783e13c", - "metadata": {}, - "source": [ - "\n", - "\n", - "## Explore a ValidMind test\n", - "\n", - "Before we run a test, use [the `vm.tests.list_tests()` function](https://docs.validmind.ai/validmind/validmind/tests.html#list_tests) to return information on out-of-the-box tests available in the ValidMind Library.\n", - "\n", - "Let's assume you want to evaluate *classifier performance* for a model. 
Classifier performance measures how well a classification model correctly predicts outcomes, using metrics like [precision, recall, and F1 score](https://en.wikipedia.org/wiki/Precision_and_recall).\n", - "\n", - "We'll pass in a `filter` to the `list_tests` function to find the test ID for classifier performance:" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "a6a6f715", - "metadata": {}, - "outputs": [], - "source": [ - "vm.tests.list_tests(filter=\"ClassifierPerformance\")" - ] - }, - { - "cell_type": "markdown", - "id": "96a56e4b", - "metadata": {}, - "source": [ - "We've identified from the output that the test ID for the classifier performance test is `validmind.model_validation.ClassifierPerformance`.\n", - "\n", - "Use this ID combined with [the `describe_test()` function](https://docs.validmind.ai/validmind/validmind/tests.html#describe_test) to retrieve more information about the test, including its **Required Inputs**:" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "f8a46c7d", - "metadata": {}, - "outputs": [], - "source": [ - "test_id = \"validmind.model_validation.sklearn.ClassifierPerformance\"\n", - "vm.tests.describe_test(test_id)" - ] - }, - { - "cell_type": "markdown", - "id": "97053f50", - "metadata": {}, - "source": [ - "Since this test requires a dataset and a model, you can expect it to throw an error when we run it without passing in either as input:" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "f853c272", - "metadata": {}, - "outputs": [], - "source": [ - "try:\n", - " vm.tests.run_test(test_id)\n", - "except Exception as e:\n", - " print(e)" - ] - }, - { - "cell_type": "markdown", - "id": "1a3115ed", - "metadata": {}, - "source": [ - "
Learn more about the individual tests available in the ValidMind Library\n", - "

\n", - "Check out our Explore tests notebook for more code examples and usage of key functions.
" - ] - }, - { - "cell_type": "markdown", - "id": "89da851b", - "metadata": {}, - "source": [ - "\n", - "\n", - "## Working with ValidMind datasets" - ] - }, - { - "cell_type": "markdown", - "id": "50bfdb1b", - "metadata": {}, - "source": [ - "\n", - "\n", - "### Import the sample dataset\n", - "\n", - "Since we need a dataset to run tests, let's import the public [Bank Customer Churn Prediction](https://www.kaggle.com/datasets/shantanudhakadd/bank-customer-churn-prediction) dataset from Kaggle so that we have something to work with.\n", - "\n", - "In our below example, note that:\n", - "\n", - "- The target column, `Exited` has a value of `1` when a customer has churned and `0` otherwise.\n", - "- The ValidMind Library provides a wrapper to automatically load the dataset as a [Pandas DataFrame](https://pandas.pydata.org/docs/reference/api/pandas.DataFrame.html) object. A Pandas Dataframe is a two-dimensional tabular data structure that makes use of rows and columns." - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "3ef2dfbb", - "metadata": {}, - "outputs": [], - "source": [ - "# Import the sample dataset from the library\n", - "\n", - "from validmind.datasets.classification import customer_churn\n", - "\n", - "print(\n", - " f\"Loaded demo dataset with: \\n\\n\\t• Target column: '{customer_churn.target_column}' \\n\\t• Class labels: {customer_churn.class_labels}\"\n", - ")\n", - "\n", - "raw_df = customer_churn.load_data()\n", - "raw_df.head()" - ] - }, - { - "cell_type": "markdown", - "id": "a5a8212f", - "metadata": {}, - "source": [ - "\n", - "\n", - "### Split the dataset\n", - "\n", - "Let's first split our dataset to help assess how well the model generalizes to unseen data.\n", - "\n", - "Use [`preprocess()`](https://docs.validmind.ai/validmind/validmind/datasets/classification/customer_churn.html#preprocess) to split our dataset into three subsets:\n", - "\n", - "1. **train_df** — Used to train the model.\n", - "2. 
**validation_df** — Used to evaluate the model's performance during training.\n", - "3. **test_df** — Used later on to asses the model's performance on new, unseen data." - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "88c87d4a", - "metadata": {}, - "outputs": [], - "source": [ - "train_df, validation_df, test_df = customer_churn.preprocess(raw_df)" - ] - }, - { - "cell_type": "markdown", - "id": "2ae225d7", - "metadata": {}, - "source": [ - "\n", - "\n", - "### Initialize the ValidMind dataset\n", - "\n", - "The next step is to connect your data with a ValidMind `Dataset` object. **This step is always necessary every time you want to connect a dataset to documentation and produce test results through ValidMind,** but you only need to do it once per dataset.\n", - "\n", - "ValidMind dataset objects provide a wrapper to any type of dataset (NumPy, Pandas, Polars, etc.) so that tests can run transparently regardless of the underlying library.\n", - "\n", - "Initialize a ValidMind dataset object using the [`init_dataset` function](https://docs.validmind.ai/validmind/validmind.html#init_dataset) from the ValidMind (`vm`) module. For this example, we'll pass in the following arguments:\n", - "\n", - "- **`dataset`** — The raw dataset that you want to provide as input to tests.\n", - "- **`input_id`** — A unique identifier that allows tracking what inputs are used when running each individual test.\n", - "- **`target_column`** — A required argument if tests require access to true values. This is the name of the target column in the dataset." 
- ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "bf0ec747", - "metadata": {}, - "outputs": [], - "source": [ - "vm_train_ds = vm.init_dataset(\n", - " dataset=train_df,\n", - " input_id=\"train_dataset\",\n", - " target_column=customer_churn.target_column,\n", - ")\n", - "\n", - "vm_test_ds = vm.init_dataset(\n", - " dataset=test_df,\n", - " input_id=\"test_dataset\",\n", - " target_column=customer_churn.target_column,\n", - ")" - ] - }, - { - "cell_type": "markdown", - "id": "6d26f65b", - "metadata": {}, - "source": [ - "\n", - "\n", - "## Working with ValidMind models" - ] - }, - { - "cell_type": "markdown", - "id": "6d1677f6", - "metadata": {}, - "source": [ - "\n", - "\n", - "### Train a sample model\n", - "\n", - "To train the model, we need to provide it with:\n", - "\n", - "1. **Inputs** — Features such as customer age, usage, etc.\n", - "2. **Outputs (Expected answers/labels)** — in our case, we would like to know whether the customer churned or not.\n", - "\n", - "Here, we'll use `x_train` and `x_val` to hold the input data (features), and `y_train` and `y_val` to hold the answers (the target we want to predict):" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "39e8c7ea", - "metadata": {}, - "outputs": [], - "source": [ - "x_train = train_df.drop(customer_churn.target_column, axis=1)\n", - "y_train = train_df[customer_churn.target_column]\n", - "x_val = validation_df.drop(customer_churn.target_column, axis=1)\n", - "y_val = validation_df[customer_churn.target_column]" - ] - }, - { - "cell_type": "markdown", - "id": "4ac628eb", - "metadata": {}, - "source": [ - "Next, let's create an *XGBoost classifier model* that will automatically stop training if it doesn't improve after 10 tries. 
XGBoost is a gradient-boosted tree ensemble that builds trees sequentially, with each tree correcting the errors of the previous ones — typically known for strong predictive performance and built-in regularization to reduce overfitting.\n", - "\n", - "Setting an explicit threshold avoids wasting time and helps prevent further overfitting by stopping training when further improvement isn't happening. We'll also set three evaluation metrics to get a more complete picture of model performance:\n", - "\n", - "1. **error** — Measures how often the model makes incorrect predictions.\n", - "2. **logloss** — Indicates how confident the predictions are.\n", - "3. **auc** — Evaluates how well the model distinguishes between churn and not churn." - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "255e3583", - "metadata": {}, - "outputs": [], - "source": [ - "model = xgb.XGBClassifier(early_stopping_rounds=10)\n", - "model.set_params(\n", - " eval_metric=[\"error\", \"logloss\", \"auc\"],\n", - ")" - ] - }, - { - "cell_type": "markdown", - "id": "f6430312", - "metadata": {}, - "source": [ - "Finally, our actual training step — where the model learns patterns from the data, so it can make predictions later:\n", - "\n", - "- The model is trained on `x_train` and `y_train`, and evaluates its performance using `x_val` and `y_val` to check if it’s learning well.\n", - "- To turn off printed output while training, we'll set `verbose` to `False`." 
- ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "e3aa3657", - "metadata": {}, - "outputs": [], - "source": [ - "model.fit(\n", - " x_train,\n", - " y_train,\n", - " eval_set=[(x_val, y_val)],\n", - " verbose=False,\n", - ")" - ] - }, - { - "cell_type": "markdown", - "id": "c303a046", - "metadata": {}, - "source": [ - "\n", - "\n", - "### Initialize the ValidMind model\n", - "\n", - "You'll also need to initialize a ValidMind model object (`vm_model`) that can be passed to other functions for analysis and tests on the data for our model.\n", - "\n", - "You simply initialize this model object with [`vm.init_model()`](https://docs.validmind.ai/validmind/validmind.html#init_model):" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "4b2be11f", - "metadata": {}, - "outputs": [], - "source": [ - "vm_model_xgb = vm.init_model(\n", - " model,\n", - " input_id=\"xgboost\",\n", - ")" - ] - }, - { - "cell_type": "markdown", - "id": "2fa83857", - "metadata": {}, - "source": [ - "\n", - "\n", - "### Assign predictions\n", - "\n", - "Once the model has been registered, you can assign model predictions to the training and testing datasets.\n", - "\n", - "- The [`assign_predictions()` method](https://docs.validmind.ai/validmind/validmind/vm_models.html#assign_predictions) from the `Dataset` object can link existing predictions to any number of models.\n", - "- This method links the model's class prediction values and probabilities to our `vm_train_ds` and `vm_test_ds` datasets.\n", - "\n", - "If no prediction values are passed, the method will compute predictions automatically:" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "229185fd", - "metadata": {}, - "outputs": [], - "source": [ - "vm_train_ds.assign_predictions(model=vm_model_xgb)\n", - "vm_test_ds.assign_predictions(model=vm_model_xgb)" - ] - }, - { - "cell_type": "markdown", - "id": "d0b3312e", - "metadata": {}, - "source": [ - "\n", - "\n", - "## Running 
ValidMind tests\n", - "\n", - "Now that we know how to initialize ValidMind `dataset` and `model` objects, we're ready to run some tests!\n", - "\n", - "You run individual tests by calling [the `run_test` function](https://docs.validmind.ai/validmind/validmind/tests.html#run_test) provided by the `validmind.tests` module. For the examples below, we'll pass in the following arguments:\n", - "\n", - "- **`test_id`** — The ID of the test to run, as seen in the `ID` column when you run `list_tests`.\n", - "- **`inputs`** — A dictionary of test inputs, such as `dataset`, `model`, `datasets`, or `models`. These are ValidMind objects initialized with [`vm.init_dataset()`](https://docs.validmind.ai/validmind/validmind.html#init_dataset) or [`vm.init_model()`](https://docs.validmind.ai/validmind/validmind.html#init_model)." - ] - }, - { - "cell_type": "markdown", - "id": "96c89f32", - "metadata": {}, - "source": [ - "\n", - "\n", - "### Run classifier performance test with one model\n", - "\n", - "Run `validmind.data_validation.ClassifierPerformance` test with the testing dataset (`vm_test_ds`) and model (`vm_model_xgb`) as inputs:" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "85189af9", - "metadata": {}, - "outputs": [], - "source": [ - "result = vm.tests.run_test(\n", - " \"validmind.model_validation.sklearn.ClassifierPerformance\",\n", - " inputs={\n", - " \"dataset\": vm_test_ds,\n", - " \"model\": vm_model_xgb,\n", - " },\n", - ")" - ] - }, - { - "cell_type": "markdown", - "id": "676dff89", - "metadata": {}, - "source": [ - "\n", - "\n", - "### Run comparison tests\n", - "\n", - "To evaluate which models might be a better fit for a use case based on their performance on selected criteria, we can run the same test with multiple models. We'll train three additional models and run the classifier performance test with for all four models using a single `run_test()` call.\n", - "\n", - "
ValidMind helps streamline your documentation and testing.\n", - "

\n", - "You could call run_test() multiple times passing in different inputs, but you can also pass an input_grid object — a dictionary of test input keys and values that allow you to run a single test for a combination of models and datasets.\n", - "

\n", - "With input_grid, run comparison tests for multiple datasets, or even multiple datasets and models simultaneously — input_grid can be used with run_test() for all possible combinations of inputs, generating a cohesive and comprehensive single output.\n", - "
" - ] - }, - { - "cell_type": "markdown", - "id": "3d9912dc", - "metadata": {}, - "source": [ - "*Random forest classifier* models use an ensemble method that builds multiple decision trees and averages their predictions. Random forest is robust to overfitting and handles non-linear relations well, but is typically less interpretable than simpler models:" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "1976b7e8", - "metadata": {}, - "outputs": [], - "source": [ - "from sklearn.ensemble import RandomForestClassifier\n", - "\n", - "# Train the random forest classifer model\n", - "model_rf = RandomForestClassifier()\n", - "model_rf.fit(x_train, y_train)\n", - "\n", - "# Initialize the ValidMind model object for the random forest classifer model\n", - "vm_model_rf = vm.init_model(\n", - " model_rf,\n", - " input_id=\"random_forest\",\n", - ")\n", - "\n", - "# Assign predictions to the test dataset for the random forest classifer model\n", - "vm_test_ds.assign_predictions(model=vm_model_rf)" - ] - }, - { - "cell_type": "markdown", - "id": "a259927c", - "metadata": {}, - "source": [ - "*Logistic regression* models are linear models that estimate class probabilities via a logistic (sigmoid) function. 
Logistic regression is highly interpretable with fast training, establishing a strong baseline — however, they struggle when relationships are non-linear as real-world relationships often are:" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "90bbf148", - "metadata": {}, - "outputs": [], - "source": [ - "from sklearn.linear_model import LogisticRegression\n", - "from sklearn.preprocessing import StandardScaler\n", - "from sklearn.pipeline import Pipeline\n", - "\n", - "# Scaling features ensures the lbfgs solver converges reliably\n", - "model_lr = Pipeline([\n", - " (\"scaler\", StandardScaler()),\n", - " (\"lr\", LogisticRegression()),\n", - "])\n", - "model_lr.fit(x_train, y_train)\n", - "\n", - "# Initialize the ValidMind model object for the logistic regression model\n", - "vm_model_lr = vm.init_model(\n", - " model_lr,\n", - " input_id=\"logistic_regression\",\n", - ")\n", - "\n", - "# Assign predictions to the test dataset for the logistic regression model\n", - "vm_test_ds.assign_predictions(model=vm_model_lr)" - ] - }, - { - "cell_type": "markdown", - "id": "9a666b41", - "metadata": {}, - "source": [ - "*Decision tree classifier* models are a single tree with data split on feature thresholds. 
Useful as an explanability benchmark, decision trees are easy to visualize and interpret — but are prone to overfitting without pruning or ensemble techniques:" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "bfa1e17d", - "metadata": {}, - "outputs": [], - "source": [ - "from sklearn.tree import DecisionTreeClassifier\n", - "\n", - "# Train the decision tree classifer model\n", - "model_dt = DecisionTreeClassifier()\n", - "model_dt.fit(x_train, y_train)\n", - "\n", - "# Initialize the ValidMind model object for the decision tree classifier model\n", - "vm_model_dt = vm.init_model(\n", - " model_dt,\n", - " input_id=\"decision_tree\",\n", - ")\n", - "\n", - "# Assign predictions to the test dataset for the decision tree classifiermodel\n", - "vm_test_ds.assign_predictions(model=vm_model_dt)" - ] - }, - { - "cell_type": "markdown", - "id": "2c8f3268", - "metadata": {}, - "source": [ - "\n", - "\n", - "#### Run classifier performance test with multiple models\n", - "\n", - "Now, we'll use the `input_grid` to run the [`ClassifierPerformance` test](https://docs.validmind.ai/tests/model_validation/sklearn/ClassifierPerformance.html) on all four models using the testing dataset (`vm_test_ds`).\n", - "\n", - "When running individual tests, you can use a custom `result_id` to tag the individual result with a unique identifier by appending this `result_id` to the `test_id` with a `:` separator. 
We'll append an identifier to signify that this test was run on `all_models` to differentiate this test run from other runs:" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "2e48ce1e", - "metadata": {}, - "outputs": [], - "source": [ - "perf_comparison_result = vm.tests.run_test(\n", - " \"validmind.model_validation.sklearn.ClassifierPerformance:all_models\",\n", - " input_grid={\n", - " \"dataset\": [vm_test_ds],\n", - " \"model\": [vm_model_xgb, vm_model_rf, vm_model_lr, vm_model_dt],\n", - " },\n", - ")" - ] - }, - { - "cell_type": "markdown", - "id": "81cbf144", - "metadata": {}, - "source": [ - "Our output indicates that the XGBoost and random forest classification models provide the strongest overall classification performance, so we'll continue our testing with those two models as input only." - ] - }, - { - "cell_type": "markdown", - "id": "3d3fb6ec", - "metadata": {}, - "source": [ - "\n", - "\n", - "#### Run classifier performance test with multiple parameter values\n", - "\n", - "Next, let's run the classifier performance test with the `param_grid` object, which runs the same test multiple times with different parameter values. 
We'll append an identifier to signify that this test was run with our `parameter_grid` configuration:" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "d0ad94c9", - "metadata": {}, - "outputs": [], - "source": [ - "parameter_comparison_result = vm.tests.run_test(\n", - " \"validmind.model_validation.sklearn.ClassifierPerformance:parameter_grid\",\n", - " input_grid={\n", - " \"dataset\": [vm_test_ds],\n", - " \"model\": [vm_model_xgb,vm_model_rf]\n", - " },\n", - " param_grid={\n", - " \"average\": [\"macro\", \"micro\"]\n", - " },\n", - ")" - ] - }, - { - "cell_type": "markdown", - "id": "508c7546", - "metadata": {}, - "source": [ - "\n", - "\n", - "#### Run comparison test with multiple datasets\n", - "\n", - "Let's also run the [ROCCurve test](https://docs.validmind.ai/tests/model_validation/sklearn/ROCCurve.html) using `input_grid` to iterate through multiple datasets, which plots the ROC curves for the training (`vm_train_ds`) and test (`vm_test_ds`) datasets side by side — a common scenario when you want to compare the performance of a model on the training and test datasets and visually assess how much performance is lost in the test dataset.\n", - "\n", - "We'll also need to assign predictions to the training dataset for the random forest classifier model, since we didn't do that in our earlier setup:" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "96c3b426", - "metadata": {}, - "outputs": [], - "source": [ - "vm_train_ds.assign_predictions(model=vm_model_rf)" - ] - }, - { - "cell_type": "markdown", - "id": "2be82bae", - "metadata": {}, - "source": [ - "We'll append an identifier to signify that this test was run with our `train_vs_test` dataset comparison configuration:" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "4056aa1e", - "metadata": {}, - "outputs": [], - "source": [ - "roc_curve_result = vm.tests.run_test(\n", - " 
\"validmind.model_validation.sklearn.ROCCurve:train_vs_test\",\n", - " input_grid={\n", - " \"dataset\": [vm_train_ds, vm_test_ds],\n", - " \"model\": [vm_model_xgb,vm_model_rf],\n", - " },\n", - ")" - ] - }, - { - "cell_type": "markdown", - "id": "a05570d5", - "metadata": {}, - "source": [ - "\n", - "\n", - "## Work with test results\n", - "\n", - "Every test result returned by the `run_test()` function has a [`.log()` method](https://docs.validmind.ai/validmind/validmind/vm_models.html#TestResult.log) that can be used to send the test results to the ValidMind Platform. When logging individual test results to the platform, you'll need to manually add those results to the desired section of the model documentation.\n", - "\n", - "You can do this through the ValidMind Platform interface after logging your test results ([Learn more ...](https://docs.validmind.ai/developer/model-documentation/work-with-test-results.html)), or directly via the ValidMind Library when calling `.log()` by providing an optional `section_id`. 
The `section_id` should be a string that matches the title of a section in the documentation template in `snake_case`.\n", - "\n", - "Let's log the results of the classifier performance test (`perf_comparison_result`) and the ROCCurve (`roc_curve_result`) test in the `model_evaluation` section of the documentation — present in the template we previewed in the beginning of this notebook:" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "e119bf1e", - "metadata": {}, - "outputs": [], - "source": [ - "perf_comparison_result.log(section_id=\"model_evaluation\")\n", - "roc_curve_result.log(section_id=\"model_evaluation\")" - ] - }, - { - "cell_type": "markdown", - "id": "ab5205ee", - "metadata": {}, - "source": [ - "Finally, let's head to the model we connected to at the beginning of this notebook and view our inserted test results in the updated documentation ([Need more help?](https://docs.validmind.ai/guide/model-documentation/working-with-model-documentation.html)):\n", - "\n", - "1. From the **Inventory** in the ValidMind Platform, go to the model you connected to earlier.\n", - "\n", - "2. In the left sidebar that appears for your model, click **Development** under Documents.\n", - "\n", - "3. Expand the **3.2. Model Evaluation** section.\n", - "\n", - "4. Confirm that `perf_comparison_result` and `roc_curve_result` display in this section as expected." - ] - }, - { - "cell_type": "markdown", - "id": "eb196aac", - "metadata": {}, - "source": [ - "\n", - "\n", - "## Next steps\n", - "\n", - "Now that you know how to run comparison tests with the ValidMind Library, you’re ready to take the next step. Extend the functionality of `run_test()` with your own custom test functions that can be incorporated into documentation templates just like any default out-of-the-box ValidMind test.\n", - "\n", - "
Learn how to implement custom tests with the ValidMind Library.\n", - "

\n", - "Check out our Implement comparison tests notebook for code examples and usage of key functions.
" - ] - }, - { - "cell_type": "markdown", - "id": "083c1d8d", - "metadata": {}, - "source": [ - "\n", - "\n", - "### Discover more learning resources\n", - "\n", - "We offer many interactive notebooks to help you automate testing, documenting, validating, and more:\n", - "\n", - "- [Run tests & test suites](https://docs.validmind.ai/developer/how-to/testing-overview.html)\n", - "- [Use ValidMind Library features](https://docs.validmind.ai/developer/how-to/feature-overview.html)\n", - "- [Code samples by use case](https://docs.validmind.ai/guide/samples-jupyter-notebooks.html)\n", - "\n", - "Or, visit our [documentation](https://docs.validmind.ai/) to learn more about ValidMind." - ] - }, - { - "cell_type": "markdown", - "id": "efba0f57", - "metadata": {}, - "source": [ - "\n", - "\n", - "## Upgrade ValidMind\n", - "\n", - "
After installing ValidMind, you’ll want to periodically make sure you are on the latest version to access any new features and other enhancements.
\n", - "\n", - "Retrieve the information for the currently installed version of ValidMind:" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "0d35972c", - "metadata": { - "vscode": { - "languageId": "plaintext" - } - }, - "outputs": [], - "source": [ - "%pip show validmind" - ] - }, - { - "cell_type": "markdown", - "id": "abcd07ef", - "metadata": {}, - "source": [ - "If the version returned is lower than the version indicated in our [production open-source code](https://github.com/validmind/validmind-library/blob/prod/validmind/__version__.py), restart your notebook and run:\n", - "\n", - "```bash\n", - "%pip install --upgrade validmind\n", - "```" - ] - }, - { - "cell_type": "markdown", - "id": "5fe70b90", - "metadata": {}, - "source": [ - "You may need to restart your kernel after running the upgrade package for changes to be applied." - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "\n", - "\n", - "\n", - "\n", - "***\n", - "\n", - "Copyright © 2023-2026 ValidMind Inc. All rights reserved.
\n", - "Refer to [LICENSE](https://github.com/validmind/validmind-library/blob/main/LICENSE) for details.
\n", - "SPDX-License-Identifier: AGPL-3.0 AND ValidMind Commercial
" - ] - } - ], - "metadata": { - "kernelspec": { - "display_name": "Python 3", - "language": "python", - "name": "python3" - }, - "language_info": { - "name": "python", - "version": "3.10" + "cells": [ + { + "cell_type": "markdown", + "id": "1d29276f", + "metadata": {}, + "source": [ + "# Run comparison tests\n", + "\n", + "Learn how to use the ValidMind Library to run comparison tests that take any datasets or models as inputs. Identify comparison tests to run, initialize ValidMind dataset and model objects in preparation for passing them to tests, and then run tests — generating outputs automatically logged to your model's documentation in the ValidMind Platform.\n", + "\n", + "
We recommend that you first complete our introductory notebook on running tests.\n", + "

\n", + "Run dataset-based tests
" + ] + }, + { + "cell_type": "markdown", + "id": "61065444", + "metadata": {}, + "source": [ + "::: {.content-hidden when-format=\"html\"}\n", + "## Contents \n", + "- [About ValidMind](#toc1__) \n", + " - [Before you begin](#toc1_1__) \n", + " - [New to ValidMind?](#toc1_2__) \n", + " - [Key concepts](#toc1_3__) \n", + "- [Setting up](#toc2__) \n", + " - [Install the ValidMind Library](#toc2_1__) \n", + " - [Initialize the ValidMind Library](#toc2_2__) \n", + " - [Register sample model](#toc2_2_1__) \n", + " - [Apply documentation template](#toc2_2_2__) \n", + " - [Get your code snippet](#toc2_2_3__) \n", + " - [Preview the documentation template](#toc2_3__) \n", + " - [Initialize the Python environment](#toc2_4__) \n", + "- [Explore a ValidMind test](#toc3__) \n", + "- [Working with ValidMind datasets](#toc4__) \n", + " - [Import the sample dataset](#toc4_1__) \n", + " - [Split the dataset](#toc4_2__) \n", + " - [Initialize the ValidMind dataset](#toc4_3__) \n", + "- [Working with ValidMind models](#toc5__) \n", + " - [Train a sample model](#toc5_1__) \n", + " - [Initialize the ValidMind model](#toc5_2__) \n", + " - [Assign predictions](#toc5_3__) \n", + "- [Running ValidMind tests](#toc6__) \n", + " - [Run classifier performance test with one model](#toc6_1__) \n", + " - [Run comparison tests](#toc6_2__) \n", + " - [Run classifier performance test with multiple models](#toc6_2_1__) \n", + " - [Run classifier performance test with multiple parameter values](#toc6_2_2__) \n", + " - [Run comparison test with multiple datasets](#toc6_2_3__) \n", + "- [Work with test results](#toc7__) \n", + "- [Next steps](#toc8__) \n", + " - [Discover more learning resources](#toc8_1__) \n", + "- [Upgrade ValidMind](#toc9__) \n", + "\n", + ":::\n", + "\n", + "" + ] + }, + { + "cell_type": "markdown", + "id": "67a4d9dc", + "metadata": {}, + "source": [ + "\n", + "\n", + "## About ValidMind\n", + "\n", + "ValidMind is a suite of tools for managing model risk, including risk 
associated with AI and statistical models. \n", + "\n", + "You use the ValidMind Library to automate documentation and validation tests, and then use the ValidMind Platform to collaborate on model documentation. Together, these products simplify model risk management, facilitate compliance with regulations and institutional standards, and enhance collaboration between yourself and model validators." + ] + }, + { + "cell_type": "markdown", + "id": "eeb30df8", + "metadata": {}, + "source": [ + "\n", + "\n", + "### Before you begin\n", + "\n", + "This notebook assumes you have basic familiarity with Python, including an understanding of how functions work. If you are new to Python, you can still run the notebook but we recommend further familiarizing yourself with the language. \n", + "\n", + "If you encounter errors due to missing modules in your Python environment, install the modules with `pip install`, and then re-run the notebook. For more help, refer to [Installing Python Modules](https://docs.python.org/3/installing/index.html)." + ] + }, + { + "cell_type": "markdown", + "id": "293c3f98", + "metadata": {}, + "source": [ + "\n", + "\n", + "### New to ValidMind?\n", + "\n", + "If you haven't already seen our documentation on the [ValidMind Library](https://docs.validmind.ai/developer/validmind-library.html), we recommend you begin by exploring the available resources in this section. There, you can learn more about documenting models and running tests, as well as find code samples and our Python Library API reference.\n", + "\n", + "
For access to all features available in this notebook, you'll need access to a ValidMind account.\n", + "

\n", + "Register with ValidMind
" + ] + }, + { + "cell_type": "markdown", + "id": "4fc836d0", + "metadata": {}, + "source": [ + "\n", + "\n", + "### Key concepts\n", + "\n", + "**Model documentation**: A structured and detailed record pertaining to a model, encompassing key components such as its underlying assumptions, methodologies, data sources, inputs, performance metrics, evaluations, limitations, and intended uses. It serves to ensure transparency, adherence to regulatory requirements, and a clear understanding of potential risks associated with the model’s application.\n", + "\n", + "**Documentation template**: Functions as a test suite and lays out the structure of model documentation, segmented into various sections and sub-sections. Documentation templates define the structure of your model documentation, specifying the tests that should be run, and how the results should be displayed.\n", + "\n", + "**Tests**: A function contained in the ValidMind Library, designed to run a specific quantitative test on the dataset or model. Tests are the building blocks of ValidMind, used to evaluate and document models and datasets, and can be run individually or as part of a suite defined by your model documentation template.\n", + "\n", + "**Metrics**: A subset of tests that do not have thresholds. In the context of this notebook, metrics and tests can be thought of as interchangeable concepts.\n", + "\n", + "**Custom metrics**: Custom metrics are functions that you define to evaluate your model or dataset. These functions can be registered with the ValidMind Library to be used in the ValidMind Platform.\n", + "\n", + "**Inputs**: Objects to be evaluated and documented in the ValidMind Library. 
They can be any of the following:\n", + "\n", + " - **model**: A single model that has been initialized in ValidMind with [`vm.init_model()`](https://docs.validmind.ai/validmind/validmind.html#init_model).\n", + " - **dataset**: Single dataset that has been initialized in ValidMind with [`vm.init_dataset()`](https://docs.validmind.ai/validmind/validmind.html#init_dataset).\n", + " - **models**: A list of ValidMind models - usually this is used when you want to compare multiple models in your custom metric.\n", + " - **datasets**: A list of ValidMind datasets - usually this is used when you want to compare multiple datasets in your custom metric. (Learn more: [Run tests with multiple datasets](https://docs.validmind.ai/notebooks/how_to/tests/run_tests/configure_tests/run_tests_that_require_multiple_datasets.html))\n", + "\n", + "**Parameters**: Additional arguments that can be passed when running a ValidMind test, used to pass additional information to a metric, customize its behavior, or provide additional context.\n", + "\n", + "**Outputs**: Custom metrics can return elements like tables or plots. Tables may be a list of dictionaries (each representing a row) or a pandas DataFrame. Plots may be matplotlib or plotly figures.\n", + "\n", + "**Test suites**: Collections of tests designed to run together to automate and generate model documentation end-to-end for specific use-cases.\n", + "\n", + "Example: the [`classifier_full_suite`](https://docs.validmind.ai/validmind/validmind/test_suites/classifier.html#ClassifierFullSuite) test suite runs tests from the [`tabular_dataset`](https://docs.validmind.ai/validmind/validmind/test_suites/tabular_datasets.html) and [`classifier`](https://docs.validmind.ai/validmind/validmind/test_suites/classifier.html) test suites to fully document the data and model sections for binary classification model use-cases." 
+ ] + }, + { + "cell_type": "markdown", + "id": "8d52b6e0", + "metadata": {}, + "source": [ + "\n", + "\n", + "## Setting up" + ] + }, + { + "cell_type": "markdown", + "id": "e0d2daaf", + "metadata": {}, + "source": [ + "\n", + "\n", + "### Install the ValidMind Library\n", + "\n", + "
Recommended Python versions\n", + "

\n", + "Python 3.8 <= x <= 3.14
\n", + "\n", + "To install the library:" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "fc97888f", + "metadata": {}, + "outputs": [], + "source": [ + "%pip install -q validmind" + ] + }, + { + "cell_type": "markdown", + "id": "1ff56571", + "metadata": {}, + "source": [ + "\n", + "\n", + "### Initialize the ValidMind Library" + ] + }, + { + "cell_type": "markdown", + "id": "c4d9f164", + "metadata": {}, + "source": [ + "\n", + "\n", + "#### Register sample model\n", + "\n", + "Let's first register a sample model for use with this notebook.\n", + "\n", + "1. In a browser, [log in to ValidMind](https://docs.validmind.ai/guide/configuration/log-in-to-validmind.html).\n", + "\n", + "2. In the left sidebar, navigate to **Inventory** and click **+ Register Model**.\n", + "\n", + "3. Enter the model details and click **Next >** to continue to assignment of model stakeholders. ([Need more help?](https://docs.validmind.ai/guide/model-inventory/register-models-in-inventory.html))\n", + "\n", + "4. Select your own name under the **MODEL OWNER** drop-down.\n", + "\n", + "5. Click **Register Model** to add the model to your inventory." + ] + }, + { + "cell_type": "markdown", + "id": "852392e5", + "metadata": {}, + "source": [ + "\n", + "\n", + "#### Apply documentation template\n", + "\n", + "Once you've registered your model, let's select a documentation template. A template predefines sections for your model documentation and provides a general outline to follow, making the documentation process much easier.\n", + "\n", + "1. In the left sidebar that appears for your model, click **Documents** and select **Development**.\n", + "\n", + "2. Under **TEMPLATE**, select `Binary classification`.\n", + "\n", + "3. Click **Use Template** to apply the template." 
+ ] + }, + { + "cell_type": "markdown", + "id": "6490e991", + "metadata": {}, + "source": [ + "\n", + "\n", + "#### Get your code snippet\n", + "\n", + "Initialize the ValidMind Library with the *code snippet* unique to each model per document, ensuring your test results are uploaded to the correct model and automatically populated in the right document in the ValidMind Platform when you run this notebook.\n", + "\n", + "1. In the left sidebar that appears for your model, select **Getting Started** and select `Development` from the **DOCUMENT** drop-down menu.\n", + "2. Click **Copy snippet to clipboard**.\n", + "3. Next, [load your model identifier credentials from an `.env` file](https://docs.validmind.ai/developer/model-documentation/store-credentials-in-env-file.html) or replace the placeholder with your own code snippet:" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "c51ae01c", + "metadata": {}, + "outputs": [], + "source": [ + "# Load your model identifier credentials from an `.env` file\n", + "\n", + "%load_ext dotenv\n", + "%dotenv .env\n", + "\n", + "# Or replace with your code snippet\n", + "\n", + "import validmind as vm\n", + "\n", + "vm.init(\n", + " # api_host=\"...\",\n", + " # api_key=\"...\",\n", + " # api_secret=\"...\",\n", + " # model=\"...\",\n", + " document=\"documentation\",\n", + ")" + ] + }, + { + "cell_type": "markdown", + "id": "99e9d14f", + "metadata": {}, + "source": [ + "\n", + "\n", + "### Preview the documentation template\n", + "\n", + "Let's verify that you have connected the ValidMind Library to the ValidMind Platform and that the appropriate *template* is selected for your model.\n", + "\n", + "You will upload documentation and test results unique to your model based on this template later on. 
For now, **take a look at the default structure that the template provides with [the `vm.preview_template()` function](https://docs.validmind.ai/validmind/validmind.html#preview_template)** from the ValidMind library and note the empty sections:" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "fd332a9d", + "metadata": {}, + "outputs": [], + "source": [ + "vm.preview_template()" + ] + }, + { + "cell_type": "markdown", + "id": "f805ec38", + "metadata": {}, + "source": [ + "\n", + "\n", + "### Initialize the Python environment\n", + "\n", + "Next, let's import the necessary libraries and set up your Python environment for data analysis:" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "8e2127cd", + "metadata": {}, + "outputs": [], + "source": [ + "import xgboost as xgb\n", + "\n", + "%matplotlib inline" + ] + }, + { + "cell_type": "markdown", + "id": "1783e13c", + "metadata": {}, + "source": [ + "\n", + "\n", + "## Explore a ValidMind test\n", + "\n", + "Before we run a test, use [the `vm.tests.list_tests()` function](https://docs.validmind.ai/validmind/validmind/tests.html#list_tests) to return information on out-of-the-box tests available in the ValidMind Library.\n", + "\n", + "Let's assume you want to evaluate *classifier performance* for a model. 
Classifier performance measures how well a classification model correctly predicts outcomes, using metrics like [precision, recall, and F1 score](https://en.wikipedia.org/wiki/Precision_and_recall).\n", + "\n", + "We'll pass in a `filter` to the `list_tests` function to find the test ID for classifier performance:" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "a6a6f715", + "metadata": {}, + "outputs": [], + "source": [ + "vm.tests.list_tests(filter=\"ClassifierPerformance\")" + ] + }, + { + "cell_type": "markdown", + "id": "96a56e4b", + "metadata": {}, + "source": [ + "We've identified from the output that the test ID for the classifier performance test is `validmind.model_validation.sklearn.ClassifierPerformance`.\n", + "\n", + "Use this ID combined with [the `describe_test()` function](https://docs.validmind.ai/validmind/validmind/tests.html#describe_test) to retrieve more information about the test, including its **Required Inputs**:" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "f8a46c7d", + "metadata": {}, + "outputs": [], + "source": [ + "test_id = \"validmind.model_validation.sklearn.ClassifierPerformance\"\n", + "vm.tests.describe_test(test_id)" + ] + }, + { + "cell_type": "markdown", + "id": "97053f50", + "metadata": {}, + "source": [ + "Since this test requires a dataset and a model, you can expect it to throw an error when we run it without passing in either as input:" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "f853c272", + "metadata": {}, + "outputs": [], + "source": [ + "try:\n", + " vm.tests.run_test(test_id)\n", + "except Exception as e:\n", + " print(e)" + ] + }, + { + "cell_type": "markdown", + "id": "1a3115ed", + "metadata": {}, + "source": [ + "
Learn more about the individual tests available in the ValidMind Library\n", + "

\n", + "Check out our Explore tests notebook for more code examples and usage of key functions.
" + ] + }, + { + "cell_type": "markdown", + "id": "89da851b", + "metadata": {}, + "source": [ + "\n", + "\n", + "## Working with ValidMind datasets" + ] + }, + { + "cell_type": "markdown", + "id": "50bfdb1b", + "metadata": {}, + "source": [ + "\n", + "\n", + "### Import the sample dataset\n", + "\n", + "Since we need a dataset to run tests, let's import the public [Bank Customer Churn Prediction](https://www.kaggle.com/datasets/shantanudhakadd/bank-customer-churn-prediction) dataset from Kaggle so that we have something to work with.\n", + "\n", + "In our below example, note that:\n", + "\n", + "- The target column, `Exited` has a value of `1` when a customer has churned and `0` otherwise.\n", + "- The ValidMind Library provides a wrapper to automatically load the dataset as a [Pandas DataFrame](https://pandas.pydata.org/docs/reference/api/pandas.DataFrame.html) object. A Pandas Dataframe is a two-dimensional tabular data structure that makes use of rows and columns." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "3ef2dfbb", + "metadata": {}, + "outputs": [], + "source": [ + "# Import the sample dataset from the library\n", + "\n", + "from validmind.datasets.classification import customer_churn\n", + "\n", + "print(\n", + " f\"Loaded demo dataset with: \\n\\n\\t• Target column: '{customer_churn.target_column}' \\n\\t• Class labels: {customer_churn.class_labels}\"\n", + ")\n", + "\n", + "raw_df = customer_churn.load_data()\n", + "raw_df.head()" + ] + }, + { + "cell_type": "markdown", + "id": "a5a8212f", + "metadata": {}, + "source": [ + "\n", + "\n", + "### Split the dataset\n", + "\n", + "Let's first split our dataset to help assess how well the model generalizes to unseen data.\n", + "\n", + "Use [`preprocess()`](https://docs.validmind.ai/validmind/validmind/datasets/classification/customer_churn.html#preprocess) to split our dataset into three subsets:\n", + "\n", + "1. **train_df** — Used to train the model.\n", + "2. 
**validation_df** — Used to evaluate the model's performance during training.\n", + "3. **test_df** — Used later on to assess the model's performance on new, unseen data." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "88c87d4a", + "metadata": {}, + "outputs": [], + "source": [ + "train_df, validation_df, test_df = customer_churn.preprocess(raw_df)" + ] + }, + { + "cell_type": "markdown", + "id": "2ae225d7", + "metadata": {}, + "source": [ + "\n", + "\n", + "### Initialize the ValidMind dataset\n", + "\n", + "The next step is to connect your data with a ValidMind `Dataset` object. **This step is always necessary every time you want to connect a dataset to documentation and produce test results through ValidMind,** but you only need to do it once per dataset.\n", + "\n", + "ValidMind dataset objects provide a wrapper to any type of dataset (NumPy, Pandas, Polars, etc.) so that tests can run transparently regardless of the underlying library.\n", + "\n", + "Initialize a ValidMind dataset object using the [`init_dataset` function](https://docs.validmind.ai/validmind/validmind.html#init_dataset) from the ValidMind (`vm`) module. For this example, we'll pass in the following arguments:\n", + "\n", + "- **`dataset`** — The raw dataset that you want to provide as input to tests.\n", + "- **`input_id`** — A unique identifier that allows tracking what inputs are used when running each individual test.\n", + "- **`target_column`** — A required argument if tests require access to true values. This is the name of the target column in the dataset." 
+ ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "bf0ec747", + "metadata": {}, + "outputs": [], + "source": [ + "vm_train_ds = vm.init_dataset(\n", + " dataset=train_df,\n", + " input_id=\"train_dataset\",\n", + " target_column=customer_churn.target_column,\n", + ")\n", + "\n", + "vm_test_ds = vm.init_dataset(\n", + " dataset=test_df,\n", + " input_id=\"test_dataset\",\n", + " target_column=customer_churn.target_column,\n", + ")" + ] + }, + { + "cell_type": "markdown", + "id": "6d26f65b", + "metadata": {}, + "source": [ + "\n", + "\n", + "## Working with ValidMind models" + ] + }, + { + "cell_type": "markdown", + "id": "6d1677f6", + "metadata": {}, + "source": [ + "\n", + "\n", + "### Train a sample model\n", + "\n", + "To train the model, we need to provide it with:\n", + "\n", + "1. **Inputs** — Features such as customer age, usage, etc.\n", + "2. **Outputs (Expected answers/labels)** — in our case, we would like to know whether the customer churned or not.\n", + "\n", + "Here, we'll use `x_train` and `x_val` to hold the input data (features), and `y_train` and `y_val` to hold the answers (the target we want to predict):" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "39e8c7ea", + "metadata": {}, + "outputs": [], + "source": [ + "x_train = train_df.drop(customer_churn.target_column, axis=1)\n", + "y_train = train_df[customer_churn.target_column]\n", + "x_val = validation_df.drop(customer_churn.target_column, axis=1)\n", + "y_val = validation_df[customer_churn.target_column]" + ] + }, + { + "cell_type": "markdown", + "id": "4ac628eb", + "metadata": {}, + "source": [ + "Next, let's create an *XGBoost classifier model* that will automatically stop training if it doesn't improve after 10 tries. 
XGBoost is a gradient-boosted tree ensemble that builds trees sequentially, with each tree correcting the errors of the previous ones — typically known for strong predictive performance and built-in regularization to reduce overfitting.\n", + "\n", + "Setting an explicit threshold avoids wasting time and helps prevent further overfitting by stopping training when further improvement isn't happening. We'll also set three evaluation metrics to get a more complete picture of model performance:\n", + "\n", + "1. **error** — Measures how often the model makes incorrect predictions.\n", + "2. **logloss** — Indicates how confident the predictions are.\n", + "3. **auc** — Evaluates how well the model distinguishes between churn and not churn." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "255e3583", + "metadata": {}, + "outputs": [], + "source": [ + "model = xgb.XGBClassifier(early_stopping_rounds=10)\n", + "model.set_params(\n", + " eval_metric=[\"error\", \"logloss\", \"auc\"],\n", + ")" + ] + }, + { + "cell_type": "markdown", + "id": "f6430312", + "metadata": {}, + "source": [ + "Finally, our actual training step — where the model learns patterns from the data, so it can make predictions later:\n", + "\n", + "- The model is trained on `x_train` and `y_train`, and evaluates its performance using `x_val` and `y_val` to check if it’s learning well.\n", + "- To turn off printed output while training, we'll set `verbose` to `False`." 
+ ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "e3aa3657", + "metadata": {}, + "outputs": [], + "source": [ + "model.fit(\n", + " x_train,\n", + " y_train,\n", + " eval_set=[(x_val, y_val)],\n", + " verbose=False,\n", + ")" + ] + }, + { + "cell_type": "markdown", + "id": "c303a046", + "metadata": {}, + "source": [ + "\n", + "\n", + "### Initialize the ValidMind model\n", + "\n", + "You'll also need to initialize a ValidMind model object (`vm_model`) that can be passed to other functions for analysis and tests on the data for our model.\n", + "\n", + "You simply initialize this model object with [`vm.init_model()`](https://docs.validmind.ai/validmind/validmind.html#init_model):" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "4b2be11f", + "metadata": {}, + "outputs": [], + "source": [ + "vm_model_xgb = vm.init_model(\n", + " model,\n", + " input_id=\"xgboost\",\n", + ")" + ] + }, + { + "cell_type": "markdown", + "id": "2fa83857", + "metadata": {}, + "source": [ + "\n", + "\n", + "### Assign predictions\n", + "\n", + "Once the model has been registered, you can assign model predictions to the training and testing datasets.\n", + "\n", + "- The [`assign_predictions()` method](https://docs.validmind.ai/validmind/validmind/vm_models.html#assign_predictions) from the `Dataset` object can link existing predictions to any number of models.\n", + "- This method links the model's class prediction values and probabilities to our `vm_train_ds` and `vm_test_ds` datasets.\n", + "\n", + "If no prediction values are passed, the method will compute predictions automatically:" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "229185fd", + "metadata": {}, + "outputs": [], + "source": [ + "vm_train_ds.assign_predictions(model=vm_model_xgb)\n", + "vm_test_ds.assign_predictions(model=vm_model_xgb)" + ] + }, + { + "cell_type": "markdown", + "id": "d0b3312e", + "metadata": {}, + "source": [ + "\n", + "\n", + "## Running 
ValidMind tests\n", + "\n", + "Now that we know how to initialize ValidMind `dataset` and `model` objects, we're ready to run some tests!\n", + "\n", + "You run individual tests by calling [the `run_test` function](https://docs.validmind.ai/validmind/validmind/tests.html#run_test) provided by the `validmind.tests` module. For the examples below, we'll pass in the following arguments:\n", + "\n", + "- **`test_id`** — The ID of the test to run, as seen in the `ID` column when you run `list_tests`.\n", + "- **`inputs`** — A dictionary of test inputs, such as `dataset`, `model`, `datasets`, or `models`. These are ValidMind objects initialized with [`vm.init_dataset()`](https://docs.validmind.ai/validmind/validmind.html#init_dataset) or [`vm.init_model()`](https://docs.validmind.ai/validmind/validmind.html#init_model)." + ] + }, + { + "cell_type": "markdown", + "id": "96c89f32", + "metadata": {}, + "source": [ + "\n", + "\n", + "### Run classifier performance test with one model\n", + "\n", + "Run the `validmind.model_validation.sklearn.ClassifierPerformance` test with the testing dataset (`vm_test_ds`) and model (`vm_model_xgb`) as inputs:" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "85189af9", + "metadata": {}, + "outputs": [], + "source": [ + "result = vm.tests.run_test(\n", + " \"validmind.model_validation.sklearn.ClassifierPerformance\",\n", + " inputs={\n", + " \"dataset\": vm_test_ds,\n", + " \"model\": vm_model_xgb,\n", + " },\n", + ")" + ] + }, + { + "cell_type": "markdown", + "id": "676dff89", + "metadata": {}, + "source": [ + "\n", + "\n", + "### Run comparison tests\n", + "\n", + "To evaluate which models might be a better fit for a use case based on their performance on selected criteria, we can run the same test with multiple models. We'll train three additional models and run the classifier performance test for all four models using a single `run_test()` call.\n", + "\n", + "
ValidMind helps streamline your documentation and testing.\n", + "

\n", + "You could call run_test() multiple times passing in different inputs, but you can also pass an input_grid object — a dictionary of test input keys and values that allow you to run a single test for a combination of models and datasets.\n", + "

\n", + "With input_grid, run comparison tests for multiple datasets, or even multiple datasets and models simultaneously — input_grid can be used with run_test() for all possible combinations of inputs, generating a cohesive and comprehensive single output.\n", + "
" + ] + }, + { + "cell_type": "markdown", + "id": "3d9912dc", + "metadata": {}, + "source": [ + "*Random forest classifier* models use an ensemble method that builds multiple decision trees and averages their predictions. Random forest is robust to overfitting and handles non-linear relations well, but is typically less interpretable than simpler models:" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "1976b7e8", + "metadata": {}, + "outputs": [], + "source": [ + "from sklearn.ensemble import RandomForestClassifier\n", + "\n", + "# Train the random forest classifer model\n", + "model_rf = RandomForestClassifier()\n", + "model_rf.fit(x_train, y_train)\n", + "\n", + "# Initialize the ValidMind model object for the random forest classifer model\n", + "vm_model_rf = vm.init_model(\n", + " model_rf,\n", + " input_id=\"random_forest\",\n", + ")\n", + "\n", + "# Assign predictions to the test dataset for the random forest classifer model\n", + "vm_test_ds.assign_predictions(model=vm_model_rf)" + ] + }, + { + "cell_type": "markdown", + "id": "a259927c", + "metadata": {}, + "source": [ + "*Logistic regression* models are linear models that estimate class probabilities via a logistic (sigmoid) function. 
Logistic regression is highly interpretable with fast training, establishing a strong baseline — however, it struggles when relationships are non-linear, as real-world relationships often are:" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "90bbf148", + "metadata": {}, + "outputs": [], + "source": [ + "from sklearn.linear_model import LogisticRegression\n", + "from sklearn.preprocessing import StandardScaler\n", + "from sklearn.pipeline import Pipeline\n", + "\n", + "# Scaling features ensures the lbfgs solver converges reliably\n", + "model_lr = Pipeline([\n", + " (\"scaler\", StandardScaler()),\n", + " (\"lr\", LogisticRegression()),\n", + "])\n", + "model_lr.fit(x_train, y_train)\n", + "\n", + "# Initialize the ValidMind model object for the logistic regression model\n", + "vm_model_lr = vm.init_model(\n", + " model_lr,\n", + " input_id=\"logistic_regression\",\n", + ")\n", + "\n", + "# Assign predictions to the test dataset for the logistic regression model\n", + "vm_test_ds.assign_predictions(model=vm_model_lr)" + ] + }, + { + "cell_type": "markdown", + "id": "9a666b41", + "metadata": {}, + "source": [ + "*Decision tree classifier* models are a single tree with data split on feature thresholds. 
Useful as an explainability benchmark, decision trees are easy to visualize and interpret — but are prone to overfitting without pruning or ensemble techniques:" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "bfa1e17d", + "metadata": {}, + "outputs": [], + "source": [ + "from sklearn.tree import DecisionTreeClassifier\n", + "\n", + "# Train the decision tree classifier model\n", + "model_dt = DecisionTreeClassifier()\n", + "model_dt.fit(x_train, y_train)\n", + "\n", + "# Initialize the ValidMind model object for the decision tree classifier model\n", + "vm_model_dt = vm.init_model(\n", + " model_dt,\n", + " input_id=\"decision_tree\",\n", + ")\n", + "\n", + "# Assign predictions to the test dataset for the decision tree classifier model\n", + "vm_test_ds.assign_predictions(model=vm_model_dt)" + ] + }, + { + "cell_type": "markdown", + "id": "2c8f3268", + "metadata": {}, + "source": [ + "\n", + "\n", + "#### Run classifier performance test with multiple models\n", + "\n", + "Now, we'll use the `input_grid` to run the [`ClassifierPerformance` test](https://docs.validmind.ai/tests/model_validation/sklearn/ClassifierPerformance.html) on all four models using the testing dataset (`vm_test_ds`).\n", + "\n", + "When running individual tests, you can use a custom `result_id` to tag the individual result with a unique identifier by appending this `result_id` to the `test_id` with a `:` separator. 
We'll append an identifier to signify that this test was run on `all_models` to differentiate this test run from other runs:" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "2e48ce1e", + "metadata": {}, + "outputs": [], + "source": [ + "perf_comparison_result = vm.tests.run_test(\n", + " \"validmind.model_validation.sklearn.ClassifierPerformance:all_models\",\n", + " input_grid={\n", + " \"dataset\": [vm_test_ds],\n", + " \"model\": [vm_model_xgb, vm_model_rf, vm_model_lr, vm_model_dt],\n", + " },\n", + ")" + ] + }, + { + "cell_type": "markdown", + "id": "81cbf144", + "metadata": {}, + "source": [ + "Our output indicates that the XGBoost and random forest classification models provide the strongest overall classification performance, so we'll continue our testing with those two models as input only." + ] + }, + { + "cell_type": "markdown", + "id": "3d3fb6ec", + "metadata": {}, + "source": [ + "\n", + "\n", + "#### Run classifier performance test with multiple parameter values\n", + "\n", + "Next, let's run the classifier performance test with the `param_grid` object, which runs the same test multiple times with different parameter values. 
We'll append an identifier to signify that this test was run with our `parameter_grid` configuration:" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "d0ad94c9", + "metadata": {}, + "outputs": [], + "source": [ + "parameter_comparison_result = vm.tests.run_test(\n", + " \"validmind.model_validation.sklearn.ClassifierPerformance:parameter_grid\",\n", + " input_grid={\n", + " \"dataset\": [vm_test_ds],\n", + " \"model\": [vm_model_xgb,vm_model_rf]\n", + " },\n", + " param_grid={\n", + " \"average\": [\"macro\", \"micro\"]\n", + " },\n", + ")" + ] + }, + { + "cell_type": "markdown", + "id": "508c7546", + "metadata": {}, + "source": [ + "\n", + "\n", + "#### Run comparison test with multiple datasets\n", + "\n", + "Let's also run the [ROCCurve test](https://docs.validmind.ai/tests/model_validation/sklearn/ROCCurve.html) using `input_grid` to iterate through multiple datasets, which plots the ROC curves for the training (`vm_train_ds`) and test (`vm_test_ds`) datasets side by side — a common scenario when you want to compare the performance of a model on the training and test datasets and visually assess how much performance is lost in the test dataset.\n", + "\n", + "We'll also need to assign predictions to the training dataset for the random forest classifier model, since we didn't do that in our earlier setup:" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "96c3b426", + "metadata": {}, + "outputs": [], + "source": [ + "vm_train_ds.assign_predictions(model=vm_model_rf)" + ] + }, + { + "cell_type": "markdown", + "id": "2be82bae", + "metadata": {}, + "source": [ + "We'll append an identifier to signify that this test was run with our `train_vs_test` dataset comparison configuration:" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "4056aa1e", + "metadata": {}, + "outputs": [], + "source": [ + "roc_curve_result = vm.tests.run_test(\n", + " 
\"validmind.model_validation.sklearn.ROCCurve:train_vs_test\",\n", + " input_grid={\n", + " \"dataset\": [vm_train_ds, vm_test_ds],\n", + " \"model\": [vm_model_xgb,vm_model_rf],\n", + " },\n", + ")" + ] + }, + { + "cell_type": "markdown", + "id": "a05570d5", + "metadata": {}, + "source": [ + "\n", + "\n", + "## Work with test results\n", + "\n", + "Every test result returned by the `run_test()` function has a [`.log()` method](https://docs.validmind.ai/validmind/validmind/vm_models.html#TestResult.log) that can be used to send the test results to the ValidMind Platform. When logging individual test results to the platform, you'll need to manually add those results to the desired section of the model documentation.\n", + "\n", + "You can do this through the ValidMind Platform interface after logging your test results ([Learn more ...](https://docs.validmind.ai/developer/model-documentation/work-with-test-results.html)), or directly via the ValidMind Library when calling `.log()` by providing an optional `section_id`. 
The `section_id` should be a string that matches the title of a section in the documentation template, written in `snake_case`.\n", + "\n", + "Let's log the results of the classifier performance test (`perf_comparison_result`) and the ROCCurve (`roc_curve_result`) test in the `model_evaluation` section of the documentation — present in the template we previewed at the beginning of this notebook:" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "e119bf1e", + "metadata": {}, + "outputs": [], + "source": [ + "perf_comparison_result.log(section_id=\"model_evaluation\")\n", + "roc_curve_result.log(section_id=\"model_evaluation\")" + ] + }, + { + "cell_type": "markdown", + "id": "ab5205ee", + "metadata": {}, + "source": [ + "Finally, let's head to the model we connected to at the beginning of this notebook and view our inserted test results in the updated documentation ([Need more help?](https://docs.validmind.ai/guide/model-documentation/working-with-model-documentation.html)):\n", + "\n", + "1. From the **Inventory** in the ValidMind Platform, go to the model you connected to earlier.\n", + "\n", + "2. In the left sidebar that appears for your model, click **Development** under Documents.\n", + "\n", + "3. Expand the **3.2. Model Evaluation** section.\n", + "\n", + "4. Confirm that `perf_comparison_result` and `roc_curve_result` display in this section as expected." + ] + }, + { + "cell_type": "markdown", + "id": "eb196aac", + "metadata": {}, + "source": [ + "\n", + "\n", + "## Next steps\n", + "\n", + "Now that you know how to run comparison tests with the ValidMind Library, you’re ready to take the next step. Extend the functionality of `run_test()` with your own custom test functions that can be incorporated into documentation templates just like any default out-of-the-box ValidMind test.\n", + "\n", + "
Learn how to implement custom tests with the ValidMind Library.\n", + "

\n", + "Check out our Implement comparison tests notebook for code examples and usage of key functions.
" + ] + }, + { + "cell_type": "markdown", + "id": "083c1d8d", + "metadata": {}, + "source": [ + "\n", + "\n", + "### Discover more learning resources\n", + "\n", + "We offer many interactive notebooks to help you automate testing, documenting, validating, and more:\n", + "\n", + "- [Run tests & test suites](https://docs.validmind.ai/developer/how-to/testing-overview.html)\n", + "- [Use ValidMind Library features](https://docs.validmind.ai/developer/how-to/feature-overview.html)\n", + "- [Code samples by use case](https://docs.validmind.ai/guide/samples-jupyter-notebooks.html)\n", + "\n", + "Or, visit our [documentation](https://docs.validmind.ai/) to learn more about ValidMind." + ] + }, + { + "cell_type": "markdown", + "id": "efba0f57", + "metadata": {}, + "source": [ + "\n", + "\n", + "## Upgrade ValidMind\n", + "\n", + "
After installing ValidMind, you’ll want to periodically make sure you are on the latest version to access any new features and other enhancements.
\n", + "\n", + "Retrieve the information for the currently installed version of ValidMind:" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "0d35972c", + "metadata": { + "vscode": { + "languageId": "plaintext" } + }, + "outputs": [], + "source": [ + "%pip show validmind" + ] + }, + { + "cell_type": "markdown", + "id": "abcd07ef", + "metadata": {}, + "source": [ + "If the version returned is lower than the version indicated in our [production open-source code](https://github.com/validmind/validmind-library/blob/prod/validmind/__version__.py), restart your notebook and run:\n", + "\n", + "```bash\n", + "%pip install --upgrade validmind\n", + "```" + ] + }, + { + "cell_type": "markdown", + "id": "5fe70b90", + "metadata": {}, + "source": [ + "You may need to restart your kernel after upgrading the package for changes to be applied." + ] + }, + { + "cell_type": "markdown", + "id": "copyright-54faffd51a5a4717a02b6be426d6b441", + "metadata": {}, + "source": [ + "\n", + "\n", + "\n", + "\n", + "***\n", + "\n", + "Copyright © 2023-2026 ValidMind Inc. All rights reserved.
\n", + "Refer to [LICENSE](https://github.com/validmind/validmind-library/blob/main/LICENSE) for details.
\n", + "SPDX-License-Identifier: AGPL-3.0 AND ValidMind Commercial
" + ] + } + ], + "metadata": { + "kernelspec": { + "display_name": "Python 3", + "language": "python", + "name": "python3" }, - "nbformat": 4, - "nbformat_minor": 5 + "language_info": { + "name": "python", + "version": "3.10" + } + }, + "nbformat": 4, + "nbformat_minor": 5 } diff --git a/site/notebooks/how_to/tests/run_tests/configure_tests/enable_pii_detection.ipynb b/site/notebooks/how_to/tests/run_tests/configure_tests/enable_pii_detection.ipynb index e0ae28fff7..ddbd734e9d 100644 --- a/site/notebooks/how_to/tests/run_tests/configure_tests/enable_pii_detection.ipynb +++ b/site/notebooks/how_to/tests/run_tests/configure_tests/enable_pii_detection.ipynb @@ -157,7 +157,7 @@ "\n", "
Recommended Python versions\n", "

\n", - "Python 3.8 <= x <= 3.11
\n", + "Python 3.8 <= x <= 3.14\n", "\n", "To use PII detection powered by [Microsoft Presidio](https://microsoft.github.io/presidio/), install the library with the explicit `[pii-detection]` extra specifier:" ] diff --git a/site/notebooks/quickstart/quickstart_model_documentation.Rmd b/site/notebooks/quickstart/quickstart_model_documentation.Rmd new file mode 100644 index 0000000000..fc48d8c445 --- /dev/null +++ b/site/notebooks/quickstart/quickstart_model_documentation.Rmd @@ -0,0 +1,227 @@ +--- +title: "Quickstart for Model Documentation (R)" +author: "ValidMind" +date: "2026-03-18" +output: html_document +--- + +# Quickstart for Model Documentation + +Learn the basics of using ValidMind to document models as part of a model development workflow using R. This notebook uses the ValidMind R package (a `reticulate` wrapper around the Python library) to generate a draft of documentation for a binary classification model. + +We will: + +1. Import a sample dataset and preprocess it +2. Split the datasets and initialize them for use with ValidMind +3. Train a logistic regression (GLM) model and initialize it for use with testing +4. Run the full suite of documentation tests, sending results to the ValidMind Platform + +## Setting up + +The Python path is auto-configured via the `VALIDMIND_PYTHON` environment variable. +If not set, it falls back to the system Python. For local development, create a +`.Renviron` file in the project root with `VALIDMIND_PYTHON=.venv/bin/python`. 
+ +```{r setup, include=FALSE} +library(reticulate) + +python_version <- Sys.getenv("VALIDMIND_PYTHON", Sys.which("python")) +if (nchar(python_version) > 0 && !startsWith(python_version, "/")) { + python_version <- file.path(getwd(), python_version) +} +use_python(python_version, required = TRUE) + +library(validmind) +library(dplyr) +library(caTools) +library(knitr) + +knitr::opts_chunk$set(warning = FALSE, message = FALSE) +``` + +## Initialize the ValidMind Library + +Log in to the [ValidMind Platform](https://app.prod.validmind.ai) and register a model: + +1. Navigate to **Inventory** and click **+ Register Model**. +2. Under **Documents > Development**, select the `Binary classification` template. +3. Go to **Getting Started**, select `Development` from the **DOCUMENT** drop-down, and copy the code snippet. + +Replace the placeholder values below with your own credentials: + +```{r} +vm_r <- vm( + api_host = "https://app.prod.validmind.ai/api/v1/tracking", + api_key = "", + api_secret = "", + model = "", + document = "documentation" +) +``` + +## Preview the documentation template + +Verify the connection and see the documentation structure: + +```{r} +py_print(vm_r$preview_template()) +``` + +## Load the demo dataset + +We use the Bank Customer Churn dataset for this demonstration: + +```{r} +customer_churn <- reticulate::import( + "validmind.datasets.classification.customer_churn" +) + +cat(sprintf( + paste0( + "Loaded demo dataset with:\n\n\t- Target column: '%s'", + "\n\t- Class labels: %s\n" + ), + customer_churn$target_column, + paste( + names(customer_churn$class_labels), + customer_churn$class_labels, + sep = ": ", collapse = ", " + ) +)) + +data <- customer_churn$load_data() +head(data) +``` + +## Initialize the raw dataset + +Before running tests, initialize a ValidMind dataset object for the raw data: + +```{r} +vm_raw_dataset <- vm_r$init_dataset( + dataset = data, + input_id = "raw_dataset", + target_column = customer_churn$target_column, + 
class_labels = customer_churn$class_labels +) +``` + +## Preprocess the raw dataset + +Handle categorical variables using one-hot encoding and remove unnecessary columns: + +```{r} +# load_data() already drops RowNumber, CustomerId, Surname +# One-hot encode categorical variables +geo_dummies <- model.matrix(~ Geography - 1, data = data) +gender_dummies <- model.matrix(~ Gender - 1, data = data) +data_processed <- data %>% select(-Geography, -Gender) +data_processed <- cbind(data_processed, geo_dummies, gender_dummies) +``` + +### Split the dataset + +Split into training (60%), validation (20%), and test (20%) sets: + +```{r} +set.seed(42) + +# First split: 80% train+validation, 20% test +target_col <- customer_churn$target_column +split1 <- sample.split(data_processed[[target_col]], SplitRatio = 0.8) +train_val_data <- subset(data_processed, split1 == TRUE) +test_data <- subset(data_processed, split1 == FALSE) + +# Second split: 75% train, 25% validation (of the 80% = 60/20 overall) +split2 <- sample.split(train_val_data[[target_col]], SplitRatio = 0.75) +train_data <- subset(train_val_data, split2 == TRUE) +validation_data <- subset(train_val_data, split2 == FALSE) +``` + +## Train a logistic regression model + +Train a GLM with a binomial family (logistic regression): + +```{r} +formula <- as.formula(paste(target_col, "~ .")) +model <- glm(formula, data = train_data, family = binomial) +summary(model) +``` + +## Initialize the ValidMind datasets + +```{r} +vm_train_ds <- vm_r$init_dataset( + dataset = train_data, + input_id = "train_dataset", + target_column = customer_churn$target_column +) + +vm_test_ds <- vm_r$init_dataset( + dataset = test_data, + input_id = "test_dataset", + target_column = customer_churn$target_column +) +``` + +## Initialize a model object + +Save the R model and initialize it with ValidMind: + +```{r} +model_path <- save_model(model) + +vm_model <- vm_r$init_r_model( + model_path = model_path, + input_id = "model" +) +``` + +### Assign 
predictions + +Link model predictions to the training and testing datasets: + +```{r} +vm_train_ds$assign_predictions(model = vm_model) +vm_test_ds$assign_predictions(model = vm_model) +``` + +## Run the full suite of tests + +Build the test configuration that maps each test to its required inputs: + +```{r} +# Import the test config helper from the Python customer_churn module +customer_churn <- reticulate::import( + "validmind.datasets.classification.customer_churn" +) +test_config <- customer_churn$get_demo_test_config() +``` + +Preview the test configuration: + +```{r} +vm_utils <- reticulate::import("validmind.utils") +py_print(vm_utils$preview_test_config(test_config)) +``` + +Run the full documentation test suite and upload results to the ValidMind Platform: + +```{r} +full_suite <- vm_r$run_documentation_tests(config = test_config) +``` + +## Next steps + +Head to the [ValidMind Platform](https://app.prod.validmind.ai) to view the generated documentation: + +1. Navigate to **Inventory** and select your model. +2. Click **Development** under Documents to see the full draft of your model documentation. + +From there, you can make qualitative edits, collaborate with validators, and submit for approval. + +--- + +*Copyright 2023-2026 ValidMind Inc. All rights reserved.* +*Refer to [LICENSE](https://github.com/validmind/validmind-library/blob/main/LICENSE) for details.* +*SPDX-License-Identifier: AGPL-3.0 AND ValidMind Commercial* diff --git a/site/notebooks/quickstart/quickstart_model_documentation.ipynb b/site/notebooks/quickstart/quickstart_model_documentation.ipynb index e7c28cf339..40287aa57f 100644 --- a/site/notebooks/quickstart/quickstart_model_documentation.ipynb +++ b/site/notebooks/quickstart/quickstart_model_documentation.ipynb @@ -184,7 +184,7 @@ "\n", "
Recommended Python versions\n", "

\n", - "Python 3.8 <= x <= 3.11
\n", + "Python 3.8 <= x <= 3.14\n", "\n", "To install the library:" ] diff --git a/site/notebooks/quickstart/quickstart_model_validation.Rmd b/site/notebooks/quickstart/quickstart_model_validation.Rmd new file mode 100644 index 0000000000..547aaf29d5 --- /dev/null +++ b/site/notebooks/quickstart/quickstart_model_validation.Rmd @@ -0,0 +1,335 @@ +--- +title: "Quickstart for Model Validation (R)" +author: "ValidMind" +date: "2026-03-18" +output: html_document +--- + +# Quickstart for Model Validation + +Learn the basics of using ValidMind to validate models as part of a model validation workflow using R. This notebook uses the ValidMind R package (a `reticulate` wrapper around the Python library) to generate a draft of a validation report for a binary classification model. + +We will: + +1. Import a sample dataset and preprocess it, then initialize datasets for use with ValidMind +2. Independently verify data quality tests performed on datasets by model development +3. Train a champion model for evaluation +4. Run model evaluation tests with the ValidMind Library + +## Setting up + +The Python path is auto-configured via the `VALIDMIND_PYTHON` environment variable. +If not set, it falls back to the system Python. For local development, create a +`.Renviron` file in the project root with `VALIDMIND_PYTHON=.venv/bin/python`. + +```{r setup, include=FALSE} +library(reticulate) + +python_version <- Sys.getenv("VALIDMIND_PYTHON", Sys.which("python")) +if (nchar(python_version) > 0 && !startsWith(python_version, "/")) { + python_version <- file.path(getwd(), python_version) +} +use_python(python_version, required = TRUE) + +library(validmind) +library(dplyr) +library(caTools) +library(knitr) + +knitr::opts_chunk$set(warning = FALSE, message = FALSE) +``` + +## Initialize the ValidMind Library + +Log in to the [ValidMind Platform](https://app.prod.validmind.ai) and register a model: + +1. Navigate to **Inventory** and click **+ Register Model**. +2. 
Assign yourself as a **Validator** (remove yourself from Owner and Developer roles). +3. Under **Documents > Validation**, select the `Generic Validation Report` template. +4. Go to **Getting Started**, select `Validation` from the **DOCUMENT** drop-down, and copy the code snippet. + +Replace the placeholder values below with your own credentials: + +```{r} +vm_r <- vm( + api_host = "https://app.prod.validmind.ai/api/v1/tracking", + api_key = "", + api_secret = "", + model = "", + document = "validation-report" +) +``` + +## Preview the validation report template + +Verify the connection and see the validation report structure: + +```{r} +py_print(vm_r$preview_template()) +``` + +## Identify available tests + +List the tasks and tags available in the ValidMind test library: + +```{r} +vm_r$tests$list_tasks_and_tags() +``` + +List all data quality tests for classification: + +```{r} +vm_r$tests$list_tests(tags = list("data_quality"), task = "classification") +``` + +## Load the demo dataset + +We use the Bank Customer Churn dataset for this demonstration: + +```{r} +customer_churn <- reticulate::import( + "validmind.datasets.classification.customer_churn" +) + +cat(sprintf( + paste0( + "Loaded demo dataset with:\n\n\t- Target column: '%s'", + "\n\t- Class labels: %s\n" + ), + customer_churn$target_column, + paste( + names(customer_churn$class_labels), + customer_churn$class_labels, + sep = ": ", collapse = ", " + ) +)) + +data <- customer_churn$load_data() +head(data) +``` + +## Preprocess the raw dataset + +Handle categorical variables using one-hot encoding and remove unnecessary columns: + +```{r} +# load_data() already drops RowNumber, CustomerId, Surname +# One-hot encode categorical variables +geo_dummies <- model.matrix(~ Geography - 1, data = data) +gender_dummies <- model.matrix(~ Gender - 1, data = data) +data_processed <- data %>% select(-Geography, -Gender) +data_processed <- cbind(data_processed, geo_dummies, gender_dummies) +``` + +### Split the 
dataset + +Split into training (60%), validation (20%), and test (20%) sets: + +```{r} +set.seed(42) + +# First split: 80% train+validation, 20% test +target_col <- customer_churn$target_column +split1 <- sample.split(data_processed[[target_col]], SplitRatio = 0.8) +train_val_data <- subset(data_processed, split1 == TRUE) +test_data <- subset(data_processed, split1 == FALSE) + +# Second split: 75% train, 25% validation (of the 80% = 60/20 overall) +split2 <- sample.split(train_val_data[[target_col]], SplitRatio = 0.75) +train_data <- subset(train_val_data, split2 == TRUE) +validation_data <- subset(train_val_data, split2 == FALSE) +``` + +### Separate features and targets + +```{r} +x_train <- train_data %>% select(-all_of(target_col)) +y_train <- train_data[[target_col]] +``` + +## Initialize the ValidMind datasets + +```{r} +vm_raw_dataset <- vm_r$init_dataset( + dataset = data, + input_id = "raw_dataset", + target_column = customer_churn$target_column, + class_labels = customer_churn$class_labels +) + +vm_train_ds <- vm_r$init_dataset( + dataset = train_data, + input_id = "train_dataset", + target_column = customer_churn$target_column +) + +vm_validation_ds <- vm_r$init_dataset( + dataset = validation_data, + input_id = "validation_dataset", + target_column = customer_churn$target_column +) + +vm_test_ds <- vm_r$init_dataset( + dataset = test_data, + input_id = "test_dataset", + target_column = customer_churn$target_column +) +``` + +## Run data quality tests + +### Run an individual data quality test + +Run the ClassImbalance test on the raw dataset and log it to the platform: + +```{r} +vm_r$tests$run_test( + test_id = "validmind.data_validation.ClassImbalance", + inputs = list(dataset = vm_raw_dataset) +)$log() +``` + +### Run data comparison tests + +Compare class imbalance across dataset splits: + +```{r} +comparison_tests <- list( + "validmind.data_validation.ClassImbalance:train_vs_validation" = list( + input_grid = list(dataset = list("train_dataset", 
"validation_dataset")) + ), + "validmind.data_validation.ClassImbalance:train_vs_test" = list( + input_grid = list(dataset = list("train_dataset", "test_dataset")) + ) +) + +for (test_name in names(comparison_tests)) { + cat(paste0("Running: ", test_name, "\n")) + config <- comparison_tests[[test_name]] + tryCatch({ + vm_r$tests$run_test( + test_name, + input_grid = config$input_grid + )$log() + }, error = function(e) { + cat(paste0("Error running test ", test_name, ": ", e$message, "\n")) + }) +} +``` + +## Train the champion model + +Train a logistic regression (GLM) to serve as the champion model: + +```{r} +formula <- as.formula(paste(target_col, "~ .")) +model <- glm(formula, data = train_data, family = binomial) +summary(model) +``` + +## Initialize the model object + +Save the R model and initialize it with ValidMind: + +```{r} +model_path <- save_model(model) + +vm_xgboost <- vm_r$init_r_model( + model_path = model_path, + input_id = "xgboost_champion" +) +``` + +### Assign predictions + +Link model predictions to the training and testing datasets: + +```{r} +vm_train_ds$assign_predictions(model = vm_xgboost) +vm_test_ds$assign_predictions(model = vm_xgboost) +``` + +## Run model evaluation tests + +### Run model performance tests + +List available model performance tests: + +```{r} +vm_r$tests$list_tests(tags = list("model_performance"), task = "classification") +``` + +Run and log performance tests: + +```{r} +performance_tests <- c( + "validmind.model_validation.sklearn.ClassifierPerformance:xgboost_champion", + "validmind.model_validation.sklearn.ConfusionMatrix:xgboost_champion", + "validmind.model_validation.sklearn.ROCCurve:xgboost_champion" +) + +for (test in performance_tests) { + cat(paste0("Running: ", test, "\n")) + vm_r$tests$run_test( + test, + inputs = list(dataset = vm_test_ds, model = vm_xgboost) + )$log() +} +``` + +### Run diagnostic tests + +Assess the model for overfitting: + +```{r} +vm_r$tests$run_test( + test_id = paste0( + 
"validmind.model_validation.sklearn.OverfitDiagnosis", + ":xgboost_champion" + ), + input_grid = list( + datasets = list(list(vm_train_ds, vm_test_ds)), + model = list(vm_xgboost) + ) +)$log() +``` + +Test robustness: + +```{r} +vm_r$tests$run_test( + test_id = paste0( + "validmind.model_validation.sklearn.RobustnessDiagnosis", + ":xgboost_champion" + ), + input_grid = list( + datasets = list(list(vm_train_ds, vm_test_ds)), + model = list(vm_xgboost) + ) +)$log() +``` + +### Run feature importance tests + +Note: `PermutationFeatureImportance` and `SHAPGlobalImportance` are not supported for R models. + +```{r} +vm_r$tests$run_test( + "validmind.model_validation.FeaturesAUC:xgboost_champion", + inputs = list(dataset = vm_test_ds, model = vm_xgboost) +)$log() +``` + +## Next steps + +Head to the [ValidMind Platform](https://app.prod.validmind.ai) to view the validation report: + +1. Navigate to **Inventory** and select your model. +2. Click **Validation** under Documents. +3. Include your logged test results as evidence, create risk assessment notes, and assess compliance. + +--- + +*Copyright 2023-2026 ValidMind Inc. All rights reserved.* +*Refer to [LICENSE](https://github.com/validmind/validmind-library/blob/main/LICENSE) for details.* +*SPDX-License-Identifier: AGPL-3.0 AND ValidMind Commercial* diff --git a/site/notebooks/quickstart/quickstart_model_validation.ipynb b/site/notebooks/quickstart/quickstart_model_validation.ipynb index 640e64015b..63e17f2a88 100644 --- a/site/notebooks/quickstart/quickstart_model_validation.ipynb +++ b/site/notebooks/quickstart/quickstart_model_validation.ipynb @@ -259,7 +259,7 @@ "\n", "
Recommended Python versions\n", "

\n", - "Python 3.8 <= x <= 3.11
\n", + "Python 3.8 <= x <= 3.14\n", "\n", "To install the library:" ] diff --git a/site/notebooks/tutorials/model_development/1-set_up_validmind.ipynb b/site/notebooks/tutorials/model_development/1-set_up_validmind.ipynb index f82f57eaa1..4244924b9a 100644 --- a/site/notebooks/tutorials/model_development/1-set_up_validmind.ipynb +++ b/site/notebooks/tutorials/model_development/1-set_up_validmind.ipynb @@ -171,7 +171,7 @@ "\n", "
Recommended Python versions\n", "

\n", - "Python 3.8 <= x <= 3.11
\n", + "Python 3.8 <= x <= 3.14\n", "\n", "To install the library:" ] diff --git a/site/notebooks/tutorials/model_validation/1-set_up_validmind_for_validation.ipynb b/site/notebooks/tutorials/model_validation/1-set_up_validmind_for_validation.ipynb index c5dc1fb394..05ad11c2c3 100644 --- a/site/notebooks/tutorials/model_validation/1-set_up_validmind_for_validation.ipynb +++ b/site/notebooks/tutorials/model_validation/1-set_up_validmind_for_validation.ipynb @@ -261,7 +261,7 @@ "\n", "
Recommended Python versions\n", "

\n", - "Python 3.8 <= x <= 3.11
\n", + "Python 3.8 <= x <= 3.14\n", "\n", "To install the library:" ] diff --git a/site/notebooks/use_cases/agents/document_agentic_ai.ipynb b/site/notebooks/use_cases/agents/document_agentic_ai.ipynb index 89e8152214..3c3b6817b1 100644 --- a/site/notebooks/use_cases/agents/document_agentic_ai.ipynb +++ b/site/notebooks/use_cases/agents/document_agentic_ai.ipynb @@ -194,7 +194,7 @@ "\n", "
Recommended Python versions\n", "

\n", - "Python 3.9 <= x <= 3.11
\n", + "Python 3.9 <= x <= 3.14\n", "\n", "Let's begin by installing the ValidMind Library with large language model (LLM) support:" ] diff --git a/site/notebooks/use_cases/model_validation/validate_application_scorecard.ipynb b/site/notebooks/use_cases/model_validation/validate_application_scorecard.ipynb index f3df8a617e..7857d42e0a 100644 --- a/site/notebooks/use_cases/model_validation/validate_application_scorecard.ipynb +++ b/site/notebooks/use_cases/model_validation/validate_application_scorecard.ipynb @@ -247,7 +247,7 @@ "\n", "
Recommended Python versions\n", "

\n", - "Python 3.8 <= x <= 3.11
\n", + "Python 3.8 <= x <= 3.14\n", "\n", "To install the library:" ] diff --git a/site/notebooks/use_cases/nlp_and_llm/rag_benchmark_demo.ipynb b/site/notebooks/use_cases/nlp_and_llm/rag_benchmark_demo.ipynb index 1e3eb07b67..1b56fa1b07 100644 --- a/site/notebooks/use_cases/nlp_and_llm/rag_benchmark_demo.ipynb +++ b/site/notebooks/use_cases/nlp_and_llm/rag_benchmark_demo.ipynb @@ -159,7 +159,7 @@ "\n", "
Recommended Python versions\n", "

\n", - "Python 3.8 <= x <= 3.11
\n", + "Python 3.8 <= x <= 3.14\n", "\n", "To install the library:" ] diff --git a/site/releases/2024/2024-dec-06/release-notes.qmd b/site/releases/2024/2024-dec-06/release-notes.qmd index 142a96f72d..464cceed00 100644 --- a/site/releases/2024/2024-dec-06/release-notes.qmd +++ b/site/releases/2024/2024-dec-06/release-notes.qmd @@ -1308,8 +1308,6 @@ We've now introduced a custom 404 page where you can perform a search instead. :::: - - {{< include /releases/_how-to-upgrade.qmd >}} diff --git a/site/releases/2025/2025-mar-07/release-notes.qmd b/site/releases/2025/2025-mar-07/release-notes.qmd index f2a363f65b..1e76360c0c 100644 --- a/site/releases/2025/2025-mar-07/release-notes.qmd +++ b/site/releases/2025/2025-mar-07/release-notes.qmd @@ -58,7 +58,7 @@ We've introduced an interactive historical record of breaking changes and deprec - These histories grouped by calendar year are searchable and can be filtered by product area ({{< var validmind.developer >}}, {{< var validmind.platform >}}), associated versioning, and the type of change. 
::: {.column-margin} -[Breaking changes & deprecations](/releases/breaking-changes/breaking-changes.qmd){.button .button-green target="_blank"} +[Breaking changes & deprecations](/releases/breaking-changes-and-deprecations.qmd){.button .button-green target="_blank"} ::: diff --git a/site/releases/_how-to-upgrade.qmd b/site/releases/_how-to-upgrade.qmd index 7aa53e95d4..54235bdb4d 100644 --- a/site/releases/_how-to-upgrade.qmd +++ b/site/releases/_how-to-upgrade.qmd @@ -2,6 +2,8 @@ ## {{< fa arrow-up-from-bracket >}} How to upgrade + + #### {{< var validmind.platform >}} To access the latest version of the {{< var validmind.platform >}},^[[Log in to {{< var vm.product >}}](/guide/access/log-in-to-validmind.qmd)] hard refresh your browser tab: diff --git a/site/support/troubleshooting.qmd b/site/support/troubleshooting.qmd index b420b42c7b..fcc7e31a45 100644 --- a/site/support/troubleshooting.qmd +++ b/site/support/troubleshooting.qmd @@ -84,8 +84,6 @@ This error indicates a Certificate Authority issue. In rare cases it can signal Contact your internal IT team. Provide details about your Python environment and ask for help installing the correct CA bundle. - - {{< include /releases/_how-to-upgrade.qmd >}} ## Additional resources diff --git a/site/training/_training-register.qmd b/site/training/_training-register.qmd index 8a7cf5d5e3..aaae2f4694 100644 --- a/site/training/_training-register.qmd +++ b/site/training/_training-register.qmd @@ -3,6 +3,7 @@ Refer to the LICENSE file in the root of this repository for details. SPDX-License-Identifier: AGPL-3.0 AND ValidMind Commercial --> ## {{< fa graduation-cap >}} Register + 1. **Get your access credentials**: Email to enquire about access. 
diff --git a/site/training/administrator-fundamentals/using-validmind-for-risk-management.qmd b/site/training/administrator-fundamentals/using-validmind-for-risk-management.qmd index 8c154c56e8..0110612344 100644 --- a/site/training/administrator-fundamentals/using-validmind-for-risk-management.qmd +++ b/site/training/administrator-fundamentals/using-validmind-for-risk-management.qmd @@ -292,7 +292,7 @@ When you're done, click [{{< fa chevron-right >}}]() to continue. ::: 1. Click on the workflow you'd added earlier, then: - - To add a step, drag and drop a new step onto the canvas. For example: {{< fa arrow-pointer >}} User Action v2 + - To add a step, drag and drop a new step onto the canvas. For example: {{< fa arrow-pointer >}} User Action - Double-click the new step to open up the configuration modal. 2. After you're finished with step configuration, click **Update Step** to apply your changes. 3. After you've configured a step, link your example workflow together: diff --git a/site/training/ai-governance/configuring-ai-workflows.qmd b/site/training/ai-governance/configuring-ai-workflows.qmd index 2c126076cf..db4cd7c4ae 100644 --- a/site/training/ai-governance/configuring-ai-workflows.qmd +++ b/site/training/ai-governance/configuring-ai-workflows.qmd @@ -94,11 +94,11 @@ This example intake workflow demonstrates key AI governance patterns: ::: ::: {.f3} -- **Model Stage Change** — Sets model stage to [In Risk Classification]{.bubble} -- **User Action v2** — Collects risk classification materials with deadline -- **Broadcast** — Notifies stakeholders of pending risk rating -- **Approval** — Routes to Model Validation Governance for review -- **Condition Branch** — Different paths for High Risk, Medium Risk, or Default +- **{{< fa cube >}} Model Stage Change** — Sets model stage to [In Risk Classification]{.bubble} +- **{{< fa arrow-pointer >}} User Action** — Collects risk classification materials with deadline +- **{{< fa bullhorn >}} Broadcast** — Notifies 
stakeholders of pending risk rating +- **{{< fa users >}} Approval** — Routes to Model Validation Governance for review +- **{{< fa maximize >}} Condition Branch** — Different paths for High Risk, Medium Risk, or Default ::: # Approval workflows {background-image="../assets/digitalspace-colours.svg" .title-accent}