diff --git a/.devcontainer/devcontainer.json b/.devcontainer/devcontainer.json index 9273465..df90c51 100644 --- a/.devcontainer/devcontainer.json +++ b/.devcontainer/devcontainer.json @@ -3,16 +3,19 @@ { "name": "Python 3", // Or use a Dockerfile or Docker Compose file. More info: https://containers.dev/guide/dockerfile - "image": "mcr.microsoft.com/devcontainers/python:0-3.9", + "image": "mcr.microsoft.com/devcontainers/python:0-3.10", // Features to add to the dev container. More info: https://containers.dev/features. - // "features": {}, + "features": { + "ghcr.io/jsburckhardt/devcontainer-features/uv:1": {}, + "ghcr.io/jsburckhardt/devcontainer-features/ruff:1": {} + }, // Use 'forwardPorts' to make a list of ports inside the container available locally. // "forwardPorts": [], // Use 'postCreateCommand' to run commands after the container is created. - "postCreateCommand": "pip3 install --user -r requirements-dev.txt; pre-commit install", + "postCreateCommand": "uv pip install --prefix ~/.local -r requirements-dev.txt && prek install", // Configure tool-specific properties. "customizations": { diff --git a/.github/ISSUE_TEMPLATE/bug.yml b/.github/ISSUE_TEMPLATE/bug.yml index 2d36c08..e8c0e73 100644 --- a/.github/ISSUE_TEMPLATE/bug.yml +++ b/.github/ISSUE_TEMPLATE/bug.yml @@ -22,7 +22,7 @@ body: id: environment attributes: label: Environment - description: Please provide environment details that will help us reproduce the issue e.g. operating system and tubular/scikit-learn/pandas versions. + description: Please provide environment details that will help us reproduce the issue e.g. operating system and numpy/shap/xgboost-cpu versions. 
validations: required: true - type: textarea diff --git a/.github/workflows/python-package.yml b/.github/workflows/python-package.yml index 05f6814..6517e66 100644 --- a/.github/workflows/python-package.yml +++ b/.github/workflows/python-package.yml @@ -2,6 +2,9 @@ name: Python package build and test +permissions: + contents: read + on: push: pull_request: @@ -42,9 +45,9 @@ jobs: - name: Upload coverage pytest html test results to github if: always() - uses: actions/upload-artifact@v3 + uses: actions/upload-artifact@v4 with: - name: coverage-html-pytest-results + name: coverage-html-pytest-results-${{ matrix.python-version }} path: htmlcov lint: @@ -56,27 +59,37 @@ jobs: steps: - uses: actions/checkout@v3 - - name: Set up Python 3.9 + - name: Set up Python ${{ matrix.python-version }} uses: actions/setup-python@v4 with: - python-version: 3.9 + python-version: ${{ matrix.python-version }} - - name: Install ruff + - name: Install linters run: | python -m pip install uv - uv venv -p 3.9 ruff - source ruff/bin/activate + uv venv -p ${{ matrix.python-version }} $HOME/lints + source $HOME/lints/bin/activate uv pip install -r requirements-dev.txt - name: Lint with ruff run: | - source ruff/bin/activate + source $HOME/lints/bin/activate ruff --version ruff check --output-format github . - - name: Check formatting with ruff + - name: Check for typos with codespell run: | - source ruff/bin/activate - ruff --version - ruff format --diff . - ruff format --check . + source $HOME/lints/bin/activate + codespell --version + codespell . + + - name: Check for typos with typos + run: | + source $HOME/lints/bin/activate + typos --version + typos . 
+ + - name: Lint code examples in docs + run: | + source $HOME/lints/bin/activate + prek run blacken-docs --all-files diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml index 61017dd..ab88e07 100644 --- a/.pre-commit-config.yaml +++ b/.pre-commit-config.yaml @@ -1,12 +1,31 @@ -repos: - - repo: https://github.com/ambv/black - rev: 22.3.0 - hooks: - - id: black - language_version: python3 - - - repo: https://github.com/pycqa/bandit - rev: 1.7.4 - hooks: - - id: bandit - +repos: +- repo: https://github.com/astral-sh/ruff-pre-commit + # Ruff version. + rev: v0.14.0 + hooks: + # Run the linter. + - id: ruff + args: [ --fix, --exit-non-zero-on-fix ] + # Run the formatter. + - id: ruff-format +- repo: https://github.com/MarcoGorelli/auto-walrus + rev: 0.3.4 + hooks: + - id: auto-walrus +- repo: https://github.com/codespell-project/codespell + rev: 'v2.4.1' + hooks: + - id: codespell + additional_dependencies: + - tomli +- repo: https://github.com/crate-ci/typos + rev: 'v1.39.0' + hooks: + - id: typos +- repo: https://github.com/adamchainz/blacken-docs + rev: "1.20.0" + hooks: + - id: blacken-docs + args: [--skip-errors] + additional_dependencies: + - black==22.12.0 \ No newline at end of file diff --git a/CHANGELOG.rst b/CHANGELOG.rst index 84e5cf3..e8c9ee6 100644 --- a/CHANGELOG.rst +++ b/CHANGELOG.rst @@ -16,13 +16,23 @@ Subsections for each version can be one of the following; Each individual change should have a link to the pull request after the description of the change. 
+1.0.1 (unreleased) +------------------ + +Changed +^^^^^^^ +- Reactivated CI and added PR approval guardrails (https://github.com/azukds/model_interpreter/pull/8) +- Updated CI configuration to follow tubular's python-package.yml (https://github.com/azukds/model_interpreter/pull/8) +- Switched to prek for pre-commit checks (https://github.com/azukds/model_interpreter/pull/8) +- Fixed spelling mistakes and removed stale tubular references (https://github.com/azukds/model_interpreter/pull/8) + 1.0.0 (2024-08-06) ------------------- Added ^^^^^ - Moved config to pyproject.toml approach -- Updated envs and impacted logic (e.g to accomodate new shap behaviour) +- Updated envs and impacted logic (e.g. to accommodate new shap behaviour) - Setup devcontainer config for codespaces - Updated build pipeline diff --git a/CONTRIBUTING.rst b/CONTRIBUTING.rst index a61016e..67d414c 100644 --- a/CONTRIBUTING.rst +++ b/CONTRIBUTING.rst @@ -1,7 +1,7 @@ Contributing ============ -Thanks for your interest in contributing to this package! No contibution is too small! We're hoping it can be made even better through community contributions. +Thanks for your interest in contributing to this package! No contribution is too small! We're hoping it can be made even better through community contributions. Requests and feedback --------------------- @@ -24,7 +24,7 @@ Then clone the fork (or this repository if internal); git clone https://github.com/lvgig/model_interpreter.git cd model_interpreter -Then install tubular and dependencies for development; +Then install model_interpreter and dependencies for development; .. 
code:: diff --git a/README.md b/README.md index 1cda991..c0bb9c7 100644 --- a/README.md +++ b/README.md @@ -27,10 +27,12 @@ from model_interpreter.interpreter import ModelInterpreter # generate a classification dataset X, y = make_classification( -n_samples=1000, -n_features=4, n_informative=2, - n_redundant=0, random_state=0, -shuffle=False + n_samples=1000, + n_features=4, + n_informative=2, + n_redundant=0, + random_state=0, + shuffle=False, ) # fit a model @@ -45,11 +47,10 @@ model_interpreter.fit(clf) # return feature contribution importances for a single row single_row = X.head(1) contribution_list = single_model_contribution.transform( -single_row, return_type="name_value_dicts" + single_row, return_type="name_value_dicts" ) print(contribution_list) - ``` Which will return the following output: diff --git a/examples/Random_Forest/binary_classification.ipynb b/examples/Random_Forest/binary_classification.ipynb index 1e66be9..b9e41d3 100644 --- a/examples/Random_Forest/binary_classification.ipynb +++ b/examples/Random_Forest/binary_classification.ipynb @@ -137,7 +137,7 @@ "cell_type": "markdown", "metadata": {}, "source": [ - "A model interpreter object is created by specifiying the feature names in the same order as used for building the model." + "A model interpreter object is created by specifying the feature names in the same order as used for building the model." 
] }, { @@ -357,7 +357,7 @@ "cell_type": "markdown", "metadata": {}, "source": [ - "There are also three `sorting` options avaliable:\n", + "There are also three `sorting` options available:\n", "- `'abs'`, which is the default used in the above examples sorts by the absolute value of the feature contribution\n", "- `'positive'`, which sorts the contributions in descending order\n", "- `'label'`, which sorts in a descending order if `pred_label > 0`, and ascending if `pred_label = 0`\n", diff --git a/examples/Random_Forest/multiclass_classification.ipynb b/examples/Random_Forest/multiclass_classification.ipynb index 930beb7..fa99746 100644 --- a/examples/Random_Forest/multiclass_classification.ipynb +++ b/examples/Random_Forest/multiclass_classification.ipynb @@ -139,7 +139,7 @@ "cell_type": "markdown", "metadata": {}, "source": [ - "A model interpreter object is created by specifiying the feature names in the same order as used for building the model." + "A model interpreter object is created by specifying the feature names in the same order as used for building the model." ] }, { @@ -364,7 +364,7 @@ "cell_type": "markdown", "metadata": {}, "source": [ - "There are also three `sorting` options avaliable:\n", + "There are also three `sorting` options available:\n", "- `'abs'`, which is the default used in the above examples sorts by the absolute value of the feature contribution\n", "- `'positive'`, which sorts the contributions in descending order\n", "- `'label'`, which sorts in a descending order if `pred_label > 0`, and ascending if `pred_label = 0`\n", diff --git a/examples/XGB/example.ipynb b/examples/XGB/example.ipynb index 8d507dd..87cad05 100644 --- a/examples/XGB/example.ipynb +++ b/examples/XGB/example.ipynb @@ -184,7 +184,7 @@ "cell_type": "markdown", "metadata": {}, "source": [ - "A model interpreter object is created by specifiying the feature names in the same order as used for building the model." 
+ "A model interpreter object is created by specifying the feature names in the same order as used for building the model." ] }, { @@ -343,7 +343,7 @@ "name": "stdout", "output_type": "stream", "text": [ - "[{'Name': 'MedInc feature', 'Value': -0.7173248529}, {'Name': 'Lattitude of location', 'Value': -0.2954589427}, {'Name': 'Average occupation', 'Value': -0.2584415674}, {'Name': 'Longitude of location', 'Value': -0.194666937}, {'Name': 'Average number of rooms', 'Value': 0.0455645546}, {'Name': 'Age of house', 'Value': -0.0052499785}, {'Name': 'Average number of bedrooms', 'Value': -0.0039967196}, {'Name': 'Population feature', 'Value': -0.0015882736}]\n" + "[{'Name': 'MedInc feature', 'Value': -0.7173248529}, {'Name': 'Latitude of location', 'Value': -0.2954589427}, {'Name': 'Average occupation', 'Value': -0.2584415674}, {'Name': 'Longitude of location', 'Value': -0.194666937}, {'Name': 'Average number of rooms', 'Value': 0.0455645546}, {'Name': 'Age of house', 'Value': -0.0052499785}, {'Name': 'Average number of bedrooms', 'Value': -0.0039967196}, {'Name': 'Population feature', 'Value': -0.0015882736}]\n" ] } ], @@ -355,7 +355,7 @@ " \"AveBedrms\": \"Average number of bedrooms\",\n", " \"Population\": \"Population feature\",\n", " \"AveOccup\": \"Average occupation\",\n", - " \"Latitude\": \"Lattitude of location\",\n", + " \"Latitude\": \"Latitude of location\",\n", " \"Longitude\": \"Longitude of location\",\n", "}\n", "\n", @@ -412,7 +412,7 @@ "cell_type": "markdown", "metadata": {}, "source": [ - "There are also three `sorting` options avaliable:\n", + "There are also three `sorting` options available:\n", "- `'abs'`, which is the default used in the above examples sorts by the absolute value of the feature contribution\n", "- `'positive'`, which sorts the contributions in descending order\n", "- `'label'`, which sorts in a descending order if `pred_label > 0`, and ascending if `pred_label = 0`\n", @@ -436,7 +436,7 @@ "name": "stdout", "output_type": 
"stream", "text": [ - "[{'Name': 'MedInc feature', 'Value': -0.7173248529}, {'Name': 'Lattitude of location', 'Value': -0.2954589427}, {'Name': 'Average occupation', 'Value': -0.2584415674}, {'Name': 'Longitude of location', 'Value': -0.194666937}, {'Name': 'Average number of rooms', 'Value': 0.0455645546}]\n" + "[{'Name': 'MedInc feature', 'Value': -0.7173248529}, {'Name': 'Latitude of location', 'Value': -0.2954589427}, {'Name': 'Average occupation', 'Value': -0.2584415674}, {'Name': 'Longitude of location', 'Value': -0.194666937}, {'Name': 'Average number of rooms', 'Value': 0.0455645546}]\n" ] } ], @@ -461,7 +461,7 @@ "name": "stdout", "output_type": "stream", "text": [ - "[{'Name': 'Average number of rooms', 'Value': 0.0455645546}, {'Name': 'Population feature', 'Value': -0.0015882736}, {'Name': 'Average number of bedrooms', 'Value': -0.0039967196}, {'Name': 'Age of house', 'Value': -0.0052499785}, {'Name': 'Longitude of location', 'Value': -0.194666937}, {'Name': 'Average occupation', 'Value': -0.2584415674}, {'Name': 'Lattitude of location', 'Value': -0.2954589427}, {'Name': 'MedInc feature', 'Value': -0.7173248529}]\n" + "[{'Name': 'Average number of rooms', 'Value': 0.0455645546}, {'Name': 'Population feature', 'Value': -0.0015882736}, {'Name': 'Average number of bedrooms', 'Value': -0.0039967196}, {'Name': 'Age of house', 'Value': -0.0052499785}, {'Name': 'Longitude of location', 'Value': -0.194666937}, {'Name': 'Average occupation', 'Value': -0.2584415674}, {'Name': 'Latitude of location', 'Value': -0.2954589427}, {'Name': 'MedInc feature', 'Value': -0.7173248529}]\n" ] } ], @@ -486,7 +486,7 @@ "name": "stdout", "output_type": "stream", "text": [ - "[{'Name': 'MedInc feature', 'Value': -0.7173248529}, {'Name': 'Lattitude of location', 'Value': -0.2954589427}, {'Name': 'Average occupation', 'Value': -0.2584415674}, {'Name': 'Longitude of location', 'Value': -0.194666937}, {'Name': 'Age of house', 'Value': -0.0052499785}, {'Name': 'Average number of 
bedrooms', 'Value': -0.0039967196}, {'Name': 'Population feature', 'Value': -0.0015882736}, {'Name': 'Average number of rooms', 'Value': 0.0455645546}]\n" + "[{'Name': 'MedInc feature', 'Value': -0.7173248529}, {'Name': 'Latitude of location', 'Value': -0.2954589427}, {'Name': 'Average occupation', 'Value': -0.2584415674}, {'Name': 'Longitude of location', 'Value': -0.194666937}, {'Name': 'Age of house', 'Value': -0.0052499785}, {'Name': 'Average number of bedrooms', 'Value': -0.0039967196}, {'Name': 'Population feature', 'Value': -0.0015882736}, {'Name': 'Average number of rooms', 'Value': 0.0455645546}]\n" ] } ], @@ -518,7 +518,7 @@ "name": "stdout", "output_type": "stream", "text": [ - "{'MedInc feature': -0.71732485, 'Lattitude of location': -0.29545894, 'Average occupation': -0.25844157, 'Longitude of location': -0.19466694, 'Average number of rooms': 0.045564555}\n" + "{'MedInc feature': -0.71732485, 'Latitude of location': -0.29545894, 'Average occupation': -0.25844157, 'Longitude of location': -0.19466694, 'Average number of rooms': 0.045564555}\n" ] } ], @@ -543,7 +543,7 @@ "name": "stdout", "output_type": "stream", "text": [ - "[('MedInc feature', -0.71732485), ('Lattitude of location', -0.29545894), ('Average occupation', -0.25844157), ('Longitude of location', -0.19466694), ('Average number of rooms', 0.045564555)]\n" + "[('MedInc feature', -0.71732485), ('Latitude of location', -0.29545894), ('Average occupation', -0.25844157), ('Longitude of location', -0.19466694), ('Average number of rooms', 0.045564555)]\n" ] } ], diff --git a/examples/XGB/example_ohe.ipynb b/examples/XGB/example_ohe.ipynb index b055619..1ca62f2 100644 --- a/examples/XGB/example_ohe.ipynb +++ b/examples/XGB/example_ohe.ipynb @@ -283,7 +283,7 @@ "metadata": {}, "source": [ "\n", - "### Create catagorical variable" + "### Create categorical variable" ] }, { diff --git a/examples/non_standard_model.ipynb b/examples/non_standard_model.ipynb index 4d0b531..0036b2e 100644 --- 
a/examples/non_standard_model.ipynb +++ b/examples/non_standard_model.ipynb @@ -239,7 +239,7 @@ "cell_type": "markdown", "metadata": {}, "source": [ - "A model interpreter object is created by specifiying the feature names in the same order as used for building the model." + "A model interpreter object is created by specifying the feature names in the same order as used for building the model." ] }, { diff --git a/model_interpreter/interpreter.py b/model_interpreter/interpreter.py index 8fe2003..2a5729b 100644 --- a/model_interpreter/interpreter.py +++ b/model_interpreter/interpreter.py @@ -25,7 +25,7 @@ class ModelInterpreter: model features, if applicable. The one hot encoded feature names are automatically derived based on this list, otherwise they are treated normally. eg: passing ["colour"] will aggregate the contributions - of fields with a name beggining with "colour_" and then return the + of fields with a name beginning with "colour_" and then return the aggregated contributions for "colour" """ @@ -113,7 +113,7 @@ def _kernel_explainer_setup(self, model, X_train, is_classification, n_samples): if is_classification is None: raise ValueError( - f"is_classification input required to fit kernel explainer. True for a classification model, False for a regression model, recieved {is_classification}" + f"is_classification input required to fit kernel explainer. True for a classification model, False for a regression model, received {is_classification}" ) elif (is_classification is True) and (not hasattr(model, "predict_proba")): @@ -406,7 +406,7 @@ def transform( return_precision: int, default = 16 how many decimal places to round the shap values in the return - message to. Default is 16 which mantains the decimal places + message to. 
Default is 16 which maintains the decimal places returned by shap explainers Return: diff --git a/pyproject.toml b/pyproject.toml index d453220..7038bb1 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -35,6 +35,9 @@ dev = [ "pytest-mock", "pytest-cov", "pytest-benchmark", + "prek", + "codespell", + "typos", "hypothesis", "mypy", "ruff", diff --git a/requirements-dev.txt b/requirements-dev.txt index 075fda6..c5b79b0 100644 --- a/requirements-dev.txt +++ b/requirements-dev.txt @@ -6,7 +6,9 @@ cfgv==3.4.0 # via pre-commit cloudpickle==3.1.0 # via shap -colorama==0.4.6 ; sys_platform == 'win32' or platform_system == 'Windows' +codespell==2.4.1 + # via model-interpreter (pyproject.toml) +colorama==0.4.6 ; sys_platform == 'win32' # via # pytest # tqdm @@ -63,6 +65,8 @@ pluggy==1.5.0 # via pytest pre-commit==4.0.1 # via model-interpreter (pyproject.toml) +prek==0.3.4 + # via model-interpreter (pyproject.toml) py-cpuinfo==9.0.0 # via pytest-benchmark pytest==8.3.3 @@ -113,6 +117,8 @@ tqdm==4.67.1 # via shap typing-extensions==4.12.2 # via mypy +typos==1.44.0 + # via model-interpreter (pyproject.toml) tzdata==2024.2 # via pandas virtualenv==20.28.0 diff --git a/requirements.txt b/requirements.txt index 1980685..86dbf78 100644 --- a/requirements.txt +++ b/requirements.txt @@ -4,7 +4,7 @@ cloudpickle==3.1.0 # via # -c requirements-dev.txt # shap -colorama==0.4.6 ; platform_system == 'Windows' +colorama==0.4.6 ; sys_platform == 'win32' # via # -c requirements-dev.txt # tqdm @@ -60,7 +60,7 @@ shap==0.46.0 # -c requirements-dev.txt # model-interpreter (pyproject.toml) six==1.16.0 - # via + # via # -c requirements-dev.txt # python-dateutil slicer==0.0.8 diff --git a/tests/test_fit.py b/tests/test_fit.py index 640a31b..d4d6776 100644 --- a/tests/test_fit.py +++ b/tests/test_fit.py @@ -92,7 +92,7 @@ def test_error_classification_is_required_for_kernel( """Test to check if ValueError if is_classification: int is not passed for a non-standard model""" with pytest.raises( 
ValueError, - match="is_classification input required to fit kernel explainer. True for a classification model, False for a regression model, recieved None", + match="is_classification input required to fit kernel explainer. True for a classification model, False for a regression model, received None", ): MI.fit(DBS_model, X_train=kernel_X_train)