diff --git a/.github/workflows/package-tests.yml b/.github/workflows/package-tests.yml index eaa31d8d5..04cc4b003 100644 --- a/.github/workflows/package-tests.yml +++ b/.github/workflows/package-tests.yml @@ -2,15 +2,9 @@ name: Package tests on: pull_request: - branches: - - main - - master - - dev + branches: [main, master, dev] push: - branches-ignore: - - main - - master - - dev + branches-ignore: [main, master, dev] workflow_call: permissions: @@ -33,26 +27,30 @@ jobs: matrix: ${{ fromJson(needs.package-filter.outputs.matrix) }} runs-on: ubuntu-latest steps: - - name: Checkout - uses: actions/checkout@v4 - - name: Set up Python - uses: actions/setup-python@v5 + - uses: actions/checkout@v4 + with: + fetch-depth: 0 + + - uses: actions/setup-python@v5 with: python-version: '3.12' - - name: Install Poetry - uses: abatilo/actions-poetry@v2 - - name: Install pre-commit - run: | - pip install pre-commit - - name: Run pre-commit hooks and check for changes + + - uses: astral-sh/setup-uv@v4 + + - name: Install and run pre-commit run: | cd "${{ matrix.package_dir }}" + uv sync --all-extras + uv run pre-commit run \ + --from-ref ${{ github.event.pull_request.base.sha || github.event.before }} \ + --to-ref ${{ github.event.pull_request.head.sha || github.sha }} - poetry run pre-commit run --files ./**/** - if [[ $(git status --porcelain) ]] - then - echo "::error::pre-commit hooks failed for ${{ matrix.package_name }}" && exit 1 - fi + - name: Show dirty files (if pre-commit failed) + if: failure() + run: | + cd "${{ matrix.package_dir }}" + git status --porcelain + git diff docker: name: Docker | Build ${{ matrix.package_name }} @@ -63,92 +61,70 @@ jobs: matrix: ${{ fromJson(needs.package-filter.outputs.matrix) }} runs-on: ubuntu-latest steps: - - name: Checkout - uses: actions/checkout@v4 - - name: Check if Dockerfile exists - id: check_dockerfile + - uses: actions/checkout@v4 + + - uses: docker/setup-buildx-action@v3 + + - name: Docker | Build Image run: | - if [ -f 
"${{ matrix.package_dir }}/Dockerfile" ]; then - echo "Dockerfile exists" - echo "dockerfile_exists=true" >> $GITHUB_ENV - else - echo "Dockerfile does not exist" - echo "dockerfile_exists=false" >> $GITHUB_ENV + if [ ! -f "${{ matrix.package_dir }}/Dockerfile" ]; then + echo "No Dockerfile found, skipping" + exit 0 fi - - name: Docker | Tag - id: docker_tag - if: env.dockerfile_exists == 'true' - run: | version=$(cat ${{ matrix.package_dir }}/VERSION) tag=polusai/${{ matrix.package_name }}:${version} - echo "tag will be ${tag}" - echo "tag=${tag}" >> $GITHUB_OUTPUT - - name: Docker | Setup Buildx - uses: docker/setup-buildx-action@v3 - - name: Docker | Check if Image exists - if: env.dockerfile_exists == 'true' - run: | - tag=${{ steps.docker_tag.outputs.tag }} docker pull ${tag} > /dev/null \ - && $(echo "::error::${tag} already exists on DockerHub" && exit 1) \ - || echo "success" - - name: Docker | Build Image - if: env.dockerfile_exists == 'true' - run: | + && { echo "::error::${tag} already exists on DockerHub"; exit 1; } \ + || echo "Image does not exist, safe to build" cp .gitignore ${{ matrix.package_dir }}/.dockerignore cd "${{ matrix.package_dir }}" if [ -f "build-docker.sh" ]; then bash build-docker.sh else - docker build . -t ${{ steps.docker_tag.outputs.tag }} + docker build . -t ${tag} fi - bash build-docker.sh - # docker buildx build --platform linux/amd64,linux/arm64 -t ${tag} --push . 
tests: name: Test | ${{ matrix.package_name }} needs: package-filter + timeout-minutes: 30 if: ${{ needs.package-filter.outputs.num_packages > 0 }} strategy: fail-fast: false matrix: ${{ fromJson(needs.package-filter.outputs.matrix) }} runs-on: ubuntu-latest steps: - - name: Checkout - uses: actions/checkout@v4 + - uses: actions/checkout@v4 with: lfs: true - - name: Set up Python - uses: actions/setup-python@v5 + + - uses: actions/setup-python@v5 with: python-version: '3.12' - - name: Install Conda + + - name: Set up Conda + if: ${{ hashFiles(format('{0}/environment.yml', matrix.package_dir)) != '' }} uses: conda-incubator/setup-miniconda@v2 + - name: Run tests with conda + if: ${{ hashFiles(format('{0}/environment.yml', matrix.package_dir)) != '' }} + shell: bash -l {0} run: | - package_dir=${{ matrix.package_dir }} - cd $package_dir - if [ -f "environment.yml" ]; then - conda init bash - source ~/.bashrc - conda env create -f environment.yml - conda activate project_env - pip install -e ".[all]" - conda install pytest - python -X faulthandler -m pytest -v -p no:faulthandler - echo "conda_installed=true" >> $GITHUB_ENV - else - echo "conda_installed=false" >> $GITHUB_ENV - fi - - name: Install Poetry - uses: abatilo/actions-poetry@v2 - - name: Run tests with poetry - if: env.conda_installed == 'false' - run: | - poetry config virtualenvs.create false + cd ${{ matrix.package_dir }} + conda env create -f environment.yml + conda activate project_env + pip install -e ".[all]" + conda install pytest -y + python -X faulthandler -m pytest -v -p no:faulthandler - package_dir=${{ matrix.package_dir }} - cd $package_dir + - name: Set up uv + if: ${{ hashFiles(format('{0}/environment.yml', matrix.package_dir)) == '' }} + uses: astral-sh/setup-uv@v4 - poetry install - python -X faulthandler -m pytest -v -p no:faulthandler + - name: Run tests with uv + if: ${{ hashFiles(format('{0}/environment.yml', matrix.package_dir)) == '' }} + run: | + cd ${{ 
matrix.package_dir }} + uv sync --all-extras || uv pip install -e ".[all]" + uv pip install pytest + uv run python -X faulthandler -m pytest -v -p no:faulthandler diff --git a/features/nyxus-tool/.bumpversion.cfg b/features/nyxus-tool/.bumpversion.cfg index b8efaf74a..725c2c8ab 100755 --- a/features/nyxus-tool/.bumpversion.cfg +++ b/features/nyxus-tool/.bumpversion.cfg @@ -1,5 +1,5 @@ [bumpversion] -current_version = 0.1.8 +current_version = 0.1.8-dev2 commit = True tag = False parse = (?P\d+)\.(?P\d+)\.(?P\d+)(\-(?P[a-z]+)(?P\d+))? diff --git a/features/nyxus-tool/CHANGELOG.md b/features/nyxus-tool/CHANGELOG.md index a7df986ab..4da0f027f 100644 --- a/features/nyxus-tool/CHANGELOG.md +++ b/features/nyxus-tool/CHANGELOG.md @@ -1,3 +1,15 @@ +# Changelog + +All notable changes to this project will be documented in this file. + +## [0.1.8-dev2] - 2026-03-06 +- Replace the nyxus `featurize_file` function with a simple `featurize` function as `single_roi` flag does not respect `ignore_mask_files`. +- Handling of additional tuning parameter for nyxus features. + +## [0.1.8-dev1] - 2024-11-15 +- Fix a bug for no objects in label images. +- Updated bfio base-container image. + ## [0.1.8-dev0] - 2024-09-26 - Renamed nyxus-plugin to nyxus-tool. -- updated nyxus package and bfio base-container image. +- Updated nyxus package and bfio base-container image. diff --git a/features/nyxus-tool/Dockerfile b/features/nyxus-tool/Dockerfile index 6fac7686a..ec6b30836 100755 --- a/features/nyxus-tool/Dockerfile +++ b/features/nyxus-tool/Dockerfile @@ -1,25 +1,33 @@ -FROM polusai/bfio:2.4.3 +FROM polusai/bfio:2.5.0 -# environment variables defined in polusai/bfio ENV EXEC_DIR="/opt/executables" ENV POLUS_IMG_EXT=".ome.tif" ENV POLUS_TAB_EXT=".csv" ENV POLUS_LOG="INFO" +ENV NUM_WORKERS=4 -# Work directory defined in the base container WORKDIR ${EXEC_DIR} -# TODO: Change the tool_dir to the tool directory -ENV TOOL_DIR="features/nyxus-tool" +# Copy the tool +COPY . 
${EXEC_DIR}/nyxus-tool -# Copy the repository into the container -RUN mkdir image-tools -COPY . ${EXEC_DIR}/image-tools +# Upgrade pip/setuptools/wheel first +RUN python3 -m pip install --upgrade pip setuptools wheel -# Install the tool -RUN pip3 install "${EXEC_DIR}/image-tools/${TOOL_DIR}" --no-cache-dir +# Install build dependencies (compiler + Python headers) +RUN apt-get update && apt-get install -y \ + build-essential \ + python3.11-dev \ + && rm -rf /var/lib/apt/lists/* -# Set the entrypoint -# TODO: Change the entrypoint to the tool entrypoint +# Install your package (Annoy will build successfully) +RUN pip3 install "${EXEC_DIR}/nyxus-tool" --no-cache-dir + +# Clean up build tools to reduce image size +RUN apt-get purge -y build-essential python3.11-dev \ + && apt-get autoremove -y \ + && rm -rf /var/lib/apt/lists/* /tmp/* /var/tmp/* + +# Entrypoint ENTRYPOINT ["python3", "-m", "polus.images.features.nyxus_tool"] CMD ["--help"] diff --git a/features/nyxus-tool/README.md b/features/nyxus-tool/README.md index 19abd6de2..1dcf39a78 100755 --- a/features/nyxus-tool/README.md +++ b/features/nyxus-tool/README.md @@ -1,22 +1,29 @@ -# Nyxus-Plugin(v0.1.8) +# Nyxus-tool(v0.1.8-dev2) -Nyxus plugin uses parallel processing of [Nyxus python package](https://pypi.org/project/nyxus/) to extract nyxus features from intensity-label image data. Especially useful when processing high throughput screens. +Parallelized feature extraction from intensity + label image pairs using the **[Nyxus](https://pypi.org/project/nyxus/)** library. + +Especially useful for high-throughput microscopy screens. Contact [Hamdah Shafqat Abbasi](mailto: hamdah.abbasi@axleinfo.com) for more information. For more information on WIPP, visit the [official WIPP page](https://isg.nist.gov/deepzoomweb/software/wipp). -## Note -Use two separate [filepatterns](https://filepattern.readthedocs.io/en/latest/) for intensity and label images. 
-For example if you have label images of one channel `c1`\ -`segPattern='p00{z}_x{x+}_y{y+}_wx{t}_wy{p}_c1.ome.tif'`\ -Use filepattern if you require to extract features from intensity images of all other channels\ -`intPattern=p00{z}_x{x+}_y{y+}_wx{t}_wy{p}_c{c}.ome.tif` +## Important notes + +- Use two separate **[filepattern](https://filepattern.readthedocs.io/en/latest/)** for intensity and label images. +- Example naming scheme: + + Intensity (multi-channel): + `intPattern=p00{z}_x{x+}_y{y+}_wx{t}_wy{p}_c{c}.ome.tif` -## Output Format -Computed features outputs can be saved in either of formats `.csv`, `.arrow`, `.parquet` by passing values `pandas`, `arrowipc`, `parquet` to `fileExtension`. By default plugin saves outputs in `.csv` + Segmentation : + `segPattern='p00{z}_x{x+}_y{y+}_wx{t}_wy{p}_c1.ome.tif'` + +- `--singleRoi` mode treats each intensity image as one whole-object ROI (ignores segmentation mask) +- Nyxus parameters (e.g., `neighbor_distance`, `pixels_per_micron`) are passed via repeatable `--kwargs KEY=VALUE` +- Output file extension (format) is controlled via environment variable `POLUS_TAB_EXT` (default: `pandas`; options: `pandas`, `arrowipc`, `parquet`) ## Building @@ -29,20 +36,40 @@ To build the Docker image for the conversion plugin, run If WIPP is running, navigate to the plugins page and add a new plugin. Paste the contents of `plugin.json` into the pop-up window and submit. +## Quick run example (Docker) + +See `run-plugin.sh` for a template. 
+ + +```bash +docker run --rm -v /path/to/data:/data \ + -e POLUS_TAB_EXT=pandas \ + polusai/nyxus-tool:0.1.8-dev2 \ + --inpDir /data/intensity \ + --segDir /data/segmentation \ + --intPattern 'p00{z}_x{x+}_y{y+}_wx{t}_wy{p}_c{c}.ome.tif' \ + --segPattern 'p00{z}_x{x+}_y{y+}_wx{t}_wy{p}_c1.ome.tif' \ + --features "BASIC_MORPHOLOGY,ALL_INTENSITY" \ + --kwargs neighbor_distance=5 \ + --kwargs pixels_per_micron=1.0 \ + --singleRoi false \ + --outDir /data/features +``` + + ## Options -This plugin takes nine input arguments and one output argument: +This plugin takes seven input arguments and one output argument: | Name | Description | I/O | Type | |--------------------|--------------------------------------------------------------------|--------|---------------| -| `--inpDir` | Input image directory | Input | collection | -| `--segDir` | Input label image directory | Input | collection | +| `--inpDir` | Intensity images folder | Input | collection | +| `--segDir` | Label / segmentation images folder | Input | collection | | `--intPattern` | Filepattern to parse intensity images | Input | string | | `--segPattern` | Filepattern to parse label images | Input | string | -| `--features` | [nyxus features](https://pypi.org/project/nyxus/) | Input | string | -| `--fileExtension` | A desired file format for nyxus features output | Input | enum | -| `--neighborDist` | Distance between two neighbor objects | Input | integer | -| `--pixelPerMicron` | Pixel Size in micrometer | Input | float | -| `--singleRoi` | Treat intensity image as single roi and ignoring segmentation mask | Input | bool | +| `--features` | [Feature groups or individual nyxus features (comma-separated or repeated)](https://pypi.org/project/nyxus/) | Input | string | +| `--singleRoi` | Treat each intensity image as single ROI (whole-image features, no mask) | Input | bool | +| `--kwargs` | Nyxus params as KEY=VALUE (repeatable; e.g., neighbor_distance=5) | Input | list[str] | | `--outDir` | Output 
collection | Output | collection | -| `--preview` | Generate a JSON file with outputs | Output | JSON | +| `--preview` | Generate a JSON file with outputs | Output | + JSON | diff --git a/features/nyxus-tool/VERSION b/features/nyxus-tool/VERSION index 699c6c6d4..89894cbed 100755 --- a/features/nyxus-tool/VERSION +++ b/features/nyxus-tool/VERSION @@ -1 +1 @@ -0.1.8 +0.1.8-dev2 diff --git a/features/nyxus-tool/ict.yaml b/features/nyxus-tool/ict.yaml index 596ac4cc1..769f02a94 100644 --- a/features/nyxus-tool/ict.yaml +++ b/features/nyxus-tool/ict.yaml @@ -2,7 +2,7 @@ author: - Nick Schaub - Hamdah Shafqat contact: nick.schaub@nih.gov -container: polusai/nyxus-tool:0.1.8 +container: polusai/nyxus-tool:0.1.8-dev2 description: Nyxus plugin allows to make use of parallel pocessing for extracting nyxus features @@ -38,31 +38,19 @@ inputs: name: features required: false type: array - - description: Output file format - format: - - enum - name: fileExtension - required: true - type: string - - description: Pixel distance between neighboring cells - format: - - number - name: neighborDist - required: false - type: number - - description: Pixel size in micrometer - format: - - number - name: pixelPerMicron - required: false - type: number - description: Consider intensity image as single roi and ignoring segmentation mask format: - boolean name: singleRoi required: false type: boolean -name: polusai/Nyxusplugin + - description: Nyxus parameters in KEY=VALUE format (repeatable) + format: + - array + name: kwargs + required: false + type: array +name: polusai/Nyxustool outputs: - description: Output collection format: @@ -72,7 +60,7 @@ outputs: type: path repository: https://github.com/PolusAI/image-tools specVersion: 1.0.0 -title: Nyxus plugin +title: Nyxus tool ui: - description: Input image collection key: inputs.inpDir @@ -94,28 +82,13 @@ ui: key: inputs.features title: features type: text - - description: Ouput file format - fields: - - .arrow - - .feather - - .csv - - 
default - key: inputs.fileExtension - title: fileExtension - type: select - - default: 5 - description: Pixel distance between neighboring cells - key: inputs.neighborDist - title: neighborDist - type: number - - default: 1.0 - description: Pixel size in micrometer - key: inputs.pixelPerMicron - title: pixelPerMicron - type: number - - default: false - description: Consider intensity image as single roi and ignoring segmentation mask + - description: Consider intensity image as single roi and ignoring segmentation mask key: inputs.singleRoi title: singleRoi type: checkbox -version: 0.1.8 + default: false + - description: Nyxus parameters (repeatable KEY=VALUE, e.g. neighbor_distance=5) + key: inputs.kwargs + title: Nyxus kwargs + type: text +version: 0.1.8-dev2 diff --git a/features/nyxus-tool/nyxusplugin.cwl b/features/nyxus-tool/nyxusplugin.cwl index 2c5be2c98..2f83ed2e4 100644 --- a/features/nyxus-tool/nyxusplugin.cwl +++ b/features/nyxus-tool/nyxusplugin.cwl @@ -5,10 +5,6 @@ inputs: inputBinding: prefix: --features type: string? - fileExtension: - inputBinding: - prefix: --fileExtension - type: string inpDir: inputBinding: prefix: --inpDir @@ -17,18 +13,10 @@ inputs: inputBinding: prefix: --intPattern type: string - neighborDist: - inputBinding: - prefix: --neighborDist - type: double? outDir: inputBinding: prefix: --outDir type: Directory - pixelPerMicron: - inputBinding: - prefix: --pixelPerMicron - type: double? segDir: inputBinding: prefix: --segDir @@ -41,6 +29,13 @@ inputs: inputBinding: prefix: --singleRoi type: boolean? 
+ kwargs: + inputBinding: + prefix: --kwargs + type: + - "null" + - type: array + items: string outputs: outDir: outputBinding: @@ -48,7 +43,7 @@ outputs: type: Directory requirements: DockerRequirement: - dockerPull: polusai/nyxus-tool:0.1.8 + dockerPull: polusai/nyxus-tool:0.1.8-dev2 InitialWorkDirRequirement: listing: - entry: $(inputs.outDir) diff --git a/features/nyxus-tool/plugin.json b/features/nyxus-tool/plugin.json index fd9fdcf8c..e5f487113 100755 --- a/features/nyxus-tool/plugin.json +++ b/features/nyxus-tool/plugin.json @@ -1,9 +1,9 @@ { - "name": "Nyxus plugin", - "version": "0.1.8", - "containerId": "polusai/nyxus-tool:0.1.8", - "title": "Nyxus plugin", - "description": "Nyxus plugin allows to make use of parallel pocessing for extracting nyxus features", + "name": "Nyxus tool", + "version": "0.1.8-dev2", + "containerId": "polusai/nyxus-tool:0.1.8-dev2", + "title": "Nyxus tool", + "description": "Extracts Nyxus features from intensity and label image pairs using parallel processing", "author": "Nick Schaub (nick.schaub@nih.gov), Hamdah Shafqat Abbasi (hamdahshafqat.abbasi@nih.gov)", "institution": "National Center for Advancing Translational Sciences, National Institutes of Health", "repository": "https://github.com/PolusAI/image-tools", @@ -56,41 +56,23 @@ } } }, - { - "name": "fileExtension", - "type": "enum", - "description": "Output file format", - "default": "default", - "options": { - "values": [ - ".arrow", - ".feather", - ".csv", - "default" - ] - }, - "required": true - }, - { - "name": "neighborDist", - "description": "Pixel distance between neighboring cells", - "type": "number", - "options": null, - "required": false - }, - { - "name": "pixelPerMicron", - "description": "Pixel size in micrometer", - "type": "number", - "options": null, - "required": false - }, { "name": "singleRoi", "description": "Consider intensity image as single roi and ignoring segmentation mask", "type": "boolean", "options": null, "required": false + }, + { + 
"name": "kwargs", + "type": "array", + "description": "Nyxus parameters as KEY=VALUE pairs", + "required": false, + "options": { + "items": { + "type": "string" + } + } } ], "outputs": [ @@ -129,29 +111,16 @@ "description": "Features or feature groups to be extracted by nyxus plugin", "default": "ALL" }, - { - "key": "inputs.fileExtension", - "title": "fileExtension", - "description": "Ouput file format", - "default": "pandas" - }, - { - "key": "inputs.neighborDist", - "title": "neighborDist", - "description": "Pixel distance between neighboring cells", - "default": 5 - }, - { - "key": "inputs.pixelPerMicron", - "title": "pixelPerMicron", - "description": "Pixel size in micrometer", - "default": 1.0 - }, { "key": "inputs.singleRoi", "title": "singleRoi", "description": "Consider intensity image as single roi and ignoring segmentation mask", "default": false + }, + { + "key": "inputs.kwargs", + "title": "Nyxus Parameters", + "description": "Advanced parameters (repeatable KEY=VALUE)" } ] } diff --git a/features/nyxus-tool/pyproject.toml b/features/nyxus-tool/pyproject.toml index fcd80b1d5..826036a89 100644 --- a/features/nyxus-tool/pyproject.toml +++ b/features/nyxus-tool/pyproject.toml @@ -1,43 +1,60 @@ -[tool.poetry] +[build-system] +requires = [ + "setuptools", + "wheel" +] +build-backend = "setuptools.build_meta" + + +[project] name = "polus-images-features-nyxus-tool" -version = "0.1.8" +version = "0.1.8-dev2" description = "" -authors = [ - "Nick Schaub ", - "Hamdah Shafqat abbasi " - ] readme = "README.md" -packages = [{include = "polus", from = "src"}] - -[tool.poetry.dependencies] -python = ">=3.9,<3.12" -filepattern = "2.0.4" -typer = "^0.7.0" -tqdm = "^4.66.1" -nyxus = "^0.8.2" -vaex = "^4.17.0" -preadator = "0.4.0.dev2" -pytest-sugar = "^0.9.7" -pytest-xdist = "^3.5.0" -pyarrow = ">=16.0,<17.0" -numpy = "<2.0.0" - -[tool.poetry.group.dev.dependencies] -bump2version = "^1.0.1" -pre-commit = "^3.0.4" -black = "^23.1.0" -flake8 = "^6.0.0" -mypy = "^1.0.0" 
-pytest = "^7.2.1" -ipykernel = "^6.21.2" -requests = "^2.28.2" -scikit-image = "0.24.0" +requires-python = ">=3.11,<3.13" + +authors = [ + { name = "Nick Schaub", email = "nick.schaub@nih.gov" }, + { name = "Hamdah Shafqat Abbasi", email = "hamdahshafqat.abbasi@nih.gov" } +] + +dependencies = [ + "bfio==2.5.0", + "filepattern==2.1.4", + "typer==0.24.1", + "tqdm==4.67.3", + "nyxus==0.11.0", + "vaex==4.19.0", + "numpy>2.0.0" +] + + +[project.optional-dependencies] +dev = [ + "bump2version>=1.0.1", + "pre-commit>=3.0.4", + "black>=23.1.0", + "flake8>=6.0.0", + "mypy>=1.0.0", + "pytest>=7.2.1", + "pytest-sugar==1.1.1", + "ipykernel>=6.21.2", + "requests==2.32.5", + "scikit-image==0.24.0" +] + + +[tool.setuptools] +package-dir = {"" = "src"} +include-package-data = true + + +[tool.setuptools.packages.find] +where = ["src"] +namespaces = false -[build-system] -requires = ["poetry-core"] -build-backend = "poetry.core.masonry.api" [tool.pytest.ini_options] pythonpath = [ "." -] +] \ No newline at end of file diff --git a/features/nyxus-tool/run-plugin.sh b/features/nyxus-tool/run-plugin.sh index ce26a3024..01864879d 100755 --- a/features/nyxus-tool/run-plugin.sh +++ b/features/nyxus-tool/run-plugin.sh @@ -3,32 +3,49 @@ version=$( None: + """Configure logging based on environment variable.""" + log_level = os.getenv("POLUS_LOG", "INFO").upper() + level = getattr(logging, log_level, logging.INFO) + + logging.basicConfig( + level=level, + format="%(asctime)s - %(name)-30s - %(levelname)-8s - %(message)s", + datefmt="%d-%b-%y %H:%M:%S", + force=True, + ) + + +# Suppress all warnings +warnings.filterwarnings("ignore") + + # Initialize the logger -logging.basicConfig( - format="%(asctime)s - %(name)-8s - %(levelname)-8s - %(message)s", - datefmt="%d-%b-%y %H:%M:%S", -) +configure_logging() logger = logging.getLogger("polus.images.features.nyxus_tool") @app.command() -def main( # noqa: PLR0913 +def main( # noqa: C901, PLR0913 inp_dir: Path = typer.Option( ..., "--inpDir", - 
help="Input image data collection to be processed by this plugin", + help="Input image directory", ), seg_dir: Path = typer.Option( ..., @@ -54,125 +70,141 @@ def main( # noqa: PLR0913 "--segPattern", help="Pattern use to parse segmentation image filenames", ), - features: Optional[list[str]] = typer.Option( + features: list[str] = typer.Option( ["ALL"], "--features", help="Nyxus features to be extracted", + callback=validate_features, ), - file_extension: Extension = typer.Option( - Extension.DEFAULT, - "--fileExtension", - help="File format of an output file.", - ), - neighbor_dist: Optional[int] = typer.Option( - 5, - "--neighborDist", - help="Number of Pixels between Neighboring cells", - ), - pixel_per_micron: Optional[float] = typer.Option( - 1.0, - "--pixelPerMicron", - help="Number of pixels per micrometer", - ), - single_roi: Optional[bool] = typer.Option( + single_roi: bool = typer.Option( False, "--singleRoi", help="Consider intensity image as single roi and ignoring segmentation mask", ), + kwargs: list[str] + | None = typer.Option( + None, + "--kwargs", + help="Nyxus KEY=VALUE params", + ), out_dir: Path = typer.Option( ..., "--outDir", help="Output directory", ), - preview: Optional[bool] = typer.Option( + preview: bool = typer.Option( False, "--preview", help="Output a JSON preview of files", ), ) -> None: """Scaled Nyxus plugin allows to extract features from labelled images.""" - logger.info(f"inpDir = {inp_dir}") - logger.info(f"segDir = {seg_dir}") - logger.info(f"outDir = {out_dir}") - logger.info(f"intPattern = {int_pattern}") - logger.info(f"segPattern = {seg_pattern}") - logger.info(f"features = {features}") - logger.info(f"fileExtension = {file_extension}") - logger.info(f"neighborDist = {neighbor_dist}") - logger.info(f"pixelPerMicron = {pixel_per_micron}") - logger.info(f"singleRoi = {single_roi}") + validate_paths(inp_dir, seg_dir, out_dir) + + kwarg_dict: dict[str, object] = {} + user_kwargs: dict[str, str] = {} + + if kwargs: + for kv 
in kwargs: + key, value = NyxusKwargType()(kv) + kwarg_dict[key] = value + user_kwargs[key] = kv.split("=", 1)[1] + + config = NyxusConfig( + inp_dir=inp_dir.resolve(), + seg_dir=seg_dir.resolve(), + out_dir=out_dir.resolve(), + features=features, + single_roi=single_roi, + kwargs=user_kwargs, + ) + + logger.info( + "Configuration: %s", + json.dumps({**config.__dict__, "kwargs": user_kwargs}, indent=1, default=str), + ) + int_images = fp.FilePattern(inp_dir, int_pattern) + seg_images = fp.FilePattern(seg_dir, seg_pattern) - inp_dir = inp_dir.resolve() - out_dir = out_dir.resolve() + if len(int_images) == 0: + msg = f"No intensity images found in {inp_dir} with pattern {int_pattern}" + raise ValueError( + msg, + ) - assert inp_dir.exists(), f"{inp_dir} does not exist!! Please check input path again" - assert seg_dir.exists(), f"{seg_dir} does not exist!! Please check input path again" - assert ( - out_dir.exists() - ), f"{out_dir} does not exist!! Please check output path again" + if not single_roi and len(seg_images) == 0: + msg = f"No segmentation images found in {seg_dir}" + raise ValueError(msg) - features = [re.split(",", f) for f in features][0] # type: ignore + tab_ext = POLUS_TAB_EXT + if preview: + if tab_ext == "pandas": + tab_ext = "csv" - assert all( - f in FEATURE_GROUP.union(FEATURE_LIST) for f in features # type: ignore - ), "One or more feature selections were invalid" + write_preview(int_images, config.out_dir, tab_ext, int_pattern) + return - # Adding * to the start and end of nyxus group features - features = [(f"*{f}*") if f in FEATURE_GROUP else f for f in features] + with ProcessPoolExecutor(max_workers=NUM_WORKERS) as executor: + futures = [] - num_workers = max([cpu_count(), 2]) + # Whole image features + if single_roi: + logger.info("Running Nyxus in single ROI mode") - int_images = fp.FilePattern(inp_dir, int_pattern) - seg_images = fp.FilePattern(seg_dir, seg_pattern) - - if preview: - with Path.open(Path(out_dir, "preview.json"), "w") as 
jfile: - out_json: dict[str, Any] = { - "filepattern": int_pattern, - "outDir": [], - } - for file in int_images(): - out_name = file[1][0].name.replace( - "".join(file[1][0].suffixes), - f"{file_extension}", - ) - out_json["outDir"].append(out_name) - json.dump(out_json, jfile, indent=2) - - for s_image in seg_images(): - i_image = int_images.get_matching(**dict(s_image[0].items())) - - with preadator.ProcessManager( - name="compute nyxus feature", - num_processes=num_workers, - threads_per_process=2, - ) as pm: - threads = [] - for fl in i_image: - file = fl[1] - logger.debug(f"Compute nyxus feature {file}") - thread = pm.submit_process( - nyxus_func, + for int_img in int_images(): + file = int_img[1][0] + fut = executor.submit( + run_nyxus_whole_image_features, file, - s_image[1], out_dir, features, - file_extension, - pixel_per_micron, - neighbor_dist, + tab_ext, + kwargs=kwarg_dict, ) - threads.append(thread) - pm.join_processes() - for f in tqdm( - as_completed(threads), - total=len(threads), - mininterval=5, - desc=f"converting images to {file_extension}", - initial=0, - unit_scale=True, - colour="cyan", - ): - f.result() + futures.append(fut) + + # Image object Features + else: + logger.info("Running Nyxus with segmentation masks") + + for s_image in seg_images(): + seg_path = s_image[1][0] + + with BioReader(seg_path) as br: + seg_image = br.read() + + if len(np.unique(seg_image)) == 1: + logger.debug("Skipping empty segmentation %s", seg_path) + continue + + i_images = int_images.get_matching(**dict(s_image[0].items())) + + for fl in i_images: + file = fl[1] + + logger.info("Submitting Nyxus job for %s", file) + + fut = executor.submit( + run_nyxus_object_features, + file, + s_image[1], + out_dir, + features, + tab_ext, + kwargs=kwarg_dict, + ) + + futures.append(fut) + + for f in tqdm( + as_completed(futures), + total=len(futures), + mininterval=5, + desc=f"Computing features ({tab_ext})", + unit_scale=True, + colour="cyan", + ): + f.result() if __name__ 
== "__main__": diff --git a/features/nyxus-tool/src/polus/images/features/nyxus_tool/nyxus_func.py b/features/nyxus-tool/src/polus/images/features/nyxus_tool/nyxus_func.py index a63bc247c..c49d02ace 100644 --- a/features/nyxus-tool/src/polus/images/features/nyxus_tool/nyxus_func.py +++ b/features/nyxus-tool/src/polus/images/features/nyxus_tool/nyxus_func.py @@ -2,11 +2,15 @@ import logging import pathlib from typing import Any -from typing import Optional -from typing import Union +import numpy as np +import pandas as pd +import pyarrow as pa +import pyarrow.parquet as pq import vaex +from bfio import BioReader from nyxus import Nyxus +from pyarrow import ipc from .utils import Extension @@ -14,16 +18,91 @@ chunk_size = 100_000 +_EXT_MAP: dict[str, str] = { + "arrowipc": ".arrow", + "parquet": ".parquet", + "pandas": ".csv", +} -def nyxus_func( # noqa: PLR0913 - int_file: Union[list[pathlib.Path], Any], - seg_file: Union[list[pathlib.Path], Any], + +def _resolve_file_ext(file_extension: Extension) -> tuple[str, str]: + """Resolve a file extension enum/string to (file_ext, suffix). + + Args: + file_extension: Extension enum value or plain string. + + Returns: + A tuple of (file_ext, suffix), e.g. ("arrowipc", ".arrow"). + + Raises: + ValueError: If the extension is not supported. + """ + if hasattr(file_extension, "value"): + file_ext = str(file_extension.value).lower().strip() + else: + file_ext = str(file_extension).lower().strip() + + try: + suffix = _EXT_MAP[file_ext] + except KeyError as err: + msg = f"Invalid extension '{file_extension}'. 
Options: {list(_EXT_MAP)}" + raise ValueError( + msg, + ) from err + + return file_ext, suffix + + +def _write_features( + feats: pd.DataFrame | str, + file_ext: str, + output_path: pathlib.Path, +) -> None: + """Write a features DataFrame to disk in the requested format.""" + if isinstance(feats, str): + feats_path = pathlib.Path(feats) + + if feats_path.suffix == ".csv": + feats = pd.read_csv(feats_path) + + elif feats_path.suffix in [".arrow", ".feather"]: + # Use normal open() instead of memory_map + with feats_path.open("rb") as f: + feats = ipc.open_file(f).read_all().to_pandas() + + elif feats_path.suffix == ".parquet": + feats = pq.read_table(str(feats_path)).to_pandas() + + else: + msg = f"Unsupported Nyxus output format: {feats_path}" + raise ValueError(msg) + + # ---- Write output ---- + if file_ext == "pandas": + vf = vaex.from_pandas(feats) + vf.export_csv(path=str(output_path), chunk_size=100_000) + + else: + table = pa.Table.from_pandas(feats) + + if file_ext == "arrowipc": + with pa.OSFile(str(output_path), "wb") as sink, ipc.new_file( + sink, + table.schema, + ) as writer: + writer.write(table) + + elif file_ext == "parquet": + pq.write_table(table, str(output_path)) + + +def run_nyxus_object_features( # noqa: PLR0913 + int_file: list[pathlib.Path] | Any, + seg_file: list[pathlib.Path] | Any, out_dir: pathlib.Path, features: list[str], file_extension: Extension, - pixels_per_micron: Optional[float] = 1.0, - neighbor_dist: Optional[int] = 5, - single_roi: Optional[bool] = False, + kwargs: dict[str, Any] | None = None, ) -> None: """Scalable Extraction of Nyxus Features. @@ -32,43 +111,66 @@ def nyxus_func( # noqa: PLR0913 seg_file : Path to label image. out_dir : Path to output directory. features : List of features to compute. - file_extension: Output file extension. - pixels_per_micron : Number of pixels for every micrometer. - neighbor_dist : Pixel distance between neighbor objects. Defaults to 5. 
- single_roi : 'True' to treat intensity image as single roi and vice versa. + file_extension: Output file format (Extension enum or string). + kwargs: Additional parameters passed to Nyxus.set_params(). """ if isinstance(int_file, pathlib.Path): int_file = [int_file] + if isinstance(seg_file, pathlib.Path): + seg_file = [seg_file] + nyx = Nyxus(features) - nyx_params = { - "neighbor_distance": neighbor_dist, - "pixels_per_micron": pixels_per_micron, - "n_feature_calc_threads": 4, - } + if kwargs is None: + kwargs = {} - nyx.set_params(**nyx_params) + nyx.set_params(**kwargs) - if f"{file_extension}" == "arrowipc": - ext = ".arrow" - elif f"{file_extension}" == "parquet": - ext = ".parquet" - elif f"{file_extension}" == "pandas": - ext = ".csv" + file_ext, suffix = _resolve_file_ext(file_extension) for i_file in int_file: - out_name = i_file.name.replace("".join(i_file.suffixes), f"{ext}") - output_path = str(pathlib.Path(out_dir, out_name)) + out_name = i_file.name.replace("".join(i_file.suffixes), suffix) + output_path = pathlib.Path(out_dir, out_name) feats = nyx.featurize_files( intensity_files=[str(i_file)], mask_files=[str(seg_file[0])], - single_roi=single_roi, - output_type=f"{file_extension}", - output_path=output_path, + single_roi=False, + output_type=file_ext, + output_path=str(output_path), ) - if f"{file_extension}" == "pandas": - vf = vaex.from_pandas(feats) - vf.export_csv(path=output_path, chunk_size=chunk_size) + _write_features(feats, file_ext, output_path) + + +def run_nyxus_whole_image_features( + int_file: pathlib.Path, + out_dir: pathlib.Path, + features: list[str], + file_extension: Extension, + kwargs: dict[str, Any] | None = None, +) -> None: + """Extract Nyxus features for full intensity images.""" + nyx = Nyxus(features) + + if kwargs is None: + kwargs = {} + + nyx.set_params(**kwargs) + + file_ext, suffix = _resolve_file_ext(file_extension) + + logger.info("Running Nyxus whole-image feature extraction") + + with BioReader(int_file) as 
br: + image = br.read() + + mask = np.ones(image.shape[:2], dtype=np.uint8) + + feats = nyx.featurize(image, mask) + + out_name = int_file.name.replace("".join(int_file.suffixes), suffix) + output_path = pathlib.Path(out_dir, out_name) + + _write_features(feats, file_ext, output_path) diff --git a/features/nyxus-tool/src/polus/images/features/nyxus_tool/utils.py b/features/nyxus-tool/src/polus/images/features/nyxus_tool/utils.py index eb24cf79a..ba049c7bd 100644 --- a/features/nyxus-tool/src/polus/images/features/nyxus_tool/utils.py +++ b/features/nyxus-tool/src/polus/images/features/nyxus_tool/utils.py @@ -1,8 +1,23 @@ """Nyxus Plugin.""" import enum +import json +import logging import os +from dataclasses import dataclass +from dataclasses import field +from multiprocessing import cpu_count +from pathlib import Path +from typing import Any +from typing import Callable +from typing import cast -POLUS_TAB_EXT = os.environ.get("POLUS_TAB_EXT", "pandas") +import filepattern as fp +import typer + +logger = logging.getLogger(__name__) + +POLUS_TAB_EXT: str = os.environ.get("POLUS_TAB_EXT", "pandas") +NUM_WORKERS: int = int(os.environ.get("NUM_WORKERS", max(cpu_count() - 1, 1))) class Extension(str, enum.Enum): @@ -14,34 +29,234 @@ class Extension(str, enum.Enum): DEFAULT = POLUS_TAB_EXT +@dataclass +class NyxusConfig: + """Configuration object for Nyxus processing.""" + + inp_dir: Path + seg_dir: Path + out_dir: Path + features: list[str] + single_roi: bool = False + kwargs: dict[str, Any] = field(default_factory=dict) + num_workers: int = NUM_WORKERS + + +def validate_paths(*paths: Path) -> None: + """Validate that the provided paths exist. + + Args: + *paths: Paths to validate. + + Raises: + typer.BadParameter: If a path does not exist. 
+ """ + for p in paths: + if not p.exists(): + msg = f"{p} does not exist" + raise typer.BadParameter(msg) + + +def validate_features(features: list[str]) -> list[str]: + """Validate the requested features against the supported feature sets. + + Args: + features: List of requested features or groups. + + Returns: + List of valid features with groups marked with asterisks. + + Raises: + typer.BadParameter: If an invalid feature is requested. + """ + valid_features = FEATURE_GROUP.union(FEATURE_LIST) + flat_features: list[str] = [f for feat in features for f in feat.split(",")] + invalid: list[str] = [f for f in flat_features if f not in valid_features] + if invalid: + msg = f"Invalid features: {', '.join(invalid)}" + raise typer.BadParameter(msg) + return [f"*{f}*" if f in FEATURE_GROUP else f for f in flat_features] + + +def write_preview( + int_images: fp.FilePattern, + out_dir: Path, + file_extension: str, + pattern: str, +) -> None: + """Write a preview JSON file listing input/output images. + + Args: + int_images: FilePattern object for intensity images. + out_dir: Output directory path. + file_extension: Output file extension. + pattern: File pattern used to identify images. 
+ """ + preview_path = out_dir / "preview.json" + out_json: dict[str, Any] = {"filepattern": pattern, "outDir": []} + for file in int_images(): + out_name = file[1][0].name.replace( + "".join(file[1][0].suffixes), + f".{file_extension}", + ) + out_json["outDir"].append(out_name) + with preview_path.open("w", encoding="utf-8") as jfile: + json.dump(out_json, jfile, indent=2) + + +class NyxusParamError(Exception): + """Raised when a Nyxus parameter is invalid.""" + + pass + + +NYXUS_PARAMS = { + "neighbor_distance": {"default": 5, "type": int, "min": 1}, + "pixels_per_micron": {"default": 1.0, "type": float, "min": 1e-9}, + "coarse_gray_depth": {"default": 64, "type": int, "min": 1}, + "n_feature_calc_threads": {"default": 4, "type": int, "min": 1}, + "use_gpu_device": {"default": -1, "type": int, "min": -1}, + "ibsi": {"default": False, "type": bool}, + "gabor_kersize": {"default": 16, "type": int, "min": 1}, + "gabor_gamma": {"default": 0.1, "type": float, "min": 0.0}, + "gabor_sig2lam": {"default": 0.8, "type": float, "min": 0.0}, + "gabor_f0": {"default": 0.1, "type": float, "min": 0.0}, + "gabor_thold": {"default": 0.025, "type": float, "min": 0.0}, + "gabor_thetas": {"default": [0, 45, 90, 135], "type": list}, + "gabor_freqs": {"default": [4, 16, 32, 64], "type": list}, + "dynamic_range": {"default": 10000, "type": int, "min": 1}, + "min_intensity": {"default": 0.0, "type": float}, + "max_intensity": {"default": 1.0, "type": float}, + "ram_limit": {"default": -1, "type": int}, + "verbose": {"default": 0, "type": int, "min": 0}, + "anisotropy_x": {"default": 1.0, "type": float, "min": 1e-9}, + "anisotropy_y": {"default": 1.0, "type": float, "min": 1e-9}, +} + +VALID_NYXUS_KWARGS = set(NYXUS_PARAMS.keys()) + + +class NyxusKwargType: + """Parses a CLI KEY=VALUE string into a (key, value) tuple.""" + + name = "KEY=VALUE" + + def __call__( + self, + value: str, + _param: object = None, + _ctx: object = None, + ) -> tuple[str, int | float | bool | str]: + """Validate 
KEY=VALUE argument and cast to int, float, bool, or str.""" + if isinstance(value, tuple): + return value + + if "=" not in value: + msg = f"'{value}' is not a valid KEY=VALUE pair" + raise typer.BadParameter(msg) + + key, _, raw = value.partition("=") + key = key.strip() + raw = raw.strip() + + if key not in VALID_NYXUS_KWARGS: + msg = f"'{key}' is not a valid Nyxus parameter" + raise typer.BadParameter(msg) + + # Try int or float + for _cast in (int, float): + try: + return key, _cast(raw) + except ValueError: + continue + + # Boolean + if raw.lower() in ("true", "false"): + return key, raw.lower() == "true" + + # fallback string + return key, raw + + +# parse kwargs +def parse_nyxus_kwargs(kwargs: dict) -> dict: + """Validate and set defaults for Nyxus parameters.""" + parsed = {} + + # Warn about unexpected keys + unexpected_keys = set(kwargs) - set(NYXUS_PARAMS) + if unexpected_keys: + logger.info( + f"Warning: unexpected keyword argument(s): {', '.join(unexpected_keys)}", + ) + + # Loop through each parameter + for key, rules in NYXUS_PARAMS.items(): + value = kwargs.get(key, rules["default"]) + + # Convert type + if "type" in rules and rules["type"] is not None: + typ = cast(Callable[[Any], Any], rules["type"]) + if typ is not None: + try: + value = typ(value) + except (ValueError, TypeError) as err: + msg = f"Parameter '{key}' must be of type {typ.__name__}" + raise NyxusParamError(msg) from err + + # Check minimum + if "min" in rules and value < rules["min"]: + msg = f"Parameter '{key}' must be >= {rules['min']}" + raise NyxusParamError(msg) + + parsed[key] = value + + # Optional constant + parsed["anisotropy_z"] = 1.0 + + return parsed + + FEATURE_GROUP = { "ALL_INTENSITY", "ALL_MORPHOLOGY", "BASIC_MORPHOLOGY", "ALL_GLCM", + "ALL_GLRM", "ALL_GLRLM", "ALL_GLSZM", "ALL_GLDM", "ALL_NGTDM", - "ALL_EASY", + "ALL_BUT_GABOR", + "ALL_BUT_GLCM", "ALL", } + FEATURE_LIST = { + # Intensity "INTEGRATED_INTENSITY", "MEAN", - "MAX", "MEDIAN", + "MIN", + "MAX", + 
"RANGE", + "COVERED_IMAGE_INTENSITY_RANGE", "STANDARD_DEVIATION", - "MODE", + "STANDARD_DEVIATION_BIASED", + "COV", + "STANDARD_ERROR", "SKEWNESS", "KURTOSIS", + "EXCESS_KURTOSIS", "HYPERSKEWNESS", "HYPERFLATNESS", "MEAN_ABSOLUTE_DEVIATION", + "MEDIAN_ABSOLUTE_DEVIATION", "ENERGY", "ROOT_MEAN_SQUARED", "ENTROPY", + "MODE", "UNIFORMITY", "UNIFORMITY_PIU", "P01", @@ -50,54 +265,371 @@ class Extension(str, enum.Enum): "P75", "P90", "P99", + "QCOD", "INTERQUARTILE_RANGE", "ROBUST_MEAN_ABSOLUTE_DEVIATION", "MASS_DISPLACEMENT", + # Morphology "AREA_PIXELS_COUNT", + "AREA_UM2", + "CENTROID_X", + "CENTROID_Y", "COMPACTNESS", "BBOX_YMIN", "BBOX_XMIN", "BBOX_HEIGHT", "BBOX_WIDTH", + "MAJOR_AXIS_LENGTH", "MINOR_AXIS_LENGTH", - "MAGOR_AXIS_LENGTH", "ECCENTRICITY", "ORIENTATION", "ROUNDNESS", - "NUM_NEIGHBORS", - "PERCENT_TOUCHING", "EXTENT", + "ASPECT_RATIO", "CONVEX_HULL_AREA", "SOLIDITY", "PERIMETER", "EQUIVALENT_DIAMETER", - "EDGE_MEAN", - "EDGE_MAX", - "EDGE_MIN", + "EDGE_MEAN_INTENSITY", "EDGE_STDDEV_INTENSITY", + "EDGE_MAX_INTENSITY", + "EDGE_MIN_INTENSITY", "CIRCULARITY", "EROSIONS_2_VANISH", "EROSIONS_2_VANISH_COMPLEMENT", "FRACT_DIM_BOXCOUNT", "FRACT_DIM_PERIMETER", - "GLCM", - "GLRLM", - "GLSZM", - "GLDM", - "NGTDM", + "WEIGHTED_CENTROID_X", + "WEIGHTED_CENTROID_Y", + "MIN_FERET_DIAMETER", + "MAX_FERET_DIAMETER", + "MIN_FERET_ANGLE", + "MAX_FERET_ANGLE", + "STAT_FERET_DIAM_MIN", + "STAT_FERET_DIAM_MAX", + "STAT_FERET_DIAM_MEAN", + "STAT_FERET_DIAM_MEDIAN", + "STAT_FERET_DIAM_STDDEV", + "STAT_FERET_DIAM_MODE", + "STAT_MARTIN_DIAM_MIN", + "STAT_MARTIN_DIAM_MAX", + "STAT_MARTIN_DIAM_MEAN", + "STAT_MARTIN_DIAM_MEDIAN", + "STAT_MARTIN_DIAM_STDDEV", + "STAT_MARTIN_DIAM_MODE", + "STAT_NASSENSTEIN_DIAM_MIN", + "STAT_NASSENSTEIN_DIAM_MAX", + "STAT_NASSENSTEIN_DIAM_MEAN", + "STAT_NASSENSTEIN_DIAM_MEDIAN", + "STAT_NASSENSTEIN_DIAM_STDDEV", + "STAT_NASSENSTEIN_DIAM_MODE", + "MAXCHORDS_MAX", + "MAXCHORDS_MAX_ANG", + "MAXCHORDS_MIN", + "MAXCHORDS_MIN_ANG", + "MAXCHORDS_MEDIAN", + 
"MAXCHORDS_MEAN", + "MAXCHORDS_MODE", + "MAXCHORDS_STDDEV", + "ALLCHORDS_MAX", + "ALLCHORDS_MAX_ANG", + "ALLCHORDS_MIN", + "ALLCHORDS_MIN_ANG", + "ALLCHORDS_MEDIAN", + "ALLCHORDS_MEAN", + "ALLCHORDS_MODE", + "ALLCHORDS_STDDEV", + "EULER_NUMBER", + "EXTREMA_P1_X", + "EXTREMA_P1_Y", + "EXTREMA_P2_X", + "EXTREMA_P2_Y", + "EXTREMA_P3_X", + "EXTREMA_P3_Y", + "EXTREMA_P4_X", + "EXTREMA_P4_Y", + "EXTREMA_P5_X", + "EXTREMA_P5_Y", + "EXTREMA_P6_X", + "EXTREMA_P6_Y", + "EXTREMA_P7_X", + "EXTREMA_P7_Y", + "EXTREMA_P8_X", + "EXTREMA_P8_Y", + "POLYGONALITY_AVE", + "HEXAGONALITY_AVE", + "HEXAGONALITY_STDDEV", + "DIAMETER_MIN_ENCLOSING_CIRCLE", + "DIAMETER_CIRCUMSCRIBING_CIRCLE", + "DIAMETER_INSCRIBING_CIRCLE", + "GEODETIC_LENGTH", + "THICKNESS", + "ROI_RADIUS_MEAN", + "ROI_RADIUS_MAX", + "ROI_RADIUS_MEDIAN", + # GLCM texture + "GLCM_ASM", + "GLCM_ACOR", + "GLCM_CLUPROM", + "GLCM_CLUSHADE", + "GLCM_CLUTEND", + "GLCM_CONTRAST", + "GLCM_CORRELATION", + "GLCM_DIFAVE", + "GLCM_DIFENTRO", + "GLCM_DIFVAR", + "GLCM_DIS", + "GLCM_ENERGY", + "GLCM_ENTROPY", + "GLCM_HOM1", + "GLCM_HOM2", + "GLCM_ID", + "GLCM_IDN", + "GLCM_IDM", + "GLCM_IDMN", + "GLCM_INFOMEAS1", + "GLCM_INFOMEAS2", + "GLCM_IV", + "GLCM_JAVE", + "GLCM_JE", + "GLCM_JMAX", + "GLCM_JVAR", + "GLCM_SUMAVERAGE", + "GLCM_SUMENTROPY", + "GLCM_SUMVARIANCE", + "GLCM_VARIANCE", + # GLRLM texture + "GLRLM_SRE", + "GLRLM_LRE", + "GLRLM_GLN", + "GLRLM_GLNN", + "GLRLM_RLN", + "GLRLM_RLNN", + "GLRLM_RP", + "GLRLM_GLV", + "GLRLM_RV", + "GLRLM_RE", + "GLRLM_LGLRE", + "GLRLM_HGLRE", + "GLRLM_SRLGLE", + "GLRLM_SRHGLE", + "GLRLM_LRLGLE", + "GLRLM_LRHGLE", + # GLDZM texture + "GLDZM_SDE", + "GLDZM_LDE", + "GLDZM_LGLE", + "GLDZM_HGLE", + "GLDZM_SDLGLE", + "GLDZM_SDHGLE", + "GLDZM_LDLGLE", + "GLDZM_LDHGLE", + "GLDZM_GLNU", + "GLDZM_GLNUN", + "GLDZM_ZDNU", + "GLDZM_ZDNUN", + "GLDZM_ZP", + "GLDZM_GLM", + "GLDZM_GLV", + "GLDZM_ZDM", + "GLDZM_ZDV", + "GLDZM_ZDE", + # GLSZM texture + "GLSZM_SAE", + "GLSZM_LAE", + "GLSZM_GLN", + "GLSZM_GLNN", + "GLSZM_SZN", + 
"GLSZM_SZNN", + "GLSZM_ZP", + "GLSZM_GLV", + "GLSZM_ZV", + "GLSZM_ZE", + "GLSZM_LGLZE", + "GLSZM_HGLZE", + "GLSZM_SALGLE", + "GLSZM_SAHGLE", + "GLSZM_LALGLE", + "GLSZM_LAHGLE", + "GLDM_SDE", + "GLDM_LDE", + "GLDM_GLN", + "GLDM_DN", + "GLDM_DNN", + "GLDM_GLV", + "GLDM_DV", + "GLDM_DE", + "GLDM_LGLE", + "GLDM_HGLE", + "GLDM_SDLGLE", + "GLDM_SDHGLE", + "GLDM_LDLGLE", + "GLDM_LDHGLE", + "NGLDM_LDE", + "NGLDM_HDE", + "NGLDM_LGLCE", + "NGLDM_HGLCE", + "NGLDM_LDLGLE", + "NGLDM_LDHGLE", + "NGLDM_HDLGLE", + "NGLDM_HDHGLE", + "NGLDM_GLNU", + "NGLDM_GLNUN", + "NGLDM_DCNU", + "NGLDM_DCNUN", + "NGLDM_GLM", + "NGLDM_GLV", + "NGLDM_DCM", + "NGLDM_DCV", + "NGLDM_DCE", + "NGLDM_DCENE", + "NGTDM_COARSENESS", + "NGTDM_CONTRAST", + "NGTDM_BUSYNESS", + "NGTDM_COMPLEXITY", + "NGTDM_STRENGTH", + # Radial / frequency "ZERNIKE2D", "FRAC_AT_D", - "RADIAL_CV", "MEAN_FRAC", + "RADIAL_CV", "GABOR", - "ALL_INTENSITY", - "ALL_MORPHOLOGY", - "BASIC_MORPHOLOGY", - "ALL_GLCM", - "ALL_GLRLM", - "ALL_GLSZM", - "ALL_GLDM", - "ALL_NGTDM", - "ALL_EASY", - "ALL", + # Image moments (example expansion up to order 3) + "SPAT_MOMENT_00", + "SPAT_MOMENT_01", + "SPAT_MOMENT_02", + "SPAT_MOMENT_03", + "SPAT_MOMENT_10", + "SPAT_MOMENT_11", + "SPAT_MOMENT_12", + "SPAT_MOMENT_20", + "SPAT_MOMENT_21", + "SPAT_MOMENT_30", + "WEIGHTED_SPAT_MOMENT_00", + "WEIGHTED_SPAT_MOMENT_01", + "WEIGHTED_SPAT_MOMENT_02", + "WEIGHTED_SPAT_MOMENT_03", + "WEIGHTED_SPAT_MOMENT_10", + "WEIGHTED_SPAT_MOMENT_11", + "WEIGHTED_SPAT_MOMENT_20", + "WEIGHTED_SPAT_MOMENT_21", + "WEIGHTED_SPAT_MOMENT_30", + "CENTRAL_MOMENT_00", + "CENTRAL_MOMENT_01", + "CENTRAL_MOMENT_02", + "CENTRAL_MOMENT_03", + "CENTRAL_MOMENT_10", + "CENTRAL_MOMENT_11", + "CENTRAL_MOMENT_12", + "CENTRAL_MOMENT_20", + "CENTRAL_MOMENT_21", + "CENTRAL_MOMENT_30", + "WEIGHTED_CENTRAL_MOMENT_02", + "WEIGHTED_CENTRAL_MOMENT_03", + "WEIGHTED_CENTRAL_MOMENT_11", + "WEIGHTED_CENTRAL_MOMENT_12", + "WEIGHTED_CENTRAL_MOMENT_20", + "WEIGHTED_CENTRAL_MOMENT_21", + 
"WEIGHTED_CENTRAL_MOMENT_30", + "NORM_CENTRAL_MOMENT_02", + "NORM_CENTRAL_MOMENT_03", + "NORM_CENTRAL_MOMENT_11", + "NORM_CENTRAL_MOMENT_12", + "NORM_CENTRAL_MOMENT_20", + "NORM_CENTRAL_MOMENT_21", + "NORM_CENTRAL_MOMENT_30", + "NORM_SPAT_MOMENT_00", + "NORM_SPAT_MOMENT_01", + "NORM_SPAT_MOMENT_02", + "NORM_SPAT_MOMENT_03", + "NORM_SPAT_MOMENT_10", + "NORM_SPAT_MOMENT_20", + "NORM_SPAT_MOMENT_30", + # Hu moments + "HU_M1", + "HU_M2", + "HU_M3", + "HU_M4", + "HU_M5", + "HU_M6", + "HU_M7", + "WEIGHTED_HU_M1", + "WEIGHTED_HU_M2", + "WEIGHTED_HU_M3", + "WEIGHTED_HU_M4", + "WEIGHTED_HU_M5", + "WEIGHTED_HU_M6", + "WEIGHTED_HU_M7", + # IMOM features + "IMOM_RM_00", + "IMOM_RM_01", + "IMOM_RM_02", + "IMOM_RM_03", + "IMOM_RM_10", + "IMOM_RM_11", + "IMOM_RM_12", + "IMOM_RM_20", + "IMOM_RM_21", + "IMOM_RM_30", + "IMOM_WRM_00", + "IMOM_WRM_01", + "IMOM_WRM_02", + "IMOM_WRM_03", + "IMOM_WRM_10", + "IMOM_WRM_11", + "IMOM_WRM_12", + "IMOM_WRM_20", + "IMOM_WRM_21", + "IMOM_WRM_30", + "IMOM_CM_02", + "IMOM_CM_03", + "IMOM_CM_11", + "IMOM_CM_12", + "IMOM_CM_20", + "IMOM_CM_21", + "IMOM_CM_30", + "IMOM_WCM_02", + "IMOM_WCM_03", + "IMOM_WCM_11", + "IMOM_WCM_12", + "IMOM_WCM_20", + "IMOM_WCM_21", + "IMOM_WCM_30", + "IMOM_NCM_02", + "IMOM_NCM_03", + "IMOM_NCM_11", + "IMOM_NCM_20", + "IMOM_NCM_21", + "IMOM_NCM_30", + "IMOM_NRM_00", + "IMOM_NRM_01", + "IMOM_NRM_02", + "IMOM_NRM_03", + "IMOM_NRM_10", + "IMOM_NRM_20", + "IMOM_NRM_30", + "IMOM_HU1", + "IMOM_HU2", + "IMOM_HU3", + "IMOM_HU4", + "IMOM_HU5", + "IMOM_HU6", + "IMOM_HU7", + "IMOM_WHU1", + "IMOM_WHU2", + "IMOM_WHU3", + "IMOM_WHU4", + "IMOM_WHU5", + "IMOM_WHU6", + "IMOM_WHU7", + # Neighbor + "NUM_NEIGHBORS", + "PERCENT_TOUCHING", + "CLOSEST_NEIGHBOR1_DIST", + "CLOSEST_NEIGHBOR1_ANG", + "CLOSEST_NEIGHBOR2_DIST", + "CLOSEST_NEIGHBOR2_ANG", + "ANG_BW_NEIGHBORS_MEAN", + "ANG_BW_NEIGHBORS_STDDEV", + "ANG_BW_NEIGHBORS_MODE", } diff --git a/features/nyxus-tool/tests/conftest.py b/features/nyxus-tool/tests/conftest.py index 
95091b58d..adf3399e0 100644 --- a/features/nyxus-tool/tests/conftest.py +++ b/features/nyxus-tool/tests/conftest.py @@ -2,7 +2,6 @@ import tempfile from pathlib import Path -from typing import Union import numpy as np import pytest @@ -23,24 +22,24 @@ def pytest_addoption(parser: pytest.Parser) -> None: @pytest.fixture() -def inp_dir() -> Union[str, Path]: +def inp_dir() -> str | Path: """Create directory for saving intensity images.""" return Path(tempfile.mkdtemp(dir=Path.cwd())) @pytest.fixture() -def seg_dir() -> Union[str, Path]: +def seg_dir() -> str | Path: """Create directory for saving groundtruth labelled images.""" return Path(tempfile.mkdtemp(dir=Path.cwd())) @pytest.fixture() -def output_directory() -> Union[str, Path]: +def output_directory() -> str | Path: """Create output directory.""" return Path(tempfile.mkdtemp(dir=Path.cwd())) -@pytest.fixture(params=[256, 512, 1024, 2048]) +@pytest.fixture(params=[256]) def image_sizes(request: pytest.FixtureRequest) -> pytest.FixtureRequest: """To get the parameter of the fixture.""" return request.param @@ -48,12 +47,12 @@ def image_sizes(request: pytest.FixtureRequest) -> pytest.FixtureRequest: @pytest.fixture() def synthetic_images( - inp_dir: Union[str, Path], - seg_dir: Union[str, Path], + inp_dir: str | Path, + seg_dir: str | Path, image_sizes: pytest.FixtureRequest, -) -> tuple[Union[str, Path], Union[str, Path]]: +) -> tuple[str | Path, str | Path]: """Generate random synthetic images.""" - for i in range(10): + for i in range(3): im = np.zeros((image_sizes, image_sizes)) points = image_sizes * np.random.random((2, 10**2)) im[(points[0]).astype(int), (points[1]).astype(int)] = 1 @@ -72,8 +71,6 @@ def synthetic_images( @pytest.fixture( params=[ ("pandas", ".csv", "MEAN"), - ("arrowipc", ".arrow", "MEDIAN"), - ("parquet", ".parquet", "MODE"), ], ) def get_params(request: pytest.FixtureRequest) -> pytest.FixtureRequest: @@ -81,7 +78,7 @@ def get_params(request: pytest.FixtureRequest) -> 
pytest.FixtureRequest: return request.param -@pytest.fixture(params=[5000, 10000, 30000]) +@pytest.fixture(params=[5000]) def scaled_sizes(request: pytest.FixtureRequest) -> pytest.FixtureRequest: """To get the parameter of the fixture.""" return request.param diff --git a/features/nyxus-tool/tests/test_main.py b/features/nyxus-tool/tests/test_main.py index 73315f99c..08143d8d6 100755 --- a/features/nyxus-tool/tests/test_main.py +++ b/features/nyxus-tool/tests/test_main.py @@ -1,150 +1,153 @@ -"""Nyxus Plugin.""" +"""Tests for Nyxus plugin.""" from pathlib import Path import shutil -import tempfile -import filepattern as fp -import numpy as np import pytest import vaex -from typing import Union -from skimage import filters, io, measure + from typer.testing import CliRunner -from polus.images.features.nyxus_tool.__main__ import app as app -from polus.images.features.nyxus_tool.nyxus_func import nyxus_func +from polus.images.features.nyxus_tool.__main__ import app +from polus.images.features.nyxus_tool.nyxus_func import ( + run_nyxus_object_features, + run_nyxus_whole_image_features +) runner = CliRunner() def clean_directories() -> None: - """Remove all temporary directories.""" - for d in Path(".").cwd().iterdir(): + """Remove temporary directories.""" + for d in Path.cwd().iterdir(): if d.is_dir() and d.name.startswith("tmp"): shutil.rmtree(d) -def test_nyxus_func( - synthetic_images: tuple[Union[str, Path], Union[str, Path]], - output_directory: Union[str, Path], +def test_run_nyxus_object_features( + synthetic_images: tuple[str | Path, str | Path], + output_directory: str | Path, get_params: pytest.FixtureRequest, ) -> None: - """Test Nyxus Function. 
- - This unit test runs the nyxus function and validates the outputs - """ + """Test object-level feature extraction.""" inp_dir, seg_dir = synthetic_images - int_pattern = "y04_r{r:d}_c{c:d}.ome.tif" - seg_pattern = "y04_r{r:d}_c0.ome.tif" - int_images = fp.FilePattern(inp_dir, int_pattern) - seg_images = fp.FilePattern(seg_dir, seg_pattern) fileext, EXT, feat = get_params - for s_image in seg_images(): - i_image = int_images.get_matching(**{k: v for k, v in s_image[0].items()}) - for i in i_image: - nyxus_func( - int_file=i[1], - seg_file=s_image[1], - out_dir=output_directory, - features=[feat], - file_extension=fileext, - ) - - output_ext = [f.suffix for f in output_directory.iterdir()][0] - assert output_ext == EXT - vdf = vaex.open([f for f in output_directory.iterdir()][0]) + + int_files = sorted(Path(inp_dir).glob("*c1.ome.tif")) + seg_files = sorted(Path(seg_dir).glob("*c0.ome.tif")) + + for int_file, seg_file in zip(int_files, seg_files): + run_nyxus_object_features( + int_file=int_file, + seg_file=seg_file, + out_dir=output_directory, + features=[feat], + file_extension=fileext, + ) + + outputs = list(Path(output_directory).iterdir()) + + assert len(outputs) > 0 + assert outputs[0].suffix == EXT + + vdf = vaex.open(outputs[0]) assert vdf.shape is not None + clean_directories() -@pytest.fixture -def scaled_images( - inp_dir: Union[str, Path], - seg_dir: Union[str, Path], - scaled_sizes: pytest.FixtureRequest, -) -> tuple[Union[str, Path], Union[str, Path]]: - """Generate random synthetic images.""" - im = np.zeros((scaled_sizes, scaled_sizes)) - points = scaled_sizes * np.random.random((2, 1**2)) - im[(points[0]).astype(int), (points[1]).astype(int)] = 1 - im = filters.gaussian(im, sigma=scaled_sizes / (20.0 * 10)) - blobs = im > im.mean() - lab_blobs = measure.label(blobs, background=0) - intname = "y04_r1_c1.ome.tif" - segname = "y04_r1_c0.ome.tif" - int_name = Path(inp_dir, intname) - seg_name = Path(seg_dir, segname) - io.imsave(int_name, im) - 
io.imsave(seg_name, lab_blobs) - return inp_dir, seg_dir - - -@pytest.fixture(params=[("pandas", ".csv", "MEAN")]) -def get_scaled_params(request: pytest.FixtureRequest) -> pytest.FixtureRequest: - """To get the parameter of the fixture.""" - yield request.param - - -@pytest.mark.skipif("not config.getoption('slow')") -def test_scaled_nyxus_func( - scaled_images: tuple[Union[str, Path], Union[str, Path]], - output_directory: Union[str, Path], - get_scaled_params: pytest.FixtureRequest, +def test_run_nyxus_whole_image_features( + synthetic_images: tuple[str | Path, str | Path], + output_directory: str | Path, + get_params: pytest.FixtureRequest, ) -> None: - """Test Nyxus Function. - - This unit test runs the nyxus function and validates the outputs - """ - inp_dir, seg_dir = scaled_images - int_pattern = "y04_r{r:d}_c{c:d}.ome.tif" - seg_pattern = "y04_r{r:d}_c0.ome.tif" - int_images = fp.FilePattern(inp_dir, int_pattern) - seg_images = fp.FilePattern(seg_dir, seg_pattern) - fileext, EXT, feat = get_scaled_params - for s_image in seg_images(): - i_image = int_images.get_matching(**{k: v for k, v in s_image[0].items()}) - nyxus_func( - int_file=i_image[0][1], - seg_file=s_image[1], + """Test whole-image feature extraction.""" + inp_dir, _ = synthetic_images + fileext, EXT, feat = get_params + + int_files = sorted(Path(inp_dir).glob("*c1.ome.tif")) + + for int_file in int_files: + run_nyxus_whole_image_features( + int_file=int_file, out_dir=output_directory, features=[feat], file_extension=fileext, ) - output_ext = [f.suffix for f in output_directory.iterdir()][0] - assert output_ext == EXT - vdf = vaex.open([f for f in output_directory.iterdir()][0]) + + outputs = list(Path(output_directory).iterdir()) + + assert len(outputs) > 0 + assert outputs[0].suffix == EXT + + vdf = vaex.open(outputs[0]) assert vdf.shape is not None + clean_directories() -def test_cli(synthetic_images, output_directory, get_params) -> None: - """Test Cli.""" +def test_cli( + synthetic_images: 
tuple[str | Path, str | Path], + output_directory: str | Path, + get_params: pytest.FixtureRequest, +) -> None: + """Test CLI execution.""" inp_dir, seg_dir = synthetic_images - int_pattern = "y04_r{r:d}_c1.ome.tif" - seg_pattern = "y04_r{r:d}_c0.ome.tif" fileext, _, feat = get_params - runner.invoke( + result = runner.invoke( app, [ "--inpDir", - inp_dir, + str(inp_dir), "--segDir", - seg_dir, + str(seg_dir), "--intPattern", - int_pattern, + "y04_r{r:d}_c1.ome.tif", "--segPattern", - seg_pattern, + "y04_r{r:d}_c0.ome.tif", + "--features", + feat, + "--outDir", + str(output_directory), + ], + ) + + assert result.exit_code == 0 + assert any(Path(output_directory).iterdir()) + + clean_directories() + + +def test_cli_single_roi( + synthetic_images: tuple[str | Path, str | Path], + output_directory: str | Path, + get_params: pytest.FixtureRequest, +) -> None: + """Test CLI with single ROI mode.""" + inp_dir, seg_dir = synthetic_images + _, _, feat = get_params + + result = runner.invoke( + app, + [ + "--inpDir", + str(inp_dir), + "--segDir", + str(seg_dir), + "--intPattern", + "y04_r{r:d}_c1.ome.tif", + "--segPattern", + "y04_r{r:d}_c0.ome.tif", "--features", feat, - "--fileExtension", - fileext, "--singleRoi", - False, "--outDir", - output_directory, + str(output_directory), ], ) - assert output_directory.joinpath(f"y04_r1_c1{fileext}") + + assert result.exit_code == 0 + assert any(Path(output_directory).iterdir()) + clean_directories()