From 8b15b3834a6ef99ffa87ac56d8832f7f4aa9797e Mon Sep 17 00:00:00 2001 From: Jane Van Lam <75lam@cua.edu> Date: Tue, 10 Mar 2026 15:50:40 -0400 Subject: [PATCH 1/3] replace preadtor with ProcessPoolExe --- .pre-commit-config.yaml | 22 +- Jenkinsfile | 4 +- .../Dockerfile_DistributedMemory | 2 +- .../PCA_Cov_DistributedMemory.py | 275 +- dimension_reduction/PCA/README.rst | 83 +- .../Dockerfile_SharedMemory_CPUonly | 2 +- .../Dockerfile_SharedMemory_GPUEnabled | 6 +- .../PCA/Shared-Memory/PCA_SVD_SharedMemory.py | 226 +- .../PCA/Shared-Memory/plugin_CPUOnly.json | 140 +- .../PCA/Shared-Memory/plugin_GpuEnabled.json | 140 +- dimension_reduction/UMAP/README.rst | 69 +- .../UMAP/Shared-Memory-GPU/Dockerfile | 23 +- .../UMAP/Shared-Memory-GPU/Initialization.cpp | 68 +- .../UMAP/Shared-Memory-GPU/KNN_GPU_Code.cu | 192 +- .../UMAP/Shared-Memory-GPU/LMOptimization.cpp | 346 +- .../UMAP/Shared-Memory-GPU/LMOptimization.h | 1 - .../UMAP/Shared-Memory-GPU/Metrics.cpp | 80 +- .../UMAP/Shared-Memory-GPU/Metrics.cuh | 39 +- .../UMAP/Shared-Memory-GPU/SGD.cuh | 66 +- .../UMAP/Shared-Memory-GPU/highDComputes.cpp | 8 +- .../UMAP/Shared-Memory-GPU/highDComputes.h | 6 +- .../UMAP/Shared-Memory-GPU/main.cu | 200 +- .../UMAP/Shared-Memory-OpenMP/Dockerfile | 13 +- .../Examples/Plotting MNIST Data/plot.py | 63 +- .../Shared-Memory-OpenMP/Initialization.cpp | 68 +- .../Shared-Memory-OpenMP/KNN_OpenMP_Code.cpp | 74 +- .../Shared-Memory-OpenMP/KNN_OpenMP_Code.h | 2 - .../Shared-Memory-OpenMP/LMOptimization.cpp | 348 +- .../Shared-Memory-OpenMP/LMOptimization.h | 1 - .../UMAP/Shared-Memory-OpenMP/Metrics.cpp | 56 +- .../UMAP/Shared-Memory-OpenMP/Metrics.h | 2 - .../UMAP/Shared-Memory-OpenMP/SGD.cpp | 12 +- .../UMAP/Shared-Memory-OpenMP/SGD.h | 1 - .../Shared-Memory-OpenMP/highDComputes.cpp | 12 +- .../UMAP/Shared-Memory-OpenMP/highDComputes.h | 1 - .../UMAP/Shared-Memory-OpenMP/main.cpp | 223 +- .../UMAP/Shared-Memory-OpenMP/plugin.json | 334 +- .../pixel_segmentation_eval/__main__.py | 16 
+- .../pixel_segmentation_eval/evaluate.py | 268 +- .../Dockerfile | 4 +- .../polus-feature-extraction-plugin/README.md | 60 +- .../polus-feature-extraction-plugin/VERSION | 2 +- .../build-docker.sh | 2 +- .../plugin.json | 2 +- .../src/log4j.properties | 2 +- .../src/main.py | 1696 +++-- .../Dockerfile | 4 +- .../README.md | 1 - .../VERSION | 2 +- .../build-docker.sh | 2 +- .../plugin.json | 2 +- .../src/main.py | 177 +- .../src/requirements.txt | 2 +- .../Dockerfile | 4 +- .../README.md | 10 +- .../VERSION | 2 +- .../build-docker.sh | 2 +- .../plugin.json | 2 +- .../src/main.py | 119 +- .../src/mesh.py | 183 +- .../src/requirements.txt | 2 +- .../tests/featurization_test.py | 41 +- .../region_segmentation_eval/__main__.py | 11 +- .../region_segmentation_eval/evaluate.py | 648 +- .../build-docker.sh | 2 +- formats/polus-czi-extract-plugin/Dockerfile | 2 +- formats/polus-czi-extract-plugin/VERSION | 2 +- .../polus-czi-extract-plugin/build-docker.sh | 2 +- formats/polus-czi-extract-plugin/plugin.json | 2 +- .../polus-czi-extract-plugin/run-plugin.sh | 3 +- formats/polus-czi-extract-plugin/src/main.py | 246 +- .../src/requirements.txt | 2 +- formats/polus-imaris-parser-plugin/Dockerfile | 2 +- formats/polus-imaris-parser-plugin/README.md | 5 +- formats/polus-imaris-parser-plugin/VERSION | 2 +- .../build-docker.sh | 2 +- .../polus-imaris-parser-plugin/plugin.json | 2 +- .../src/extract_ims_data.py | 461 +- .../src/link_ims_ids.py | 92 +- .../polus-imaris-parser-plugin/src/main.py | 107 +- .../src/merge_ids_to_features.py | 423 +- .../polus-multichannel-tiff-plugin/Dockerfile | 4 +- .../polus-multichannel-tiff-plugin/README.md | 4 +- .../polus-multichannel-tiff-plugin/VERSION | 2 +- .../build-docker.sh | 2 +- .../plugin.json | 2 +- .../src/log4j.properties | 2 +- .../src/main.py | 194 +- .../README.md | 30 +- .../polus-tiledtiff-converter-plugin/VERSION | 2 +- .../bumpversion.cfg | 2 +- .../mvn-packager.sh | 2 +- .../plugin.json | 2 +- 
.../polus-tiledtiff-converter-plugin/pom.xml | 150 +- .../converters/TiledOmeTiffConverter.java | 32 +- .../plugins/TiledOmeTiffConverterMain.java | 40 +- .../segmentation/mesmer_inference/__main__.py | 18 +- .../segmentation/mesmer_inference/padded.py | 64 +- .../mesmer-training-tool/run-plugin.sh | 2 +- .../segmentation/mesmer_training/__main__.py | 20 +- .../segmentation/mesmer_training/train.py | 12 +- .../polus-aics-classic-seg-plugin/Dockerfile | 4 +- .../polus-aics-classic-seg-plugin/README.md | 11 +- .../polus-aics-classic-seg-plugin/VERSION | 2 +- .../build-docker.sh | 2 +- .../polus-aics-classic-seg-plugin/plugin.json | 2 +- .../src/Workflows/Playground_CurvyLinear.py | 92 +- .../src/Workflows/Playground_dots.py | 109 +- .../src/Workflows/Playground_filament3d.py | 85 +- .../src/Workflows/Playground_gja1.py | 85 +- .../src/Workflows/Playground_lamp1.py | 95 +- .../src/Workflows/Playground_npm1.py | 98 +- .../src/Workflows/Playground_shell.py | 91 +- .../src/Workflows/Playground_spotty.py | 74 +- .../src/Workflows/Playground_st6gal1.py | 97 +- .../src/log4j.properties | 2 +- .../polus-aics-classic-seg-plugin/src/main.py | 139 +- .../polus-smp-training-plugin/VERSION | 2 +- .../generate_plugin_manifest.py | 375 +- .../misc/preprocess_tissuenet.py | 126 +- .../misc/train_smp_DICEloss.py | 612 +- .../polus-smp-training-plugin/plugin.json | 6066 ++++++++--------- .../requirements-docker.txt | 2 +- .../polus-smp-training-plugin/src/.gitignore | 2 +- .../src/inference.py | 37 +- .../polus-smp-training-plugin/src/main.py | 447 +- .../polus-smp-training-plugin/src/training.py | 78 +- .../src/utils/augmentations.py | 31 +- .../src/utils/dataset.py | 46 +- .../src/utils/helpers.py | 89 +- .../src/utils/options.py | 369 +- .../tests/__init__.py | 1 + .../tests/model_test.py | 130 +- .../tests/tile_test.py | 48 +- .../images/apply-flatfield-tool/.gitignore | 37 + .../images/apply-flatfield-tool/Dockerfile | 15 +- .../apply-flatfield-tool/pyproject.toml | 5 +- 
.../images/apply_flatfield/apply_flatfield.py | 34 +- .../images/binary_operations/__main__.py | 21 +- .../images/binary_operations/binops.py | 70 +- .../images/binary_operations/utils.py | 41 +- .../images/image-assembler-tool/.gitignore | 1 - .../transforms/images/montage/__main__.py | 32 +- .../transforms/images/montage/montage.py | 44 +- .../images/transforms/images/montage/utils.py | 15 +- .../polus-apply-flatfield-plugin/VERSION | 2 +- .../build-docker.sh | 2 +- .../polus-apply-flatfield-plugin/plugin.json | 176 +- .../polus-apply-flatfield-plugin/src/main.py | 54 +- .../images/polus-autocropping-plugin/VERSION | 2 +- .../requirements.txt | 2 +- .../polus-autocropping-plugin/src/__init__.py | 2 +- .../polus-autocropping-plugin/src/autocrop.py | 239 +- .../polus-autocropping-plugin/src/main.py | 154 +- .../src/utils/constants.py | 4 +- .../src/utils/helpers.py | 51 +- .../src/utils/local_distogram.py | 78 +- .../tests/__init__.py | 3 +- .../tests/test_autocrop.py | 28 +- .../tests/version_test.py | 10 +- .../images/polus-ftl-label-plugin/Cargo.toml | 2 +- .../images/polus-ftl-label-plugin/README.md | 2 +- .../SimpleTiledTiffViewer.py | 41 +- .../polus-ftl-label-plugin/build-docker.sh | 2 +- .../ftl_rust/__init__.py | 81 +- .../images/polus-ftl-label-plugin/plugin.json | 86 +- .../polus-ftl-label-plugin/run-plugin.sh | 2 +- .../polus-ftl-label-plugin/rust_setup.py | 1 - .../polus-ftl-label-plugin/src/bench_rust.py | 17 +- .../images/polus-ftl-label-plugin/src/ftl.pyx | 142 +- .../images/polus-ftl-label-plugin/src/main.py | 94 +- .../src/requirements.txt | 1 - .../polus-ftl-label-plugin/src/setup.py | 16 +- .../Dockerfile | 4 +- .../polus-image-registration-plugin/README.md | 63 +- .../build-docker.sh | 2 +- .../plugin.json | 186 +- .../src/image_registration.py | 611 +- .../src/log4j.properties | 2 +- .../src/main.py | 159 +- .../src/parser.py | 145 +- .../README.md | 11 +- .../build-docker.sh | 2 +- .../plugin.json | 100 +- .../src/main.py | 148 +- 
.../src/requirements.txt | 2 +- .../polus-rolling-ball-plugin/Dockerfile | 2 +- .../images/polus-rolling-ball-plugin/VERSION | 2 +- .../polus-rolling-ball-plugin/plugin.json | 2 +- .../polus-rolling-ball-plugin/src/main.py | 83 +- .../src/rolling_ball.py | 33 +- .../tests/__init__.py | 3 +- .../tests/correctness_test.py | 14 +- .../tests/version_test.py | 16 +- .../polus-stack-z-slice-plugin/Dockerfile | 4 +- .../polus-stack-z-slice-plugin/README.md | 2 +- .../images/polus-stack-z-slice-plugin/VERSION | 2 +- .../build-docker.sh | 2 +- .../polus-stack-z-slice-plugin/plugin.json | 1 - .../polus-stack-z-slice-plugin/run-plugin.sh | 2 +- .../polus-stack-z-slice-plugin/src/main.py | 163 +- .../remove-border-objects-plugin/Dockerfile | 2 +- .../remove-border-objects-plugin/README.md | 6 +- .../remove-border-objects-plugin/VERSION | 2 +- .../build-docker.sh | 2 +- .../package-release.sh | 2 +- .../remove-border-objects-plugin/plugin.json | 2 +- .../run-plugin.sh | 3 +- .../src/functions.py | 49 +- .../remove-border-objects-plugin/src/main.py | 127 +- .../src/requirements.txt | 2 +- .../tests/test_main.py | 65 +- .../tests/version_test.py | 55 +- .../polus-recycle-vector-plugin/Dockerfile | 4 +- .../polus-recycle-vector-plugin/README.md | 5 +- .../polus-recycle-vector-plugin/VERSION | 2 +- .../build-docker.sh | 2 +- .../package-release.sh | 2 +- .../polus-recycle-vector-plugin/plugin.json | 1 - .../polus-recycle-vector-plugin/run-plugin.sh | 3 +- .../polus-recycle-vector-plugin/src/main.py | 32 +- .../src/requirements.txt | 2 +- utils/filepattern-generator-plugin/README.md | 4 - utils/filepattern-generator-plugin/VERSION | 2 +- .../build-docker.sh | 2 +- .../filepattern-generator-plugin/plugin.json | 2 +- .../run-plugin.sh | 1 - .../filepattern-generator-plugin/src/main.py | 44 +- .../tests/test_main.py | 1 - .../tests/version_test.py | 28 +- .../images/utils/midrc_download/__init__.py | 2 - .../Dockerfile | 2 +- .../VERSION | 2 +- .../build-docker.sh | 2 +- 
.../bumpversion.cfg | 2 +- .../plugin.json | 2 +- .../run-plugin.sh | 2 +- .../src/main.py | 116 +- .../src/requirements.txt | 2 +- .../tests/__init__.py | 4 +- .../tests/version_test.py | 28 +- utils/polus-imagej-macro-plugin/Dockerfile | 2 +- utils/polus-imagej-macro-plugin/README.md | 12 +- utils/polus-imagej-macro-plugin/VERSION | 2 +- .../polus-imagej-macro-plugin/build-docker.sh | 2 +- utils/polus-imagej-macro-plugin/plugin.json | 2 +- utils/polus-imagej-macro-plugin/run-plugin.sh | 1 - utils/polus-imagej-macro-plugin/src/main.py | 291 +- utils/polus-imagej-util/.gitignore | 2 +- utils/polus-imagej-util/README.md | 6 +- utils/polus-imagej-util/bfio.cfg | 2 +- .../classes/logtemplates/mainlog.txt | 18 +- utils/polus-imagej-util/classes/populate.py | 1223 ++-- utils/polus-imagej-util/cookiecutter.json | 154 +- utils/polus-imagej-util/filepattern.cfg | 2 +- utils/polus-imagej-util/generate.py | 115 +- utils/polus-imagej-util/imagej_kaniko.py | 63 +- utils/polus-imagej-util/imagej_ui.ipynb | 490 +- utils/polus-imagej-util/requirements.txt | 2 +- utils/polus-imagej-util/scale.json | 90 +- utils/polus-imagej-util/tests/core_test.py | 46 +- .../{{cookiecutter.project_slug}}/Dockerfile | 2 +- .../{{cookiecutter.project_slug}}/README.md | 2 +- .../{{cookiecutter.project_slug}}/VERSION | 2 +- .../build-docker.sh | 2 +- .../{{cookiecutter.project_slug}}/plugin.json | 2 +- .../run-plugin.sh | 2 +- .../src/ij_converter.py | 83 +- .../{{cookiecutter.project_slug}}/src/main.py | 272 +- .../tests/data_test.py | 100 +- .../tests/scale_test.py | 202 +- .../tests/unit_test.py | 100 +- .../tests/version_test.py | 3 - utils/polus-notebook-plugin/Dockerfile | 2 +- utils/polus-notebook-plugin/README.md | 2 +- utils/polus-notebook-plugin/VERSION | 2 +- .../plugin_csv_to_csv.json | 106 +- .../plugin_img_to_csv.json | 106 +- .../plugin_img_to_img.json | 106 +- .../src/execute_notebook.py | 126 +- utils/polus-python-template/.gitignore | 2 +- utils/polus-python-template/README.md | 4 
+- utils/polus-python-template/cookiecutter.json | 27 +- .../hooks/post_gen_project.py | 17 +- .../hooks/pre_gen_project.py | 19 +- .../plugin.json | 2 +- .../{{ cookiecutter.package_name }}.py | 2 +- .../tests/conftest.py | 41 +- .../test_{{cookiecutter.package_name}}.py | 2 +- .../Dockerfile | 2 +- .../VERSION | 2 +- .../build-docker.sh | 2 +- .../main.py | 134 +- .../plugin.json | 154 +- utils/polus-subset-data-plugin/Dockerfile | 4 +- utils/polus-subset-data-plugin/README.md | 1 - utils/polus-subset-data-plugin/VERSION | 2 +- .../polus-subset-data-plugin/build-docker.sh | 2 +- utils/polus-subset-data-plugin/plugin.json | 2 +- utils/polus-subset-data-plugin/src/main.py | 65 +- .../src/requirements.txt | 2 +- .../microjson_to_ome/microjson_ome.py | 4 +- .../Dockerfile | 4 +- .../README.md | 1 - .../VERSION | 2 +- .../build-docker.sh | 2 +- .../plugin.json | 2 +- .../src/dl_fi.py | 3 +- .../src/main.py | 364 +- .../src/utils.py | 698 +- .../Dockerfile | 2 +- .../VERSION | 2 +- .../build-docker.sh | 2 +- .../plugin.json | 2 +- .../src/log4j.properties | 2 +- .../src/main.py | 472 +- .../Dockerfile | 4 +- .../README.md | 2 - .../build-docker.sh | 2 +- .../plugin.json | 2 +- .../src/main.py | 169 +- .../src/requirements.txt | 2 +- .../polus-precompute-volume-plugin/Dockerfile | 2 +- .../polus-precompute-volume-plugin/README.md | 3 +- .../polus-precompute-volume-plugin/VERSION | 2 +- .../plugin.json | 2 +- .../run-plugin.sh | 4 +- .../src/main.py | 133 +- .../src/requirements.txt | 2 +- .../src/utils.py | 722 +- .../tabular_to_microjson/microjson_overlay.py | 4 +- 331 files changed, 15242 insertions(+), 13469 deletions(-) create mode 100644 transforms/images/apply-flatfield-tool/.gitignore diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml index 6e78c94e6..68bcd468a 100644 --- a/.pre-commit-config.yaml +++ b/.pre-commit-config.yaml @@ -8,7 +8,9 @@ repos: exclude: (.*?)\.(h5)$ - id: check-case-conflict - id: check-json + exclude: .*\{\{.* - id: 
pretty-format-json + exclude: .*\{\{.* args: - "--autofix" - "--indent=2" @@ -17,6 +19,13 @@ repos: - id: check-xml - id: check-yaml - id: debug-statements + exclude: | + (?x)( + test_[a-zA-Z0-9_]+\.py$| + [a-zA-Z0-9_]+_test\.py$| + conftest\.py$| + .*\{\{.* + ) - id: detect-aws-credentials args: - "--allow-missing-credentials" @@ -25,6 +34,7 @@ repos: - id: mixed-line-ending args: ["--fix=lf"] description: Forces to replace line ending by the UNIX 'lf' character. + exclude: \.(bat|ps1|cmd)$ - id: trailing-whitespace exclude: ".bumpversion.cfg" - id: check-merge-conflict @@ -33,11 +43,13 @@ repos: rev: "23.3.0" hooks: - id: black + files: ^transforms/images/apply-flatfield-tool/ language_version: python3.9 exclude: | (?x)( ^src\/polus\/plugins\/_plugins\/models\/pydanticv1\/\w*Schema.py$| - ^src\/polus\/plugins\/_plugins\/models\/pydanticv2\/\w*Schema.py$ + ^src\/polus\/plugins\/_plugins\/models\/pydanticv2\/\w*Schema.py$| + .*\{\{.* ) - repo: https://github.com/charliermarsh/ruff-pre-commit @@ -45,11 +57,13 @@ repos: rev: "v0.0.274" hooks: - id: ruff + files: ^transforms/images/apply-flatfield-tool/ exclude: | (?x)( test_[a-zA-Z0-9]+.py$| ^src\/polus\/plugins\/_plugins\/models\/pydanticv1\/\w*Schema.py$| - ^src\/polus\/plugins\/_plugins\/models\/pydanticv2\/\w*Schema.py$ + ^src\/polus\/plugins\/_plugins\/models\/pydanticv2\/\w*Schema.py$| + .*\{\{.* ) args: [--fix] @@ -57,10 +71,12 @@ repos: rev: "v1.4.0" hooks: - id: mypy + files: ^transforms/images/apply-flatfield-tool/ exclude: | (?x)( test_[a-zA-Z0-9]+.py$| ^src\/polus\/plugins\/_plugins\/models\/pydanticv1\/\w*Schema.py$| - ^src\/polus\/plugins\/_plugins\/models\/pydanticv2\/\w*Schema.py$ + ^src\/polus\/plugins\/_plugins\/models\/pydanticv2\/\w*Schema.py$| + .*\{\{.* ) additional_dependencies: [types-requests==2.31.0.1] diff --git a/Jenkinsfile b/Jenkinsfile index 0fa72d11a..fb3fcc827 100644 --- a/Jenkinsfile +++ b/Jenkinsfile @@ -57,7 +57,7 @@ pipeline { image.push() image.push(dockerVersion) } - + 
env.PROJECT_NAME = "${pluginName}" env.FULL_DESC = readFile(file: 'README.md') env.BRIEF_DESC = "${PROJECT_URL}/tree/master/${PROJECT_NAME}" @@ -72,4 +72,4 @@ pipeline { } } } -} \ No newline at end of file +} diff --git a/dimension_reduction/PCA/Distributed-Memory/Dockerfile_DistributedMemory b/dimension_reduction/PCA/Distributed-Memory/Dockerfile_DistributedMemory index ca6f3efc9..fd99c590c 100644 --- a/dimension_reduction/PCA/Distributed-Memory/Dockerfile_DistributedMemory +++ b/dimension_reduction/PCA/Distributed-Memory/Dockerfile_DistributedMemory @@ -10,7 +10,7 @@ RUN apt-get update && \ apt-get clean && \ rm -rf /var/lib/apt/lists/* -# Instal Conda +# Instal Conda ENV PATH $CONDA_DIR/bin:$PATH RUN wget --quiet https://repo.continuum.io/miniconda/Miniconda$CONDA_PYTHON_VERSION-latest-Linux-x86_64.sh -O /tmp/miniconda.sh && \ echo 'export PATH=$CONDA_DIR/bin:$PATH' > /etc/profile.d/conda.sh && \ diff --git a/dimension_reduction/PCA/Distributed-Memory/PCA_Cov_DistributedMemory.py b/dimension_reduction/PCA/Distributed-Memory/PCA_Cov_DistributedMemory.py index b28c2c9f2..2fa232920 100644 --- a/dimension_reduction/PCA/Distributed-Memory/PCA_Cov_DistributedMemory.py +++ b/dimension_reduction/PCA/Distributed-Memory/PCA_Cov_DistributedMemory.py @@ -1,102 +1,119 @@ -''' - * @author Mahdi Maghrebi - * July 2019 -''' +"""* @author Mahdi Maghrebi +* July 2019. 
+""" +import argparse +import logging +import math import os -import socket -import torch -import torch.distributed as dist -import numpy as np -import pandas as pd from datetime import datetime -import subprocess from subprocess import check_output -import math -import argparse -import logging -''' +import numpy as np +import pandas as pd +import torch +import torch.distributed as dist + +""" readInput reads the input file and returns it as a numpy 2D array @param rank the rank of each processor @param size total number of processors @return input data as a numpy 2D array -''' -def readInput(rank,size): - linesTotalTensor = torch.tensor(1) - fileName = args.inputPath.split('.')[0].split('/')[-1] - #Procssor 0 splits the input data and creates one input file per processor - if (rank == 0): +""" + + +def readInput(rank, size): + linesTotalTensor = torch.tensor(1) + fileName = args.inputPath.split(".")[0].split("/")[-1] + # Procssor 0 splits the input data and creates one input file per processor + if rank == 0: output = check_output(["wc", "-l", args.inputPath]) - linesTotal = int(output.decode("utf-8").split(' ')[0]) - linesTotalTensor = torch.tensor(linesTotal) - linePerprocessor = math.floor(linesTotalTensor/size)+size - cmd = "split "+ "-dl "+ str(linePerprocessor) + " " + args.inputPath+ \ - " --additional-suffix=.csv " + fileName + "_" + linesTotal = int(output.decode("utf-8").split(" ")[0]) + linesTotalTensor = torch.tensor(linesTotal) + linePerprocessor = math.floor(linesTotalTensor / size) + size + cmd = ( + "split " + + "-dl " + + str(linePerprocessor) + + " " + + args.inputPath + + " --additional-suffix=.csv " + + fileName + + "_" + ) os.system(cmd) - #Halt all the processors until procssor 0 returns - torch.distributed.barrier(async_op=False) - #Each processor reads its own data and converts it to a numpy array - splitFileName = "{}_{:02d}.csv".format(fileName, rank) - if (rank == 0): - df=pd.read_csv(splitFileName) + # Halt all the processors until 
procssor 0 returns + torch.distributed.barrier(async_op=False) + # Each processor reads its own data and converts it to a numpy array + splitFileName = f"{fileName}_{rank:02d}.csv" + if rank == 0: + df = pd.read_csv(splitFileName) else: - df = pd.read_csv(splitFileName,header=None) + df = pd.read_csv(splitFileName, header=None) d = df.values - data = np.float32(d) - del d - #remove splitted input file for each processor - cmd2 = 'rm ' + splitFileName + data = np.float32(d) + del d + # remove splitted input file for each processor + cmd2 = "rm " + splitFileName os.system(cmd2) return data - -''' -Normalize the input data using Z-score method + + +""" +Normalize the input data using Z-score method @param data the input data as a 2D numpy array @return A PyTorch tensor with data normalized in each column -''' +""" + + def Normalize(data): - #convert numpy array to pytorch tensor + # convert numpy array to pytorch tensor x = torch.from_numpy(data).float().to(args.deviceName) - #compute the sum of each column for local data - xSum = torch.sum(x,0).to(args.deviceName) - #compute the count of rows for local data - xCounts=torch.tensor(x.shape[0]) - #compute the sum of each column for the entire global data - torch.distributed.all_reduce(xSum,op=dist.ReduceOp.SUM,async_op=False) - #compute the count of rows for the entire global data - torch.distributed.all_reduce(xCounts,op=dist.ReduceOp.SUM,async_op=False) - #compute the Mean of each column for the entire global data - xMean=xSum/xCounts - #compute (x-xmean)^2 for local data - xSquared=torch.sum(torch.pow((torch.sub(x,xMean[None,:])),2),0) - #compute (x-xmean)^2 for the entire global data - torch.distributed.all_reduce(xSquared,op=dist.ReduceOp.SUM,async_op=False) - #compute Standard Deviation for local data of each column - xStd=torch.sqrt(xSquared/(xCounts-1)) - #compute Z-Score Normalization for local data - xNormalized=torch.sub(x,xMean[None,:])/xStd[None,:] - #delete pytorch tensor for the local data - del 
x,xSum,xMean,xStd,xSquared,xCounts - #return tensor of normalized data + # compute the sum of each column for local data + xSum = torch.sum(x, 0).to(args.deviceName) + # compute the count of rows for local data + xCounts = torch.tensor(x.shape[0]) + # compute the sum of each column for the entire global data + torch.distributed.all_reduce(xSum, op=dist.ReduceOp.SUM, async_op=False) + # compute the count of rows for the entire global data + torch.distributed.all_reduce(xCounts, op=dist.ReduceOp.SUM, async_op=False) + # compute the Mean of each column for the entire global data + xMean = xSum / xCounts + # compute (x-xmean)^2 for local data + xSquared = torch.sum(torch.pow((torch.sub(x, xMean[None, :])), 2), 0) + # compute (x-xmean)^2 for the entire global data + torch.distributed.all_reduce(xSquared, op=dist.ReduceOp.SUM, async_op=False) + # compute Standard Deviation for local data of each column + xStd = torch.sqrt(xSquared / (xCounts - 1)) + # compute Z-Score Normalization for local data + xNormalized = torch.sub(x, xMean[None, :]) / xStd[None, :] + # delete pytorch tensor for the local data + del x, xSum, xMean, xStd, xSquared, xCounts + # return tensor of normalized data return xNormalized - -''' + + +""" Create Covariance Matrix and compute its eigenVectors @param xNormalized PyTorch tensor with data normalized in each column @return eigenVectors of Covariance Matrix -''' -def ComputePCA(xNormalized): +""" + + +def ComputePCA(xNormalized): n = xNormalized.shape[1] - coefficient = 1/(n-1) - covMatrix = torch.matmul(torch.transpose(xNormalized, 0, 1),xNormalized).to(args.deviceName) - covMatrix = torch.mul(covMatrix,coefficient) - torch.distributed.all_reduce(covMatrix,op=dist.ReduceOp.SUM,async_op=False) + coefficient = 1 / (n - 1) + covMatrix = torch.matmul(torch.transpose(xNormalized, 0, 1), xNormalized).to( + args.deviceName, + ) + covMatrix = torch.mul(covMatrix, coefficient) + torch.distributed.all_reduce(covMatrix, op=dist.ReduceOp.SUM, async_op=False) 
eigenValues, eigenVectors = torch.symeig(covMatrix, eigenvectors=True) - np.savetxt("eigenValues.csv",eigenValues.cpu().numpy(), delimiter=",") - np.savetxt("eigenVectors.csv",eigenVectors.cpu().numpy(), delimiter=",") - return eigenVectors - -''' + np.savetxt("eigenValues.csv", eigenValues.cpu().numpy(), delimiter=",") + np.savetxt("eigenVectors.csv", eigenVectors.cpu().numpy(), delimiter=",") + return eigenVectors + + +""" Project data to PCs space by multiplying normalized data with Eigenvectors The results are saved in outputPath (second input argument from command line) and the temporary files are removed @@ -104,58 +121,94 @@ def ComputePCA(xNormalized): @param eigenVectors eigenvectors of Covariance Matrix @param rank the rank of each processor @param size total number of processors -''' -def Project_Data(xNormalized,eigenVectors,rank,size): - projectedData = torch.matmul(xNormalized,eigenVectors).to(args.deviceName) - outputFile = "tmpData_"+str(rank)+".csv" - np.savetxt(outputFile,projectedData.cpu().numpy(), delimiter=",") - if (rank == 0): - cmd = 'cat ' +""" + + +def Project_Data(xNormalized, eigenVectors, rank, size): + projectedData = torch.matmul(xNormalized, eigenVectors).to(args.deviceName) + outputFile = "tmpData_" + str(rank) + ".csv" + np.savetxt(outputFile, projectedData.cpu().numpy(), delimiter=",") + if rank == 0: + cmd = "cat " for i in range(size): - cmd += " tmpData_"+str(i)+".csv " - cmd += ("> "+ str(args.outputPath)) + cmd += " tmpData_" + str(i) + ".csv " + cmd += "> " + str(args.outputPath) os.system(cmd) - #rm tmpData_*.csv + # rm tmpData_*.csv for i in range(size): - cmd2 = "rm tmpData_"+str(i)+".csv " - os.system(cmd2) - -''' + cmd2 = "rm tmpData_" + str(i) + ".csv " + os.system(cmd2) + + +""" Initialize processors and calling different funcions @param rank the rank of each processor @param size total number of processors -@param backend the communication method between processors -''' -def init_processes(rank, size, 
backend='mpi'): - """ Initialize the distributed environment. """ +@param backend the communication method between processors +""" + + +def init_processes(rank, size, backend="mpi"): + """Initialize the distributed environment.""" dist.init_process_group(backend, rank=rank, world_size=size) startTime = datetime.now() - TmpOutputData = readInput(rank,size) + TmpOutputData = readInput(rank, size) Duration = datetime.now() - startTime - logging.info("Rank # "+str(rank)+ " says, it took "+str(Duration) + " for READING INPUT FILE") - print("Rank # "+str(rank)+ " says, it took "+str(Duration) + " for READING INPUT FILE") + logging.info( + "Rank # " + + str(rank) + + " says, it took " + + str(Duration) + + " for READING INPUT FILE", + ) + print( + "Rank # " + + str(rank) + + " says, it took " + + str(Duration) + + " for READING INPUT FILE", + ) startTime = datetime.now() TmpOutputNormalized = Normalize(TmpOutputData) TmpOutputEigenVectors = ComputePCA(TmpOutputNormalized) Duration = datetime.now() - startTime - logging.info("Rank # "+str(rank)+ " says, it took "+str(Duration)+ " for COMPUTING PCA") - print("Rank # "+str(rank)+ " says, it took "+str(Duration)+ " for COMPUTING PCA") + logging.info( + "Rank # " + str(rank) + " says, it took " + str(Duration) + " for COMPUTING PCA", + ) + print( + "Rank # " + str(rank) + " says, it took " + str(Duration) + " for COMPUTING PCA", + ) startTime = datetime.now() - Project_Data(TmpOutputNormalized,TmpOutputEigenVectors,rank,size) + Project_Data(TmpOutputNormalized, TmpOutputEigenVectors, rank, size) Duration = datetime.now() - startTime - logging.info("Rank # "+str(rank) + " says, it took "+str(Duration)+ " for WRITING OUTPUTS") - print("Rank # "+str(rank) + " says, it took "+str(Duration)+ " for WRITING OUTPUTS") -''' -Code begins from here -''' + logging.info( + "Rank # " + + str(rank) + + " says, it took " + + str(Duration) + + " for WRITING OUTPUTS", + ) + print( + "Rank # " + + str(rank) + + " says, it took " + + 
str(Duration) + + " for WRITING OUTPUTS", + ) + + +""" +Code begins from here +""" if __name__ == "__main__": logging.basicConfig(filename="Setting.txt", level=logging.INFO) argparser = argparse.ArgumentParser() - argparser.add_argument('deviceName', type=str) - argparser.add_argument('inputPath', type=str) - argparser.add_argument('outputPath', type=str, nargs='?', default='./PCA_Projected_Data_Final.csv') + argparser.add_argument("deviceName", type=str) + argparser.add_argument("inputPath", type=str) + argparser.add_argument( + "outputPath", type=str, nargs="?", default="./PCA_Projected_Data_Final.csv", + ) args = argparser.parse_args() - world_size = int(os.environ['OMPI_COMM_WORLD_SIZE']) - world_rank = int(os.environ['OMPI_COMM_WORLD_RANK']) - init_processes(world_rank, world_size, 'mpi') - + world_size = int(os.environ["OMPI_COMM_WORLD_SIZE"]) + world_rank = int(os.environ["OMPI_COMM_WORLD_RANK"]) + init_processes(world_rank, world_size, "mpi") diff --git a/dimension_reduction/PCA/README.rst b/dimension_reduction/PCA/README.rst index 7df645f42..cc31f8ee5 100644 --- a/dimension_reduction/PCA/README.rst +++ b/dimension_reduction/PCA/README.rst @@ -4,11 +4,11 @@ PCA (Principle Component Analysis) Implementation Please refer to `this link `_ for detailed description of PCA algorithm. PCA has been implemented in PyTorch in two ways for Shared-Memory and -Distributed-Memory systems. The Shared-Memory implementation is an ideal solution -for relatively small-sized dataset where the entire data can be fit into the memory. +Distributed-Memory systems. The Shared-Memory implementation is an ideal solution +for relatively small-sized dataset where the entire data can be fit into the memory. For larger dataset, the Distributed-Memory implementation is recommended where the dataset is divided among multiple machines and each machine will perform independent -computing on a subset of the dataset. +computing on a subset of the dataset. 
------------------------------------ Shared-Memory Systems Implementation @@ -17,15 +17,15 @@ Shared-Memory Systems Implementation The code requires the following input arguments. 1- ``deviceName``: This parameter defines the compute device and is either 'cpu' or 'cuda:0'. - + 2- ``applySignFlip``: If this parameter is set to 'yes' the sign of the projected data in PCs space is allowed to be flipped. 3- ``computeStdev``: If this parameter is set to 'yes' the post-compute analysis will be performed on the PC axes and the standard deviation of the projected data will be computed along PC axes (column 1) along with the ratio of (standard deviation for each axes)/(sum of standard deviations for all PC axes)*100 in (column 2). -4- ``inputPath``: The full path to the input csv file which contains raw data. In this file, the records are stored in rows and the features in columns. Please note that inputPath should only contain a single csv file. +4- ``inputPath``: The full path to the input csv file which contains raw data. In this file, the records are stored in rows and the features in columns. Please note that inputPath should only contain a single csv file. + +5- ``outputPath``: The full path to the csv file where the projected data in PCs space are saved. This argument is optional and the default path is the current directory with the file name PCA_Projected_Data_Final.csv -5- ``outputPath``: The full path to the csv file where the projected data in PCs space are saved. This argument is optional and the default path is the current directory with the file name PCA_Projected_Data_Final.csv - The code produces the following outputs. 1- ``outputPath.csv``: The output file where the projected data in PCs space are saved. The name of this file was inserted from the input argument and the default name is PCA_Projected_Data_Final.csv @@ -33,11 +33,11 @@ The code produces the following outputs. 2- ``PCs.csv``: The rows of this file represents the PCs spatial directions. 
3- ``SingularValues.csv``: The singular values which were derived from SVD decompisition. - -4- ``Setting.txt``: The logging file containing the error and messages. + +4- ``Setting.txt``: The logging file containing the error and messages. 5- ``Stdev.csv``: This file is produced only if the input argument of "computeStdev" is set to "yes". This file contains 2 columns: the standard deviation of the projected data computed along PC axes (column 1) and the ratio of (standard deviation for each axes)/(sum of standard deviations for all PC axes)*100 in (column 2). - + -------------------------------------------- Installing PyTorch for Shared-Memory Systems -------------------------------------------- @@ -50,44 +50,44 @@ The first step is to install conda as shown below. chmod 755 Miniconda3-latest-Linux-x86_64.sh ./Miniconda3-latest-Linux-x86_64.sh conda create --name PyTorch_Shared Python=3.7.3 flask - conda activate PyTorch_Shared + conda activate PyTorch_Shared + + +Next, PyTorch is installed from the source as follows. - -Next, PyTorch is installed from the source as follows. - .. code:: bash - #Install Dependencies in Conda: - conda install numpy ninja pyyaml mkl mkl-include setuptools cmake cffi typing pandas dask + #Install Dependencies in Conda: + conda install numpy ninja pyyaml mkl mkl-include setuptools cmake cffi typing pandas dask conda install -c pytorch magma-cuda101 git clone --recursive https://github.com/pytorch/pytorch cd pytorch # if you are updating an existing checkout git submodule sync git submodule update --init --recursive - export CMAKE_PREFIX_PATH=${CONDA_PREFIX:-"$(dirname $(which conda))/../"} + export CMAKE_PREFIX_PATH=${CONDA_PREFIX:-"$(dirname $(which conda))/../"} python setup.py install >> output.txt 2>&1 - -Now, the python code can be executed as follows. - - .. code:: bash - +Now, the python code can be executed as follows. + + + .. 
code:: bash + python PCA_SVD_SharedMemory.py --deviceName cpu --applySignFlip true \ - --computeStdev true --inputPath . --outputPath . - + --computeStdev true --inputPath . --outputPath . + + +Also, the docker can be run as follows. -Also, the docker can be run as follows. - - .. code:: bash - - sudo docker run -v /path/to/Docker:/data/inputs -v /path/to/Docker:/data/outputs \ + .. code:: bash + + sudo docker run -v /path/to/Docker:/data/inputs -v /path/to/Docker:/data/outputs \ dockerImageName --deviceName cpu --applySignFlip true \ --computeStdev true --inputPath /data/inputs --outputPath /data/outputs - + ----------------------------------------- Distributed-Memory Systems Implementation ----------------------------------------- @@ -98,9 +98,9 @@ The code requires the following input arguments. 2- ``inputPath`` : The full path to the input csv file which contains raw data. In this file, the records are stored in rows and the features in columns. -3- ``outputPath``: The full path to the csv file where the projected data in PCs space are saved. This argument is optional and the default path is the current directory with the file name PCA_Projected_Data_Final.csv. +3- ``outputPath``: The full path to the csv file where the projected data in PCs space are saved. This argument is optional and the default path is the current directory with the file name PCA_Projected_Data_Final.csv. -Also, for launching PyTorch using mpirun, the number of processors should also be included after flag "-np". An example of exectuing the code is given below. In this example, 2 processors will run the code simultaneously. +Also, for launching PyTorch using mpirun, the number of processors should also be included after flag "-np". An example of exectuing the code is given below. In this example, 2 processors will run the code simultaneously. .. 
code:: bash @@ -108,13 +108,13 @@ mpirun -np 2 python PCA_Cov_DistributedMemory.py cpu /Path/input.csv /Path/outpu The code produces the following outputs. -1- ``outputPath.csv``: The output file where the projected data in PCs space is saved. The name of this file was inserted from the input argument and the default name is PCA_Projected_Data_Final.csv. +1- ``outputPath.csv``: The output file where the projected data in PCs space is saved. The name of this file was inserted from the input argument and the default name is PCA_Projected_Data_Final.csv. -2- ``Setting.txt``: The logging file containing the error and messages. +2- ``Setting.txt``: The logging file containing the error and messages. 3- ``eigenValues.csv``: The eigen values of the covariance matrix ordered in ascending order. -4- ``eigenVectors.csv``: The corresponding eigen vectors of the covariance matrix . +4- ``eigenVectors.csv``: The corresponding eigen vectors of the covariance matrix . ------------------------------------------------- Installing PyTorch for Distributed-Memory Systems @@ -154,19 +154,8 @@ Now, PyTorch can be launched on multiple distributed machines as follows. #Execute the Code on Multiple machines, multiple processes: mpirun --hostfile nodes.txt --map-by node -np 2 python PCA_Cov_DistributedMemory.py cpu /Path/input.csv /Path/output.csv - #The nodes.txt file is a simple text file where machines IP are listed on each line. + #The nodes.txt file is a simple text file where machines IP are listed on each line. 
For more information about installing PyTorch for the distributed systems, refer to the following links: https://github.com/pytorch/pytorch#from-source https://pytorch.org/tutorials/intermediate/dist_tuto.html - - - - - - - - - - - diff --git a/dimension_reduction/PCA/Shared-Memory/Dockerfile_SharedMemory_CPUonly b/dimension_reduction/PCA/Shared-Memory/Dockerfile_SharedMemory_CPUonly index e47adaa0f..60c8e93e1 100644 --- a/dimension_reduction/PCA/Shared-Memory/Dockerfile_SharedMemory_CPUonly +++ b/dimension_reduction/PCA/Shared-Memory/Dockerfile_SharedMemory_CPUonly @@ -6,4 +6,4 @@ RUN mkdir -p /home/PCA WORKDIR /home/PCA COPY . . -ENTRYPOINT ["python", "PCA_SVD_SharedMemory.py"] \ No newline at end of file +ENTRYPOINT ["python", "PCA_SVD_SharedMemory.py"] diff --git a/dimension_reduction/PCA/Shared-Memory/Dockerfile_SharedMemory_GPUEnabled b/dimension_reduction/PCA/Shared-Memory/Dockerfile_SharedMemory_GPUEnabled index 4957b5263..2ce32a98f 100644 --- a/dimension_reduction/PCA/Shared-Memory/Dockerfile_SharedMemory_GPUEnabled +++ b/dimension_reduction/PCA/Shared-Memory/Dockerfile_SharedMemory_GPUEnabled @@ -10,7 +10,7 @@ RUN apt-get update && \ apt-get clean && \ rm -rf /var/lib/apt/lists/* -# Instal Conda +# Instal Conda ENV PATH $CONDA_DIR/bin:$PATH RUN wget --quiet https://repo.continuum.io/miniconda/Miniconda$CONDA_PYTHON_VERSION-latest-Linux-x86_64.sh -O /tmp/miniconda.sh && \ echo 'export PATH=$CONDA_DIR/bin:$PATH' > /etc/profile.d/conda.sh && \ @@ -18,11 +18,11 @@ echo 'export PATH=$CONDA_DIR/bin:$PATH' > /etc/profile.d/conda.sh && \ rm -rf /tmp/* && \ apt-get clean && \ rm -rf /var/lib/apt/lists/* - + RUN mkdir -p /home/PCA WORKDIR /home/PCA -RUN conda install flask numpy ninja pyyaml mkl mkl-include setuptools cmake cffi typing pandas dask && \ +RUN conda install flask numpy ninja pyyaml mkl mkl-include setuptools cmake cffi typing pandas dask && \ conda install -c pytorch magma-cuda101 RUN git clone --recursive https://github.com/pytorch/pytorch diff --git 
a/dimension_reduction/PCA/Shared-Memory/PCA_SVD_SharedMemory.py b/dimension_reduction/PCA/Shared-Memory/PCA_SVD_SharedMemory.py index db25e1c3a..7714f0158 100644 --- a/dimension_reduction/PCA/Shared-Memory/PCA_SVD_SharedMemory.py +++ b/dimension_reduction/PCA/Shared-Memory/PCA_SVD_SharedMemory.py @@ -1,41 +1,44 @@ -import numpy as np -import pandas as pd -from datetime import datetime -import torch -import mmap -import dask.dataframe as ddf -import dask.multiprocessing import argparse +import glob import logging +import mmap import os -import glob +from datetime import datetime + +import numpy as np +import torch argparser = argparse.ArgumentParser() -#argparser.add_argument('--readOption', dest='readOption', type=str) -argparser.add_argument('--deviceName', dest='deviceName', type=str) -argparser.add_argument('--applySignFlip', dest='applySignFlip', type=str) -argparser.add_argument('--computeStdev', dest='computeStdev', type=str) -argparser.add_argument('--inputPath', dest='inputPath', type=str) -argparser.add_argument('--outputPath', dest='outputPath', type=str, nargs='?', default='./PCA_Projected_Data_Final.csv') +argparser.add_argument("--deviceName", dest="deviceName", type=str) +argparser.add_argument("--applySignFlip", dest="applySignFlip", type=str) +argparser.add_argument("--computeStdev", dest="computeStdev", type=str) +argparser.add_argument("--inputPath", dest="inputPath", type=str) +argparser.add_argument( + "--outputPath", + dest="outputPath", + type=str, + nargs="?", + default="./PCA_Projected_Data_Final.csv", +) args = argparser.parse_args() # Find the first CSV file in the input folder inputPath = glob.glob(os.path.join(args.inputPath, "*.csv"))[0] # Set the path to the output files -outputPath = os.path.join(args.outputPath, 'PCA_Projected_Data_Final.csv') -SingularValuesOutputPath = os.path.join(args.outputPath, 'SingularValues.csv') -PCsOutputPath = os.path.join(args.outputPath, 'PCs.csv') -StdevOutputPath = os.path.join(args.outputPath, 
'Stdev.csv') -SettingOutputPath = os.path.join(args.outputPath, 'Setting.txt') +outputPath = os.path.join(args.outputPath, "PCA_Projected_Data_Final.csv") +SingularValuesOutputPath = os.path.join(args.outputPath, "SingularValues.csv") +PCsOutputPath = os.path.join(args.outputPath, "PCs.csv") +StdevOutputPath = os.path.join(args.outputPath, "Stdev.csv") +SettingOutputPath = os.path.join(args.outputPath, "Setting.txt") -logging.basicConfig(filename=SettingOutputPath , level=logging.INFO) +logging.basicConfig(filename=SettingOutputPath, level=logging.INFO) startTime = datetime.now() -'''#Reading input data directly and creating a numpy array. +"""#Reading input data directly and creating a numpy array. if args.readOption=='direct': - df = ddf.read_csv(inputPath,sep=',') - d = df.compute(scheduler='threads') - data = np.float32(d) + df = ddf.read_csv(inputPath,sep=',') + d = df.compute(scheduler='threads') + data = np.float32(d) del d #Mapping Data to Memory and create a numpy array. @@ -44,7 +47,7 @@ fileName = open(inputPath, "r") m = mmap.mmap(fileName.fileno(), 0, prot=mmap.PROT_READ) #Remove Header Row - m.readline() + m.readline() lineCounts = 0 for line in fileName: lineCounts = lineCounts+1 @@ -55,102 +58,139 @@ m.close() data = np.array(lineArray,dtype='float32') del lineArray -''' -#For Now Lets Make mapping as the default way of reading input file -fileName = open(inputPath, "r") +""" +# For Now Lets Make mapping as the default way of reading input file +fileName = open(inputPath) m = mmap.mmap(fileName.fileno(), 0, prot=mmap.PROT_READ) -m.readline() +m.readline() lineCounts = 0 for line in fileName: - lineCounts = lineCounts+1 + lineCounts = lineCounts + 1 lineArray = [] -for i in range(lineCounts-1): +for i in range(lineCounts - 1): line = m.readline() - lineArray.append(line.strip().decode('utf-8').split(",")) + lineArray.append(line.strip().decode("utf-8").split(",")) m.close() -data = np.array(lineArray,dtype='float32') +data = np.array(lineArray, 
dtype="float32") del lineArray duration = datetime.now() - startTime -logging.info("Duration of Reading Data == "+str(duration)) -print("Duration of Reading Data == "+str(duration)) +logging.info("Duration of Reading Data == " + str(duration)) +print("Duration of Reading Data == " + str(duration)) startTime = datetime.now() -#Some outputs about the computing device +# Some outputs about the computing device device = torch.device(args.deviceName) -logging.info("Using device:"+str(device)) -print("Using device:"+str(device)) -if device.type == 'cuda': - logging.info('torch.cuda.current_device()= '+ str(torch.cuda.current_device())) - print('torch.cuda.current_device()= '+ str(torch.cuda.current_device())) - logging.info('torch.cuda.is_available()= '+ str(torch.cuda.is_available())) - print('torch.cuda.is_available()= '+ str(torch.cuda.is_available())) -#Convert numpy array to pytorch tensor +logging.info("Using device:" + str(device)) +print("Using device:" + str(device)) +if device.type == "cuda": + logging.info("torch.cuda.current_device()= " + str(torch.cuda.current_device())) + print("torch.cuda.current_device()= " + str(torch.cuda.current_device())) + logging.info("torch.cuda.is_available()= " + str(torch.cuda.is_available())) + print("torch.cuda.is_available()= " + str(torch.cuda.is_available())) +# Convert numpy array to pytorch tensor x = torch.from_numpy(data).float().to(device) -featureCounts=np.shape(data)[-1] -#del data +featureCounts = np.shape(data)[-1] +# del data -#Create tensor x which is normalize input data on each column -XMean = torch.mean(x,0).to(device) -XStd = torch.std(x,0).to(device) -x = (x-XMean.expand_as(x))/XStd.expand_as(x).to(device) +# Create tensor x which is normalize input data on each column +XMean = torch.mean(x, 0).to(device) +XStd = torch.std(x, 0).to(device) +x = (x - XMean.expand_as(x)) / XStd.expand_as(x).to(device) del XMean del XStd -#Compute SVD decomposition of the normalized tensor x -#PyTorch outputs v Matrix and not 
v.t() -u, s, v = torch.svd(x,some=True,compute_uv=True) +# Compute SVD decomposition of the normalized tensor x +# PyTorch outputs v Matrix and not v.t() +u, s, v = torch.svd(x, some=True, compute_uv=True) -np.savetxt (SingularValuesOutputPath, s.cpu().numpy(), delimiter=",", header="Singluar Values", comments='') +np.savetxt( + SingularValuesOutputPath, + s.cpu().numpy(), + delimiter=",", + header="Singluar Values", + comments="", +) strs = ["Axis" for x in range(featureCounts)] -nums=list(range(1,featureCounts+1)) -headerLiterals=''.join(n+str(s)+',' for (n,s) in zip(strs, nums)) -headerLiterals=headerLiterals.rstrip(',') -np.savetxt (PCsOutputPath, v.t().cpu().numpy(), delimiter=",", header=headerLiterals, comments='') -#and Project Data to new PCs -projectedData = torch.matmul(u,torch.diag(s)).to(device) - -#Compute the Standard Deviation of the projected data along each PC axis -if args.computeStdev == 'true': - Stdev = torch.std(projectedData,0).to(device) +nums = list(range(1, featureCounts + 1)) +headerLiterals = "".join(n + str(s) + "," for (n, s) in zip(strs, nums)) +headerLiterals = headerLiterals.rstrip(",") +np.savetxt( + PCsOutputPath, + v.t().cpu().numpy(), + delimiter=",", + header=headerLiterals, + comments="", +) +# and Project Data to new PCs +projectedData = torch.matmul(u, torch.diag(s)).to(device) + +# Compute the Standard Deviation of the projected data along each PC axis +if args.computeStdev == "true": + Stdev = torch.std(projectedData, 0).to(device) SumStdev = torch.sum(Stdev).to(device) - normalizedStdev= torch.mul(torch.div(Stdev,SumStdev).to(device),100).to(device) - headerLiteral="Standard Deviation of Data Along each PC, Normalized Value in Percent" - np.savetxt (StdevOutputPath, torch.stack((Stdev,normalizedStdev),1).cpu().numpy(), delimiter=",", header=headerLiteral, comments='') - del Stdev,SumStdev,normalizedStdev -del u,s,v - -#Apply Sign Flip for the projected data -if args.applySignFlip == 'true': + normalizedStdev = 
torch.mul(torch.div(Stdev, SumStdev).to(device), 100).to(device) + headerLiteral = ( + "Standard Deviation of Data Along each PC, Normalized Value in Percent" + ) + np.savetxt( + StdevOutputPath, + torch.stack((Stdev, normalizedStdev), 1).cpu().numpy(), + delimiter=",", + header=headerLiteral, + comments="", + ) + del Stdev, SumStdev, normalizedStdev +del u, s, v + +# Apply Sign Flip for the projected data +if args.applySignFlip == "true": absProjectedData = torch.abs(projectedData).to(device) - temp = torch.eq(absProjectedData,torch.max(absProjectedData,-2,keepdim=True).values).type(torch.FloatTensor).to(device) - signMatrix = torch.sign(torch.sum(projectedData*temp,-2,keepdim=True).to(device)).to(device) - projectedData = projectedData*signMatrix.to(device) + temp = ( + torch.eq(absProjectedData, torch.max(absProjectedData, -2, keepdim=True).values) + .type(torch.FloatTensor) + .to(device) + ) + signMatrix = torch.sign( + torch.sum(projectedData * temp, -2, keepdim=True).to(device), + ).to(device) + projectedData = projectedData * signMatrix.to(device) duration = datetime.now() - startTime -logging.info("Duration of Execution == "+str(duration)) -print("Duration of Execution == "+str(duration)) +logging.info("Duration of Execution == " + str(duration)) +print("Duration of Execution == " + str(duration)) startTime = datetime.now() -#Output the results +# Output the results strs = ["PC" for x in range(featureCounts)] -nums=list(range(1,featureCounts+1)) -headerLiterals=''.join(n+str(s)+',' for (n,s) in zip(strs, nums)) -headerLiterals=headerLiterals.rstrip(',') -np.savetxt (outputPath, projectedData.cpu().numpy(), delimiter=",", header=headerLiterals, comments='') +nums = list(range(1, featureCounts + 1)) +headerLiterals = "".join(n + str(s) + "," for (n, s) in zip(strs, nums)) +headerLiterals = headerLiterals.rstrip(",") +np.savetxt( + outputPath, + projectedData.cpu().numpy(), + delimiter=",", + header=headerLiterals, + comments="", +) duration = datetime.now() - 
startTime -logging.info("Duration of Writing Data == "+str(duration)) -print("Duration of Writing Data == "+str(duration)) +logging.info("Duration of Writing Data == " + str(duration)) +print("Duration of Writing Data == " + str(duration)) -#Output some useful information if the compute was performed on cuda -if device.type == 'cuda': +# Output some useful information if the compute was performed on cuda +if device.type == "cuda": logging.info(torch.cuda.get_device_name(0)) - logging.info('Memory Usage:') - logging.info('Allocated:'+ str(round(torch.cuda.memory_allocated(0)/1024**3,1))+ 'GB') - logging.info('Cached: '+ str(round(torch.cuda.memory_cached(0)/1024**3,1))+ 'GB') + logging.info("Memory Usage:") + logging.info( + "Allocated:" + str(round(torch.cuda.memory_allocated(0) / 1024**3, 1)) + "GB", + ) + logging.info( + "Cached: " + str(round(torch.cuda.memory_cached(0) / 1024**3, 1)) + "GB", + ) print(torch.cuda.get_device_name(0)) - print('Memory Usage:') - print('Allocated:'+ str(round(torch.cuda.memory_allocated(0)/1024**3,1))+ 'GB') - print('Cached: '+ str(round(torch.cuda.memory_cached(0)/1024**3,1))+ 'GB') - + print("Memory Usage:") + print( + "Allocated:" + str(round(torch.cuda.memory_allocated(0) / 1024**3, 1)) + "GB", + ) + print("Cached: " + str(round(torch.cuda.memory_cached(0) / 1024**3, 1)) + "GB") diff --git a/dimension_reduction/PCA/Shared-Memory/plugin_CPUOnly.json b/dimension_reduction/PCA/Shared-Memory/plugin_CPUOnly.json index 231ad4615..20b825fa1 100644 --- a/dimension_reduction/PCA/Shared-Memory/plugin_CPUOnly.json +++ b/dimension_reduction/PCA/Shared-Memory/plugin_CPUOnly.json @@ -1,72 +1,72 @@ { - "name": "PCA (Shared-Memory)", - "version": "cpuonly-0.1.3", - "title": "PCA (Shared-Memory)", - "description": "PCA Shared-Memory Code", - "author": "Mahdi Maghrebi", - "institution": "National Center for Advancing Translational Sciences, National Institutes of Health", - "containerId": "labshare/polus-pca-plugin:cpuonly-0.1.3", - "inputs": [ - 
{ - "name": "inputPath", - "type": "csvCollection", - "description": "Input csv file containing the raw data", - "required": "True" - }, - { - "name": "deviceName", - "type": "enum", - "description": "Computing Device (CPU or GPU)", - "options": { - "values": [ - "cpu", - "cuda" - ] - }, - "required": "True" - }, - { - "name": "applySignFlip", - "type": "boolean", - "description": "Apply sign flip on the projected data along certain PC axes", - "required": "True" - }, - { - "name": "computeStdev", - "type": "boolean", - "description": "Post-Compute Process to Compute the Standard Deviation", - "required": "True" - } - ], - "outputs": [ - { - "name": "outputPath", - "type": "csvCollection", - "description": "Full path of the output csv file" - } - ], - "ui": [ - { - "key": "inputs.inputPath", - "title": "Input CSV collection", - "description": "Insert the path to csv collection" - }, - { - "key": "inputs.deviceName", - "title": "Computing Device", - "description": "Which Device is Used for Computation: cpu or cuda", - "default": "cpu" - }, - { - "key": "inputs.applySignFlip", - "title": "Apply Sign Flip to the Projected Outputs?", - "description": "Apply sign flip on the projected data along certain PC axes", - "default": true - }, - { - "key": "inputs.computeStdev", - "title": "Post-Compute the Standard Deviation?", - "description": "Compute the Standard Deviation of the projected data along each PC axis?" 
- } - ] + "name": "PCA (Shared-Memory)", + "version": "cpuonly-0.1.3", + "title": "PCA (Shared-Memory)", + "description": "PCA Shared-Memory Code", + "author": "Mahdi Maghrebi", + "institution": "National Center for Advancing Translational Sciences, National Institutes of Health", + "containerId": "labshare/polus-pca-plugin:cpuonly-0.1.3", + "inputs": [ + { + "name": "inputPath", + "type": "csvCollection", + "description": "Input csv file containing the raw data", + "required": "True" + }, + { + "name": "deviceName", + "type": "enum", + "description": "Computing Device (CPU or GPU)", + "options": { + "values": [ + "cpu", + "cuda" + ] + }, + "required": "True" + }, + { + "name": "applySignFlip", + "type": "boolean", + "description": "Apply sign flip on the projected data along certain PC axes", + "required": "True" + }, + { + "name": "computeStdev", + "type": "boolean", + "description": "Post-Compute Process to Compute the Standard Deviation", + "required": "True" + } + ], + "outputs": [ + { + "name": "outputPath", + "type": "csvCollection", + "description": "Full path of the output csv file" + } + ], + "ui": [ + { + "key": "inputs.inputPath", + "title": "Input CSV collection", + "description": "Insert the path to csv collection" + }, + { + "key": "inputs.deviceName", + "title": "Computing Device", + "description": "Which Device is Used for Computation: cpu or cuda", + "default": "cpu" + }, + { + "key": "inputs.applySignFlip", + "title": "Apply Sign Flip to the Projected Outputs?", + "description": "Apply sign flip on the projected data along certain PC axes", + "default": true + }, + { + "key": "inputs.computeStdev", + "title": "Post-Compute the Standard Deviation?", + "description": "Compute the Standard Deviation of the projected data along each PC axis?" 
+ } + ] } diff --git a/dimension_reduction/PCA/Shared-Memory/plugin_GpuEnabled.json b/dimension_reduction/PCA/Shared-Memory/plugin_GpuEnabled.json index 0dd5d793e..327baabc1 100644 --- a/dimension_reduction/PCA/Shared-Memory/plugin_GpuEnabled.json +++ b/dimension_reduction/PCA/Shared-Memory/plugin_GpuEnabled.json @@ -1,72 +1,72 @@ { - "name": "PCA (Shared-Memory)", - "version": "Gpuenabled-0.1.3", - "title": "PCA (Shared-Memory)", - "description": "PCA Shared-Memory Code", - "author": "Mahdi Maghrebi", - "institution": "National Center for Advancing Translational Sciences, National Institutes of Health", - "containerId": "labshare/polus-pca-plugin:Gpuenabled-0.1.3", - "inputs": [ - { - "name": "inputPath", - "type": "csvCollection", - "description": "Input csv file containing the raw data", - "required": "True" - }, - { - "name": "deviceName", - "type": "enum", - "description": "Computing Device (CPU or GPU)", - "options": { - "values": [ - "cpu", - "cuda" - ] - }, - "required": "True" - }, - { - "name": "applySignFlip", - "type": "boolean", - "description": "Apply sign flip on the projected data along certain PC axes", - "required": "True" - }, - { - "name": "computeStdev", - "type": "boolean", - "description": "Post-Compute Process to Compute the Standard Deviation", - "required": "True" - } - ], - "outputs": [ - { - "name": "outputPath", - "type": "csvCollection", - "description": "Full path of the output csv file" - } - ], - "ui": [ - { - "key": "inputs.inputPath", - "title": "Input CSV collection", - "description": "Insert the path to csv collection" - }, - { - "key": "inputs.deviceName", - "title": "Computing Device", - "description": "Which Device is Used for Computation: cpu or cuda", - "default": "cpu" - }, - { - "key": "inputs.applySignFlip", - "title": "Apply Sign Flip to the Projected Outputs?", - "description": "Apply sign flip on the projected data along certain PC axes", - "default": true - }, - { - "key": "inputs.computeStdev", - "title": 
"Post-Compute the Standard Deviation?", - "description": "Compute the Standard Deviation of the projected data along each PC axis?" - } - ] + "name": "PCA (Shared-Memory)", + "version": "Gpuenabled-0.1.3", + "title": "PCA (Shared-Memory)", + "description": "PCA Shared-Memory Code", + "author": "Mahdi Maghrebi", + "institution": "National Center for Advancing Translational Sciences, National Institutes of Health", + "containerId": "labshare/polus-pca-plugin:Gpuenabled-0.1.3", + "inputs": [ + { + "name": "inputPath", + "type": "csvCollection", + "description": "Input csv file containing the raw data", + "required": "True" + }, + { + "name": "deviceName", + "type": "enum", + "description": "Computing Device (CPU or GPU)", + "options": { + "values": [ + "cpu", + "cuda" + ] + }, + "required": "True" + }, + { + "name": "applySignFlip", + "type": "boolean", + "description": "Apply sign flip on the projected data along certain PC axes", + "required": "True" + }, + { + "name": "computeStdev", + "type": "boolean", + "description": "Post-Compute Process to Compute the Standard Deviation", + "required": "True" + } + ], + "outputs": [ + { + "name": "outputPath", + "type": "csvCollection", + "description": "Full path of the output csv file" + } + ], + "ui": [ + { + "key": "inputs.inputPath", + "title": "Input CSV collection", + "description": "Insert the path to csv collection" + }, + { + "key": "inputs.deviceName", + "title": "Computing Device", + "description": "Which Device is Used for Computation: cpu or cuda", + "default": "cpu" + }, + { + "key": "inputs.applySignFlip", + "title": "Apply Sign Flip to the Projected Outputs?", + "description": "Apply sign flip on the projected data along certain PC axes", + "default": true + }, + { + "key": "inputs.computeStdev", + "title": "Post-Compute the Standard Deviation?", + "description": "Compute the Standard Deviation of the projected data along each PC axis?" 
+ } + ] } diff --git a/dimension_reduction/UMAP/README.rst b/dimension_reduction/UMAP/README.rst index c029d5b7f..07412fec7 100644 --- a/dimension_reduction/UMAP/README.rst +++ b/dimension_reduction/UMAP/README.rst @@ -8,19 +8,19 @@ Please consider the following instruction for the execution of UMAP code for Sha Installing the Required Library ------------------------------- -UMAP requires three external libraries of Boost, Armadillo, and Eigen3 for the execution. +UMAP requires three external libraries of Boost, Armadillo, and Eigen3 for the execution. The steps for installing Boost library are explained below. - + .. code:: bash - + wget https://dl.bintray.com/boostorg/release/1.71.0/source/boost_1_71_0.tar.gz - tar xfz boost_1_71_0.tar.gz + tar xfz boost_1_71_0.tar.gz cd boost_1_71_0/ ./bootstrap.sh ./b2 export LD_LIBRARY_PATH=currentpath/stage/lib:$LD_LIBRARY_PATH -It is recommended to include the above last line into ~/.bashrc file. +It is recommended to include the above last line into ~/.bashrc file. The Armadillo library can be installed using the following command. @@ -33,9 +33,9 @@ The Eigen3 library is an header-only library and can be downloaded using the fol .. code:: bash wget https://gitlab.com/libeigen/eigen/-/archive/3.3.7/eigen-3.3.7.tar.gz - tar xfz eigen-3.3.7.tar.gz + tar xfz eigen-3.3.7.tar.gz rm eigen-3.3.7.tar.gz - + ----------------- Runtime Arguments ----------------- @@ -43,24 +43,24 @@ Runtime Arguments The code required the following parameters as the input. 1- ``inputPath``: The full path to the directory that contains the input csv file. Please note that the code reads the first csv file in this directory. - Therefore, any other csv files (than the input csv file) should be deleted from this directory before running the program including the output from the previous run. - -2- ``K``: the desired number of Nearest Neighbours to be computed. If larger K was selected, UMAP better preserves the global distribution of data. 
- For smaller K values, UMAP instead preserves the local distribution of data. + Therefore, any other csv files (than the input csv file) should be deleted from this directory before running the program including the output from the previous run. + +2- ``K``: the desired number of Nearest Neighbours to be computed. If larger K was selected, UMAP better preserves the global distribution of data. + For smaller K values, UMAP instead preserves the local distribution of data. 3- ``sampleRate``: the rate at which we do sampling in K-NN algorithm. This parameter plays a key role in the performance. This parameter is a trades-off between the performance and the accuracy of the K-NN estimates. The values closer to 1 provides more accurate - results but the execution instead takes longer. - + results but the execution instead takes longer. + 4- ``DimLowSpace``: Dimension of Low-D (or embedding) space (Usually is between 1 to 3). -5- ``randomInitializing``: If set to true, the positions of data in the lower dimension space are initialized randomly; - and if set to false, the positions are defined by solving Laplacian matrix using Armadillo library. - -6- ``outputPath``: The full path to the directory in which the output files will be saved. +5- ``randomInitializing``: If set to true, the positions of data in the lower dimension space are initialized randomly; + and if set to false, the positions are defined by solving Laplacian matrix using Armadillo library. + +6- ``outputPath``: The full path to the directory in which the output files will be saved. -7- ``n_epochs``: The total number of training epochs over the pairs of data points during SGD solution. +7- ``n_epochs``: The total number of training epochs over the pairs of data points during SGD solution. 8- ``min_dist``: defines how tight the points are from each other in Low-D space. The default value should be 0.001. @@ -70,7 +70,7 @@ The code required the following parameters as the input. 
11- ``distanceV2``: is the second optional variable needed for computing distance in some metric. -12- ``inputPathOptionalArray``: The full path to the directory that contains a csv file of the optional array needed for computing distance in some metrics. +12- ``inputPathOptionalArray``: The full path to the directory that contains a csv file of the optional array needed for computing distance in some metrics. ----------- The Outputs @@ -80,7 +80,7 @@ The code produces the following output files at outputPath: 1- ``ProjectedData_EmbeddedSpace.csv``: The coordinates of the projected input data in the lower dimension space. -2- ``Setting.txt``: The logging file containing the error and informational messages. +2- ``Setting.txt``: The logging file containing the error and informational messages. ------------------------------ An Example of Running the code @@ -89,25 +89,25 @@ An Example of Running the code .. code:: bash ulimit -s unlimited - + g++ -I/path to boost directory/boost_1_71_0 -I/Path to eigen3 directory/eigen-3.3.7 \ main.cpp KNN_OpenMP_Code.cpp highDComputes.cpp Initialization.cpp LMOptimization.cpp \ Metrics.cpp SGD.cpp -o a.out -O2 - armadillo \ -L/path to boost directory/boost_1_71_0/stage/lib -lboost_iostreams \ -lboost_system -lboost_filesystem -fopenmp - + time ./a.out --inputPath . --K 15 --sampleRate 0.8 --DimLowSpace 2 \ --randomInitializing true --outputPath . --n_epochs 500 --min_dist 0.001 \ --distanceMetric euclidean - + ------------------- Install WIPP Plugin -------------------- +------------------- If WIPP is running, navigate to the plugins page and add a new plugin. Paste the contents of plugin.json into the pop-up window and submit. - + ------------------------------------------ An Example of Running the Docker Container ------------------------------------------- +------------------------------------------ .. 
code:: bash @@ -115,13 +115,13 @@ An Example of Running the Docker Container containername --inputPath /home/Inputs --K 15 --sampleRate 0.8 \ --DimLowSpace 2 --randomInitializing true --outputPath /home/Outputs \ --n_epochs 500 --min_dist 0.001 --distanceMetric euclidean - + =========================== CUDA Implementation of UMAP =========================== - + The inputs and outputs to CUDA UMAP code are the same as the serial code explained before. The state-of-the-art CUDA implementation of UMAP has significantly improved the performance of UMAP code. Two hotspots in UMAP execution were parallelized using CUDA directives which are computing KNN and solving for SGD solution. For KNN part, the number of thread blocks are equal to the number of datapoints and the distance computations for each pair of points is performed by each thread of the block. The variable MAXTPB controls the hardward limitation on the number of threads per block, which is by default 1024. If the computations within a block requires more threads, the computations will be carried on CPU instead. Furthermore, if the number of threads per block is very low, the computations will be on CPU too due to the performance benefits. This latter is controlled by the variable MinimumThreads. For SGD part, the number of threads per block is controlled by the variable TPB, which is by default 32. Please also note that the performance of KNN engine has been improved by using multi-threading OpenMP directives. The number of threads in the OpenMP parallelized region of the code is automatically set equal to the number of threads in the machine minus 1. - + ------------------------------ An Example of Running the code ------------------------------ @@ -129,20 +129,20 @@ An Example of Running the code .. 
code:: bash ulimit -s unlimited - + nvcc -I/path to boost directory/boost_1_71_0 -I/Path to eigen3 directory/eigen-3.3.7 \ main.cu highDComputes.cpp Initialization.cpp LMOptimization.cpp Metrics.cpp KNN_GPU_Code.cu \ -o a.out -larmadillo -L/path to boost directory/boost_1_71_0/stage/lib -lboost_iostreams \ -lboost_system -lboost_filesystem -arch=sm_75 -O2 --expt-relaxed-constexpr -Xcompiler -fopenmp \ -Xcompiler -O2 - + time ./a.out --inputPath . --K 15 --sampleRate 0.8 --DimLowSpace 2 \ --randomInitializing true --outputPath . --n_epochs 500 --min_dist 0.001 \ - --distanceMetric euclidean - + --distanceMetric euclidean + ------------------------------------------ An Example of Running the Docker Container ------------------------------------------- +------------------------------------------ .. code:: bash @@ -150,4 +150,3 @@ An Example of Running the Docker Container containername --inputPath /home/Inputs --K 15 --sampleRate 0.8 \ --DimLowSpace 2 --randomInitializing true --outputPath /home/Outputs \ --n_epochs 500 --min_dist 0.001 --distanceMetric euclidean - diff --git a/dimension_reduction/UMAP/Shared-Memory-GPU/Dockerfile b/dimension_reduction/UMAP/Shared-Memory-GPU/Dockerfile index 17cfa91fa..4976a1252 100644 --- a/dimension_reduction/UMAP/Shared-Memory-GPU/Dockerfile +++ b/dimension_reduction/UMAP/Shared-Memory-GPU/Dockerfile @@ -2,38 +2,35 @@ FROM nvidia/cuda:10.2-devel-ubuntu18.04 # Update apps on the base image RUN apt-get -y update && apt-get install -y g++ wget make libarmadillo-dev - -#Create new directory + +#Create new directory RUN mkdir -p /home/GPU_UMAP /home/Inputs /home/Outputs - + # Specify the working directory WORKDIR /home/GPU_UMAP - -# Install Boost Library + +# Install Boost Library RUN wget https://dl.bintray.com/boostorg/release/1.71.0/source/boost_1_71_0.tar.gz -RUN tar xfz boost_1_71_0.tar.gz +RUN tar xfz boost_1_71_0.tar.gz RUN rm boost_1_71_0.tar.gz WORKDIR /home/GPU_UMAP/boost_1_71_0 RUN ./bootstrap.sh RUN ./b2 -ENV 
LD_LIBRARY_PATH="/home/GPU_UMAP/boost_1_71_0/stage/lib:${LD_LIBRARY_PATH}" +ENV LD_LIBRARY_PATH="/home/GPU_UMAP/boost_1_71_0/stage/lib:${LD_LIBRARY_PATH}" WORKDIR /home/GPU_UMAP -# Download Eigen3 Library +# Download Eigen3 Library RUN wget https://gitlab.com/libeigen/eigen/-/archive/3.3.7/eigen-3.3.7.tar.gz -RUN tar xfz eigen-3.3.7.tar.gz +RUN tar xfz eigen-3.3.7.tar.gz RUN rm eigen-3.3.7.tar.gz # Copy the current folder to the docker image COPY . /home/GPU_UMAP -# Compile the source file +# Compile the source file WORKDIR /home/GPU_UMAP RUN nvcc -I/home/GPU_UMAP/boost_1_71_0 -I/home/GPU_UMAP/eigen-3.3.7 main.cu highDComputes.cpp Initialization.cpp LMOptimization.cpp Metrics.cpp KNN_GPU_Code.cu -larmadillo -o Out.exe -L/home/GPU_UMAP/boost_1_71_0/stage/lib -lboost_iostreams -lboost_system -lboost_filesystem -arch=sm_75 -O2 --expt-relaxed-constexpr -Xcompiler -O2 -Xcompiler -fopenmp # Run the output program from the previous step ENTRYPOINT ["./Out.exe"] - - - diff --git a/dimension_reduction/UMAP/Shared-Memory-GPU/Initialization.cpp b/dimension_reduction/UMAP/Shared-Memory-GPU/Initialization.cpp index e6c01353c..10b864389 100644 --- a/dimension_reduction/UMAP/Shared-Memory-GPU/Initialization.cpp +++ b/dimension_reduction/UMAP/Shared-Memory-GPU/Initialization.cpp @@ -19,12 +19,12 @@ void Initialization (bool randominitializing, double** embedding, ofstream& logF /** * zero approximation in float precision */ - float epsilon=1e-6; + float epsilon=1e-6; /** - * By deafult, low-D space dimensions are between -10 and 10 + * By deafult, low-D space dimensions are between -10 and 10 */ int minDimLowDSpace=-10; - int maxDimLowDSpace=10; + int maxDimLowDSpace=10; if (!randominitializing){ try{ @@ -32,10 +32,10 @@ void Initialization (bool randominitializing, double** embedding, ofstream& logF cout<<" Spectral Initialization of Data in Lower Space"<::InnerIterator it(graphSM,k); it; ++it) { graph[it.row()][it.col()]=it.value(); } - } + } /** * Removing the small weights in 
accordance to https://github.com/lmcinnes/umap/blob/master/umap/umap_.py#L1032 */ #pragma omp parallel for - for (int i = 0; i < N; ++i){ + for (int i = 0; i < N; ++i){ for (int j = 0; j < N; ++j){ - if (graph[i][j] < epsilon) continue; - if (graph[i][j] < MaxWeight/n_epochs) graph[i][j]=0; + if (graph[i][j] < epsilon) continue; + if (graph[i][j] < MaxWeight/n_epochs) graph[i][j]=0; } - } + } /** - * DegreeMatrix is a diagonal matrix contains information about the degree of each vertex + * DegreeMatrix is a diagonal matrix contains information about the degree of each vertex * sqrtDegreeMatrix transforms the diagonal values of DegreeMatrix by 1.0/sqrt() */ float** sqrtDegreeMatrix = new float*[N]; @@ -76,7 +76,7 @@ void Initialization (bool randominitializing, double** embedding, ofstream& logF for (int i = 0; i < N; ++i) { float sum=0; - for (int j = 0; j < N; ++j) { + for (int j = 0; j < N; ++j) { sum+=graph[i][j]; } sqrtDegreeMatrix[i][i]=1.0/sqrt(sum); @@ -84,10 +84,10 @@ void Initialization (bool randominitializing, double** embedding, ofstream& logF /** * aux_mem is the column-wise transformation of sqrtDegreeMatrix as needed by armadillo function fmat */ - float* aux_mem = new float[N*N]; + float* aux_mem = new float[N*N]; for (int i = 0; i < N; ++i){ for (int j = 0; j < N; ++j){ - aux_mem[j*N+i]=sqrtDegreeMatrix[i][j]; + aux_mem[j*N+i]=sqrtDegreeMatrix[i][j]; } } delete [] sqrtDegreeMatrix; @@ -95,11 +95,11 @@ void Initialization (bool randominitializing, double** embedding, ofstream& logF * Making an armadillo sparse matrix spmatDegreeMatrix from sqrtDegreeMatrix */ fmat matDegreeMatrix(aux_mem,N,N,false,true); - sp_fmat spmatDegreeMatrix(matDegreeMatrix); + sp_fmat spmatDegreeMatrix(matDegreeMatrix); /** * aux_mem2 is the column-wise transformation of adjacencyMatrix as needed by armadillo function fmat */ - float* aux_mem2 = new float[N*N]; + float* aux_mem2 = new float[N*N]; for (int i = 0; i < N; ++i){ for (int j = 0; j < N; ++j){ 
aux_mem2[j*N+i]=graph[i][j]; //column-wise @@ -114,12 +114,12 @@ void Initialization (bool randominitializing, double** embedding, ofstream& logF fmat matadjacencyMatrix(aux_mem2,N,N,false,true); sp_fmat spmatadjacencyMatrix(matadjacencyMatrix); /** - * Making an armadillo sparse matrix of identity - */ - sp_fmat Unity = speye(N,N); + * Making an armadillo sparse matrix of identity + */ + sp_fmat Unity = speye(N,N); /** - * Making an armadillo sparse matrix of Laplacian - */ + * Making an armadillo sparse matrix of Laplacian + */ sp_fmat laplacianMatrix; laplacianMatrix= Unity-spmatDegreeMatrix*spmatadjacencyMatrix*spmatDegreeMatrix; /** @@ -127,29 +127,29 @@ void Initialization (bool randominitializing, double** embedding, ofstream& logF */ fvec eigval; fmat eigvec; - eigs_sym(eigval, eigvec, laplacianMatrix, DimLowSpace+1 , "sm"); + eigs_sym(eigval, eigvec, laplacianMatrix, DimLowSpace+1 , "sm"); /** - * Converting eigenvectors to tmpvector + * Converting eigenvectors to tmpvector * will throw "error: Mat::col(): index out of bounds" if no eigvec was available */ typedef std::vector stdvec; std::vector< std::vector > tmpvector; for (int i = 1; i < DimLowSpace+1; ++i) { - stdvec vectest = arma::conv_to< stdvec >::from(eigvec.col(i)); - tmpvector.push_back(vectest); + stdvec vectest = arma::conv_to< stdvec >::from(eigvec.col(i)); + tmpvector.push_back(vectest); } /** * using tmpvector to intialize the locations of the points in low-D space * embedding should not be outside the chosen dimensions for low-D space */ double maxembedding=0; - for (int j = 0; j < DimLowSpace; ++j) { + for (int j = 0; j < DimLowSpace; ++j) { for (int i = 0; i < N; ++i) { double tmp=tmpvector[j][i]; embedding[i][j]= tmp; - if (abs(tmp) > maxembedding) maxembedding=tmp; + if (abs(tmp) > maxembedding) maxembedding=tmp; } } @@ -158,22 +158,22 @@ void Initialization (bool randominitializing, double** embedding, ofstream& logF // Also adding a noise as prescribed in 
https://github.com/lmcinnes/umap/blob/master/umap/umap_.py#L1040 unsigned seed = std::chrono::system_clock::now().time_since_epoch().count(); std::default_random_engine generator (seed); - std::normal_distribution distribution (0.0,1.0); + std::normal_distribution distribution (0.0,1.0); - for (int i = 0; i < N; ++i) { - for (int j = 0; j < DimLowSpace; ++j) { + for (int i = 0; i < N; ++i) { + for (int j = 0; j < DimLowSpace; ++j) { embedding[i][j] =embedding[i][j]* expansion+ SCALE*distribution(generator); } } } catch(std::exception& e){ - logFile<<" Spectral Initialization Failed. Will proceed with random initialization."< #include -#include -#include -#include +#include +#include +#include #include #include #include #include #include -#include +#include #include "KNN_GPU_Code.cuh" -#include "Metrics.h" +#include "Metrics.h" #include "Metrics.cuh" #include #include #include "highDComputes.h" #include -#include -#include +#include +#include using boost::iostreams::mapped_file_source; -using boost::iostreams::stream; +using boost::iostreams::stream; using namespace std; /** @@ -46,7 +46,7 @@ __global__ void ComputeDistancesKernel(int * device_New_Final_List_1D, int * dev int localDim=Dim; double localvalue=0; int Cnts=device_New_Final_List_Index[blockIdx.x+1]-device_New_Final_List_Index[blockIdx.x]; - int Cnts_Dist=device_New_Final_List_Dist_Index[blockIdx.x+1]-device_New_Final_List_Dist_Index[blockIdx.x]; + int Cnts_Dist=device_New_Final_List_Dist_Index[blockIdx.x+1]-device_New_Final_List_Dist_Index[blockIdx.x]; int par1, par2; int cnt=0; int flag=0; @@ -57,18 +57,18 @@ __global__ void ComputeDistancesKernel(int * device_New_Final_List_1D, int * dev for (int j=i+1; j < Cnts; ++j){ if (threadIdx.x == cnt) { par1 = device_New_Final_List_1D[i + device_New_Final_List_Index[blockIdx.x]]; - par2 = device_New_Final_List_1D[j + device_New_Final_List_Index[blockIdx.x]]; + par2 = device_New_Final_List_1D[j + device_New_Final_List_Index[blockIdx.x]]; flag=1; - break; + 
break; } ++cnt; } } localvalue= distanceCompute (localDim, device_dataPointsGPU, par1, par2, metricID, distanceV1, distanceV2, v0, v1); - + int IndexIDWrite= device_New_Final_List_Dist_Index[blockIdx.x]+threadIdx.x; - device_New_Final_List_Dist_1D[IndexIDWrite] = localvalue; + device_New_Final_List_Dist_1D[IndexIDWrite] = localvalue; } return; } @@ -89,8 +89,8 @@ __global__ void ComputeDistancesKernel(int * device_New_Final_List_1D, int * dev */ int UpdateNN (int** B_Index, double ** B_Dist, short** B_IsNew, short* allEntriesFilled, int K, int u1, int u2, double distance, int flag = 1) { - if(allEntriesFilled[u1]==0){ - for (int j = 0; j < K; j++) { + if(allEntriesFilled[u1]==0){ + for (int j = 0; j < K; j++) { if (B_Dist[u1][j] < 0) { for (int jj = 0; jj < j; jj++) {if (B_Index[u1][jj] == u2) return 0;} @@ -116,7 +116,7 @@ int UpdateNN (int** B_Index, double ** B_Dist, short** B_IsNew, short* allEntrie index = j; } } - if (index == -1) { cout << "Error"< Max_New_Final_List_Length) Max_New_Final_List_Length=New_Final_List[i].size(); } /** * ThreadsPerBlockNeeded is the required number of threads per block to compute the longest array of New_Final_List */ - int ThreadsPerBlockNeeded=0; - for (int i = 0; i < Max_New_Final_List_Length; ++i) { - for (int j = i+1; j < Max_New_Final_List_Length; ++j) { - ++ThreadsPerBlockNeeded; + int ThreadsPerBlockNeeded=0; + for (int i = 0; i < Max_New_Final_List_Length; ++i) { + for (int j = i+1; j < Max_New_Final_List_Length; ++j) { + ++ThreadsPerBlockNeeded; } } /** - * Switch to GPU computations if the following conditions met. Otherwise proceed to CPU computations. + * Switch to GPU computations if the following conditions met. Otherwise proceed to CPU computations. 
* For now, exclude the metrics depend on filePathOptionalArray from GPU computations - */ - if (ThreadsPerBlockNeeded < MAXTPB && ThreadsPerBlockNeeded > MinimumThreads && filePathOptionalArray=="") { + */ + if (ThreadsPerBlockNeeded < MAXTPB && ThreadsPerBlockNeeded > MinimumThreads && filePathOptionalArray=="") { /** * TotalCounts is the total number of elements in New_Final_List - */ - int TotalCounts=0; - for (int i = 0; i < N; ++i) { + */ + int TotalCounts=0; + for (int i = 0; i < N; ++i) { TotalCounts += New_Final_List[i].size(); - } + } /** * New_Final_List_1D is the 1D representation of New_Final_List for transferring to GPU - */ - int * New_Final_List_1D = new int [TotalCounts]; + */ + int * New_Final_List_1D = new int [TotalCounts]; int cnt=0; for (int i = 0; i < N; ++i) { - for (int j = 0; j < New_Final_List[i].size(); ++j) { + for (int j = 0; j < New_Final_List[i].size(); ++j) { New_Final_List_1D[cnt] = New_Final_List[i][j]; ++cnt; } - } + } /** * device_New_Final_List_1D is on the GPU memory and contains New_Final_List_1D - */ - int *device_New_Final_List_1D; - cudaMalloc ((void **) &device_New_Final_List_1D, TotalCounts*sizeof(int)); - gpuErrchk(cudaMemcpy (device_New_Final_List_1D, New_Final_List_1D, TotalCounts* sizeof(int),cudaMemcpyHostToDevice)); + */ + int *device_New_Final_List_1D; + cudaMalloc ((void **) &device_New_Final_List_1D, TotalCounts*sizeof(int)); + gpuErrchk(cudaMemcpy (device_New_Final_List_1D, New_Final_List_1D, TotalCounts* sizeof(int),cudaMemcpyHostToDevice)); /** * New_Final_List_Index is the index of New_Final_List[i] data. It is needed as New_Final_List has variable size in each row of data. 
- */ + */ int * New_Final_List_Index = new int [N+1]; New_Final_List_Index[0] = 0; - for (int i = 1; i < N+1; ++i) { + for (int i = 1; i < N+1; ++i) { New_Final_List_Index[i] = New_Final_List[i-1].size()+New_Final_List_Index[i-1]; } /** * device_New_Final_List_Index is on the GPU memory and contains New_Final_List_Index - */ - int *device_New_Final_List_Index; - cudaMalloc ((void **) &device_New_Final_List_Index, (N+1)*sizeof(int)); + */ + int *device_New_Final_List_Index; + cudaMalloc ((void **) &device_New_Final_List_Index, (N+1)*sizeof(int)); gpuErrchk(cudaMemcpy (device_New_Final_List_Index, New_Final_List_Index, (N+1)* sizeof(int),cudaMemcpyHostToDevice)); /** - * New_Final_List_Dist_Index is the index of pairs of distances computed in GPU. - */ - int * New_Final_List_Dist_Index = new int [N+1]; + * New_Final_List_Dist_Index is the index of pairs of distances computed in GPU. + */ + int * New_Final_List_Dist_Index = new int [N+1]; int TotalCounts_Dist=0; for (int i = 0; i < N; ++i) { New_Final_List_Dist_Index[i]=TotalCounts_Dist; - for (int j = 0; j < New_Final_List[i].size(); ++j) { - for (int k = j+1; k < New_Final_List[i].size(); ++k) { + for (int j = 0; j < New_Final_List[i].size(); ++j) { + for (int k = j+1; k < New_Final_List[i].size(); ++k) { ++TotalCounts_Dist; } - } + } } New_Final_List_Dist_Index[N]=TotalCounts_Dist; /** * device_New_Final_List_Dist_Index is on the GPU memory and contains New_Final_List_Dist_Index - */ - int * device_New_Final_List_Dist_Index; - cudaMalloc ((void **) &device_New_Final_List_Dist_Index, (N+1)*sizeof(int)); + */ + int * device_New_Final_List_Dist_Index; + cudaMalloc ((void **) &device_New_Final_List_Dist_Index, (N+1)*sizeof(int)); gpuErrchk(cudaMemcpy (device_New_Final_List_Dist_Index, New_Final_List_Dist_Index, (N+1) * sizeof(int),cudaMemcpyHostToDevice)); /** * device_New_Final_List_Dist_1D is on the GPU memory and contains 1D array of pairs of distances computed in GPU. 
- */ - double *device_New_Final_List_Dist_1D; - cudaMalloc ((void **) &device_New_Final_List_Dist_1D, TotalCounts_Dist*sizeof(double)); - + */ + double *device_New_Final_List_Dist_1D; + cudaMalloc ((void **) &device_New_Final_List_Dist_1D, TotalCounts_Dist*sizeof(double)); + /** * Creating 2 arrays on device for the case that the Metric is levenshtein - */ + */ float *device_v0, *device_v1; - cudaMalloc ((void **) &device_v0, (Dim+1)*sizeof(float)); - cudaMalloc ((void **) &device_v1, (Dim+1)*sizeof(float)); + cudaMalloc ((void **) &device_v0, (Dim+1)*sizeof(float)); + cudaMalloc ((void **) &device_v1, (Dim+1)*sizeof(float)); /** * Launch the Kernel to compute the distance computations for all pairs of the points. * cudaDeviceSynchronize is required to ensure data transfer to GPU memory is already finished. - */ - gpuErrchk(cudaDeviceSynchronize()); + */ + gpuErrchk(cudaDeviceSynchronize()); logFile<< "Number of Blocks = "< -#include -#include -#include -#include -#include -#include -using namespace std; - -struct LMFunctor -{ - // 'm' pairs of (x, f(x)) - Eigen::MatrixXf measuredValues; - - // Compute 'm' errors, one for each data point, for the given parameter values in 'x' - int operator()(const Eigen::VectorXf &x, Eigen::VectorXf &fvec) const - { - // 'x' has dimensions n x 1 - // It contains the current estimates for the parameters. - - // 'fvec' has dimensions m x 1 - // It will contain the error for each data point. - - float aParam = x(0); - float bParam = x(1); - - for (int i = 0; i < values(); i++) { - float xValue = measuredValues(i, 0); - float yValue = measuredValues(i, 1); - - fvec(i) = yValue - (1.0 / (1.0+ aParam * pow(xValue, 2*bParam)) ); - } - return 0; - } - - // Compute the jacobian of the errors - int df(const Eigen::VectorXf &x, Eigen::MatrixXf &fjac) const - { - // 'x' has dimensions n x 1 - // It contains the current estimates for the parameters. 
- - // 'fjac' has dimensions m x n - // It will contain the jacobian of the errors, calculated numerically in this case. - - float epsilon; - epsilon = 1e-5f; - - for (int i = 0; i < x.size(); i++) { - Eigen::VectorXf xPlus(x); - xPlus(i) += epsilon; - Eigen::VectorXf xMinus(x); - xMinus(i) -= epsilon; - - Eigen::VectorXf fvecPlus(values()); - operator()(xPlus, fvecPlus); - - Eigen::VectorXf fvecMinus(values()); - operator()(xMinus, fvecMinus); - - Eigen::VectorXf fvecDiff(values()); - fvecDiff = (fvecPlus - fvecMinus) / (2.0f * epsilon); - - fjac.block(0, i, values(), 1) = fvecDiff; - } - - return 0; - } - - // Number of data points, i.e. values. - int m; - - // Returns 'm', the number of values. - int values() const { return m; } - - // The number of parameters, i.e. inputs. - int n; - - // Returns 'n', the number of inputs. - int inputs() const { return n; } - -}; - - - -// -// Goal -// -// Given a non-linear equation: f(x) = 1.0/(1.0+a*pow(x,2*b)) -// and 'm' data points (x1, f(x1)), (x2, f(x2)), ..., (xm, f(xm)) -// our goal is to estimate 'n' parameters (2 in this case: a, b) -// using LM optimization. -// -void estimateParameters(float &a, float &b, float min_dist, float spread, ofstream& logFile) -{ - - std::vector x_values; - std::vector y_values; - - /** - * The interval used for data fitting - * The values were adopted from https://github.com/lmcinnes/umap/blob/master/umap/umap_.py#L1138 - */ - const float minInterval=0; - const float maxInterval=3*spread; - const int intervalCounts=300; - - - for (int i = 0; i min_dist) y_values.push_back(exp((min_dist-tmp)/spread)); - else { - logFile<< "Error: Negative x_values during Parameter Estimation"< lm(functor); - int status = lm.minimize(x); - logFile << "LM optimization status: " << status << std::endl; - cout << "LM optimization status: " << status << std::endl; - // - // Results - // The 'x' vector also contains the results of the optimization. 
- // - logFile << "Optimization results" << std::endl; - logFile << "\ta: " << x(0) << std::endl; - logFile << "\tb: " << x(1) << std::endl; - cout << "Optimization results" << std::endl; - cout << "\ta: " << x(0) << std::endl; - cout << "\tb: " << x(1) << std::endl; - - a=x(0); - b=x(1); - -} +// This section of program is the Levenberg-Marquardt solution to estimate 2 parameters of a and b. +//and was modified from this source: https://github.com/SarvagyaVaish/Eigen-Levenberg-Marquardt-Optimization + +#include +#include +#include +#include +#include +#include +#include +using namespace std; + +struct LMFunctor +{ + // 'm' pairs of (x, f(x)) + Eigen::MatrixXf measuredValues; + + // Compute 'm' errors, one for each data point, for the given parameter values in 'x' + int operator()(const Eigen::VectorXf &x, Eigen::VectorXf &fvec) const + { + // 'x' has dimensions n x 1 + // It contains the current estimates for the parameters. + + // 'fvec' has dimensions m x 1 + // It will contain the error for each data point. + + float aParam = x(0); + float bParam = x(1); + + for (int i = 0; i < values(); i++) { + float xValue = measuredValues(i, 0); + float yValue = measuredValues(i, 1); + + fvec(i) = yValue - (1.0 / (1.0+ aParam * pow(xValue, 2*bParam)) ); + } + return 0; + } + + // Compute the jacobian of the errors + int df(const Eigen::VectorXf &x, Eigen::MatrixXf &fjac) const + { + // 'x' has dimensions n x 1 + // It contains the current estimates for the parameters. + + // 'fjac' has dimensions m x n + // It will contain the jacobian of the errors, calculated numerically in this case. 
+ + float epsilon; + epsilon = 1e-5f; + + for (int i = 0; i < x.size(); i++) { + Eigen::VectorXf xPlus(x); + xPlus(i) += epsilon; + Eigen::VectorXf xMinus(x); + xMinus(i) -= epsilon; + + Eigen::VectorXf fvecPlus(values()); + operator()(xPlus, fvecPlus); + + Eigen::VectorXf fvecMinus(values()); + operator()(xMinus, fvecMinus); + + Eigen::VectorXf fvecDiff(values()); + fvecDiff = (fvecPlus - fvecMinus) / (2.0f * epsilon); + + fjac.block(0, i, values(), 1) = fvecDiff; + } + + return 0; + } + + // Number of data points, i.e. values. + int m; + + // Returns 'm', the number of values. + int values() const { return m; } + + // The number of parameters, i.e. inputs. + int n; + + // Returns 'n', the number of inputs. + int inputs() const { return n; } + +}; + + + +// +// Goal +// +// Given a non-linear equation: f(x) = 1.0/(1.0+a*pow(x,2*b)) +// and 'm' data points (x1, f(x1)), (x2, f(x2)), ..., (xm, f(xm)) +// our goal is to estimate 'n' parameters (2 in this case: a, b) +// using LM optimization. +// +void estimateParameters(float &a, float &b, float min_dist, float spread, ofstream& logFile) +{ + + std::vector x_values; + std::vector y_values; + + /** + * The interval used for data fitting + * The values were adopted from https://github.com/lmcinnes/umap/blob/master/umap/umap_.py#L1138 + */ + const float minInterval=0; + const float maxInterval=3*spread; + const int intervalCounts=300; + + + for (int i = 0; i min_dist) y_values.push_back(exp((min_dist-tmp)/spread)); + else { + logFile<< "Error: Negative x_values during Parameter Estimation"< lm(functor); + int status = lm.minimize(x); + logFile << "LM optimization status: " << status << std::endl; + cout << "LM optimization status: " << status << std::endl; + // + // Results + // The 'x' vector also contains the results of the optimization. 
+ // + logFile << "Optimization results" << std::endl; + logFile << "\ta: " << x(0) << std::endl; + logFile << "\tb: " << x(1) << std::endl; + cout << "Optimization results" << std::endl; + cout << "\ta: " << x(0) << std::endl; + cout << "\tb: " << x(1) << std::endl; + + a=x(0); + b=x(1); + +} diff --git a/dimension_reduction/UMAP/Shared-Memory-GPU/LMOptimization.h b/dimension_reduction/UMAP/Shared-Memory-GPU/LMOptimization.h index 1a844b5f8..0630437a1 100644 --- a/dimension_reduction/UMAP/Shared-Memory-GPU/LMOptimization.h +++ b/dimension_reduction/UMAP/Shared-Memory-GPU/LMOptimization.h @@ -2,4 +2,3 @@ using namespace std; void estimateParameters(float &a, float &b, float min_dist,float spread, ofstream& logFile); - diff --git a/dimension_reduction/UMAP/Shared-Memory-GPU/Metrics.cpp b/dimension_reduction/UMAP/Shared-Memory-GPU/Metrics.cpp index 320d269f5..f9a8d7a04 100644 --- a/dimension_reduction/UMAP/Shared-Memory-GPU/Metrics.cpp +++ b/dimension_reduction/UMAP/Shared-Memory-GPU/Metrics.cpp @@ -34,7 +34,7 @@ double log_beta(double x, double y){ double value = -log(b); for (int i = 1; i < int(a); ++i) value += log(i) - log(b + i); return value; - } + } else return approx_log_Gamma(x) + approx_log_Gamma(y) - approx_log_Gamma(x + y); } @@ -48,18 +48,18 @@ double log_single_beta(double x){ */ int classification (string distanceKeyword){ if (distanceKeyword == "euclidean") return 1; - else if (distanceKeyword == "manhattan") return 2; - else if (distanceKeyword == "minkowski") return 3; - else if (distanceKeyword == "cosine") return 4; - else if (distanceKeyword == "correlation") return 5; - else if (distanceKeyword == "bray_curtis") return 6; - else if (distanceKeyword == "ll_dirichlet") return 7; - else if (distanceKeyword == "jaccard") return 8; - else if (distanceKeyword == "dice") return 9; - else if (distanceKeyword == "categorical_distance") return 10; - else if (distanceKeyword == "ordinal_distance") return 11; - else if (distanceKeyword == "count_distance") 
return 12; - else if (distanceKeyword == "levenshtein") return 13; + else if (distanceKeyword == "manhattan") return 2; + else if (distanceKeyword == "minkowski") return 3; + else if (distanceKeyword == "cosine") return 4; + else if (distanceKeyword == "correlation") return 5; + else if (distanceKeyword == "bray_curtis") return 6; + else if (distanceKeyword == "ll_dirichlet") return 7; + else if (distanceKeyword == "jaccard") return 8; + else if (distanceKeyword == "dice") return 9; + else if (distanceKeyword == "categorical_distance") return 10; + else if (distanceKeyword == "ordinal_distance") return 11; + else if (distanceKeyword == "count_distance") return 12; + else if (distanceKeyword == "levenshtein") return 13; } /** @@ -69,15 +69,15 @@ int classification (string distanceKeyword){ * @param *it and *it2 indices of the desired points in input dataset * @param Dim is #columns (or features) in input dataset * @param distanceV1 is the first optional variable needed for computing distance in some metrics - * @param distanceV2 is the second optional variable needed for computing distance in some metrics - * @param filePathOptionalArray The full path to optional array for the distance metric computation - * @param logFile The errors and informational messages are outputted to the log file - * @return spatial distance between points two points + * @param distanceV2 is the second optional variable needed for computing distance in some metrics + * @param filePathOptionalArray The full path to optional array for the distance metric computation + * @param logFile The errors and informational messages are outputted to the log file + * @return spatial distance between points two points */ double computeDistance (string distanceKeyword, double** dataPoints, int it, int it2, int Dim, float distanceV1, float distanceV2, string filePathOptionalArray, ofstream & logFile){ /** - * We first focus on computing the distance from a few Metrics that depend on input array from the 
user + * We first focus on computing the distance from a few Metrics that depend on input array from the user */ if (filePathOptionalArray !=""){ /** @@ -89,14 +89,14 @@ double computeDistance (string distanceKeyword, double** dataPoints, int it, int if (RecordCounts != Dim) { logFile<<"ALERT: The Optional Vector has different length than the number of features in the input data set"< 0.9) self_denom1 += log_single_beta(dataPoints[it][i]); - if (dataPoints[it2][i] > 0.9) self_denom2 += log_single_beta(dataPoints[it2][i]); - } + if (dataPoints[it2][i] > 0.9) self_denom2 += log_single_beta(dataPoints[it2][i]); + } } return sqrt(1.0 / n2 * (log_b - log_beta(n1, n2) - (self_denom2 - log_single_beta(n2))) @@ -271,7 +271,7 @@ double computeDistance (string distanceKeyword, double** dataPoints, int it, int } else if (distanceKeyword =="jaccard") { - int x_true, y_true, num_non_zero=0, num_equal=0; + int x_true, y_true, num_non_zero=0, num_equal=0; for (int i = 0; i < Dim; ++i) { if ( dataPoints[it][i] < epsilon) x_true=0; @@ -281,7 +281,7 @@ double computeDistance (string distanceKeyword, double** dataPoints, int it, int else y_true=1; if (x_true==1 || y_true==1) ++num_non_zero; - if (x_true==1 && y_true==1) ++num_equal; + if (x_true==1 && y_true==1) ++num_equal; } if (num_non_zero == 0) return 0.0; @@ -298,7 +298,7 @@ double computeDistance (string distanceKeyword, double** dataPoints, int it, int else y_true=1; if (x_true==1 && y_true==1) ++num_true_true; - if (x_true != y_true) ++num_not_equal; + if (x_true != y_true) ++num_not_equal; } if (num_not_equal==0) return 0.0; @@ -335,7 +335,7 @@ double computeDistance (string distanceKeyword, double** dataPoints, int it, int for (int k = lo; k < hi; ++k) { result += k * log_lambda - poisson_lambda - log_k_factorial; log_k_factorial += log(k); - } + } return result/normalisation; } @@ -356,7 +356,7 @@ double computeDistance (string distanceKeyword, double** dataPoints, int it, int float minVal,deletion_cost,insertion_cost; 
for (int i=0; i max_distance) return float(max_distance)/normalisation; } - return v0[y_len] / normalisation; + return v0[y_len] / normalisation; } else { logFile << "Wrong input for metric name!" << endl; diff --git a/dimension_reduction/UMAP/Shared-Memory-GPU/Metrics.cuh b/dimension_reduction/UMAP/Shared-Memory-GPU/Metrics.cuh index 3d07bd7ee..368449871 100644 --- a/dimension_reduction/UMAP/Shared-Memory-GPU/Metrics.cuh +++ b/dimension_reduction/UMAP/Shared-Memory-GPU/Metrics.cuh @@ -19,7 +19,7 @@ __device__ double log_betaGPU(double x, double y){ double value = -log(b); for (int i = 1; i < int(a); ++i) value += log(i) - log(b + i); return value; - } + } else return approx_log_GammaGPU(x) + approx_log_GammaGPU(y) - approx_log_GammaGPU(x + y); } @@ -38,7 +38,7 @@ __device__ double distanceCompute(int Dim, double * device_dataPointsGPU, int pa double output; for (int i=0; i 0.9) self_denom1 += log_single_betaGPU(device_dataPointsGPU[par1*Dim+i]); - if (device_dataPointsGPU[par2*Dim+i] > 0.9) self_denom2 += log_single_betaGPU(device_dataPointsGPU[par2*Dim+i]); - } + if (device_dataPointsGPU[par2*Dim+i] > 0.9) self_denom2 += log_single_betaGPU(device_dataPointsGPU[par2*Dim+i]); + } } return sqrt(1.0 / n2 * (log_b - log_betaGPU(n1, n2) - (self_denom2 - log_single_betaGPU(n2))) + 1.0 / n1 * (log_b - log_betaGPU(n2, n1) - (self_denom1 - log_single_betaGPU(n1))) ); } else if (metricID ==8) { //jaccard - int x_true, y_true, num_non_zero=0, num_equal=0; + int x_true, y_true, num_non_zero=0, num_equal=0; for (int i = 0; i < Dim; ++i) { if ( device_dataPointsGPU[par1*Dim+i] < epsilon) x_true=0; @@ -137,7 +137,7 @@ __device__ double distanceCompute(int Dim, double * device_dataPointsGPU, int pa else y_true=1; if (x_true==1 || y_true==1) ++num_non_zero; - if (x_true==1 && y_true==1) ++num_equal; + if (x_true==1 && y_true==1) ++num_equal; } if (num_non_zero == 0) return 0.0; @@ -154,7 +154,7 @@ __device__ double distanceCompute(int Dim, double * device_dataPointsGPU, int pa 
else y_true=1; if (x_true==1 && y_true==1) ++num_true_true; - if (x_true != y_true) ++num_not_equal; + if (x_true != y_true) ++num_not_equal; } if (num_not_equal==0) return 0.0; @@ -191,7 +191,7 @@ __device__ double distanceCompute(int Dim, double * device_dataPointsGPU, int pa for (int k = lo; k < hi; ++k) { result += k * log_lambda - poisson_lambda - log_k_factorial; log_k_factorial += log(k); - } + } return result/normalisation; } @@ -212,7 +212,7 @@ __device__ double distanceCompute(int Dim, double * device_dataPointsGPU, int pa float minVal,deletion_cost,insertion_cost; for (int i=0; i max_distance) return float(max_distance)/normalisation; } - return v0[y_len] / normalisation; + return v0[y_len] / normalisation; } else { printf("Wrong input for GPU metric name!"); } return -1; -} - +} diff --git a/dimension_reduction/UMAP/Shared-Memory-GPU/SGD.cuh b/dimension_reduction/UMAP/Shared-Memory-GPU/SGD.cuh index 597d4cfe6..c107aa31b 100644 --- a/dimension_reduction/UMAP/Shared-Memory-GPU/SGD.cuh +++ b/dimension_reduction/UMAP/Shared-Memory-GPU/SGD.cuh @@ -1,4 +1,4 @@ -#include +#include #include #include #include @@ -10,7 +10,7 @@ using namespace std; /** * negative_sample_rate is a Constant variable on GPU device - * negative_sample_rate is the rate at which we sample from the non-connected surrounding points as compared to the connected edges. + * negative_sample_rate is the rate at which we sample from the non-connected surrounding points as compared to the connected edges. * Increasing this value will result in greater repulsive force being applied, greater optimization cost, but slightly more accuracy. 
*/ #define negative_sample_rate_Value 5 @@ -23,11 +23,11 @@ __constant__ int negative_sample_rate=negative_sample_rate_Value; __device__ double rdist(double * embedding, int Dim, int index1, int index2){ double dist_squared=0; - for (int j = 0; j < Dim; ++j) { + for (int j = 0; j < Dim; ++j) { double tmp= embedding[index1*Dim+j]-embedding[index2*Dim+j]; dist_squared += tmp *tmp; - } - return dist_squared; + } + return dist_squared; } /** @@ -35,7 +35,7 @@ __device__ double rdist(double * embedding, int Dim, int index1, int index2){ */ __global__ void setup_curand(curandState *state){ int idx = threadIdx.x+blockDim.x*blockIdx.x; - int seed=threadIdx.x; + int seed=threadIdx.x; curand_init(seed, idx, 0, &state[idx]); } @@ -57,7 +57,7 @@ __device__ double clip(double value){ /** * GPU kernel to initialize some arrays on GPU memory * device_epoch_of_next_sample is an index of the epoch state of the edges. If it is less than epoch index, we will use the edge in the computation - * device_epoch_of_next_negative_sample is an index of the epoch state of the edges for sampling from non-connected surrounding points. + * device_epoch_of_next_negative_sample is an index of the epoch state of the edges for sampling from non-connected surrounding points. 
*/ __global__ void initializeEpochs(int edgeCounts, float* device_epochs_per_sample, float *device_epoch_of_next_sample, float *device_epochs_per_negative_sample, float *device_epoch_of_next_negative_sample){ @@ -66,65 +66,63 @@ __global__ void initializeEpochs(int edgeCounts, float* device_epochs_per_sample if (tid < edgeCounts){ device_epoch_of_next_sample[tid]=device_epochs_per_sample[tid]; device_epochs_per_negative_sample[tid]=device_epochs_per_sample[tid]/negative_sample_rate; - device_epoch_of_next_negative_sample[tid]=device_epochs_per_negative_sample[tid]; + device_epoch_of_next_negative_sample[tid]=device_epochs_per_negative_sample[tid]; } -} +} /** - * Main GPU kernel to solve Stochastic Gradient Descent (SGD) problem + * Main GPU kernel to solve Stochastic Gradient Descent (SGD) problem */ __global__ void SGDEngine(double * embedding, int * head, int * tail, float alpha, int N,int DimLowSpace, float aValue, float bValue, int edgeCounts,float* device_epochs_per_sample, float *device_epoch_of_next_sample, float *device_epochs_per_negative_sample, float *device_epoch_of_next_negative_sample ,int n, int move_other,curandState *state){ /** * dEpsilon is zero approximation in double precision - */ + */ double dEpsilon=1e-14; int i = blockIdx.x * blockDim.x + threadIdx.x; if ( i < edgeCounts){ - if (device_epoch_of_next_sample[i] <= n){ - int headIndex = head[i]; - int tailIndex = tail[i]; - double dist_squared = rdist(embedding, DimLowSpace, headIndex, tailIndex); + if (device_epoch_of_next_sample[i] <= n){ + int headIndex = head[i]; + int tailIndex = tail[i]; + double dist_squared = rdist(embedding, DimLowSpace, headIndex, tailIndex); double grad_coeff; - if (dist_squared 0) { grad_d = alpha*clip(grad_coeff*(embedding[headIndex*DimLowSpace+jj]-embedding[randomIndex*DimLowSpace+jj])); - } else { - grad_d = alpha*4.0; + } else { + grad_d = alpha*4.0; } atomicAdd(&embedding[headIndex*DimLowSpace+jj], grad_d); - } + } } - device_epoch_of_next_negative_sample[i] 
+= (n_neg_samples * device_epochs_per_negative_sample[i]); + device_epoch_of_next_negative_sample[i] += (n_neg_samples * device_epochs_per_negative_sample[i]); } } } - - diff --git a/dimension_reduction/UMAP/Shared-Memory-GPU/highDComputes.cpp b/dimension_reduction/UMAP/Shared-Memory-GPU/highDComputes.cpp index fa48d3125..46029f176 100644 --- a/dimension_reduction/UMAP/Shared-Memory-GPU/highDComputes.cpp +++ b/dimension_reduction/UMAP/Shared-Memory-GPU/highDComputes.cpp @@ -1,14 +1,14 @@ /** - * Extra Functions needed for computations + * Extra Functions needed for computations */ -#include +#include #include using namespace std; /** - * Compute B_Index and B_Dist for the closest points (K-NNs) + * Compute B_Index and B_Dist for the closest points (K-NNs) */ void findMin(int** B_Index,double** B_Dist, int N,int K,int* B_Index_Min,double* B_Dist_Min){ @@ -38,7 +38,7 @@ void findSigma(double ** B_Dist, double * B_Dist_Min, double * SigmaValues, int double target=log2(K); /** * Design Parameters to estimate SigmaValues - */ + */ const int iterations=640; const double Error=1e-5; diff --git a/dimension_reduction/UMAP/Shared-Memory-GPU/highDComputes.h b/dimension_reduction/UMAP/Shared-Memory-GPU/highDComputes.h index b205d956c..9eefa2f06 100644 --- a/dimension_reduction/UMAP/Shared-Memory-GPU/highDComputes.h +++ b/dimension_reduction/UMAP/Shared-Memory-GPU/highDComputes.h @@ -1,6 +1,6 @@ #include #include -#include +#include using namespace std; @@ -9,7 +9,7 @@ void findMin(int** B_Index,double** B_Dist, int N,int K,int* B_Index_Min,double* void findSigma(double ** B_Dist, double * B_Dist_Min, double * SigmaValues, int N, int K); /** - * Read the output of linux command execution + * Read the output of linux command execution * @param cmd is the linux command to be executed * @return the output from the execution of the linux command */ @@ -25,5 +25,3 @@ inline std::string exec(const char* cmd) { } return result; } - - diff --git 
a/dimension_reduction/UMAP/Shared-Memory-GPU/main.cu b/dimension_reduction/UMAP/Shared-Memory-GPU/main.cu index a06e7059f..e49015bf5 100644 --- a/dimension_reduction/UMAP/Shared-Memory-GPU/main.cu +++ b/dimension_reduction/UMAP/Shared-Memory-GPU/main.cu @@ -1,15 +1,15 @@ /** * @author Mahdi Maghrebi - * This code is the CUDA implementation for UMAP algorithm used in dimension reduction. + * This code is the CUDA implementation for UMAP algorithm used in dimension reduction. * The reference paper is “UMAP: Uniform Manifold Approximation and Projection for Dimension Reduction“, by McInnes et al., 2018 (https://arxiv.org/abs/1802.03426) * May 2020 */ #include #include -#include -#include -#include +#include +#include +#include #include #include #include @@ -33,7 +33,7 @@ using namespace Eigen; int main(int argc, char ** argv) { /** - * The errors and informational messages are outputted to the log file + * The errors and informational messages are outputted to the log file */ ofstream logFile; string logFileName="Setting.txt"; @@ -53,7 +53,7 @@ int main(int argc, char ** argv) { * distanceMetric is the metric to compute the distance between the points in high-D space, by deafult should be euclidean * distanceV1 is the first optional variable needed for computing distance in some metrics * distanceV2 is the second optional variable needed for computing distance in some metrics - * inputPathOptionalArray is the full path to the directory that contains a csv file of the optional array needed for computing distance in some metrics. + * inputPathOptionalArray is the full path to the directory that contains a csv file of the optional array needed for computing distance in some metrics. 
*/ string filePath, filePathOptionalArray="", outputPath, LogoutputPath, inputPath; int K,DimLowSpace,n_epochs; @@ -153,29 +153,29 @@ int main(int argc, char ** argv) { logFile<<"The full path to the input file: "<< filePath< N) { logFile<<" The desired number of NN has exceeded the size of dataset "< adjacencyMatrixA(N,N), adjacencyMatrixAT(N,N), graphSM(N,N); + SparseMatrix adjacencyMatrixA(N,N), adjacencyMatrixAT(N,N), graphSM(N,N); typedef Eigen::Triplet T; std::vector tripletList; tripletList.reserve(N*K); for (int i=0; i::InnerIterator it(graphSM,k); it; ++it) { - sum += it.value(); - if (it.value() > MaxWeight) MaxWeight=it.value(); + sum += it.value(); + if (it.value() > MaxWeight) MaxWeight=it.value(); } - } + } delete [] SigmaValues; delete [] B_Dist_Min; @@ -323,50 +323,50 @@ int main(int argc, char ** argv) { cout<<"------------Setting Low-D Space Design------------"<::InnerIterator it(graphSM,k); it; ++it) { - if (it.value() < MaxWeight/n_epochs) continue; + if (it.value() < MaxWeight/n_epochs) continue; epochs_per_sample.push_back(MaxWeight/it.value()); head.push_back(it.col()); - tail.push_back(it.row()); + tail.push_back(it.row()); } } /** * Making 1D array for the coordinates of the points in Low-D space and transfer it to GPU memory - */ - double* embedding1D = new double[N*DimLowSpace]; + */ + double* embedding1D = new double[N*DimLowSpace]; for (int i = 0; i < N; ++i) for (int j = 0; j < DimLowSpace; ++j) embedding1D[i*DimLowSpace+j]=embedding[i][j]; @@ -392,16 +392,16 @@ int main(int argc, char ** argv) { double *device_embedding1D; cudaMalloc ((void **) &device_embedding1D, N*DimLowSpace*sizeof(double)); gpuErrchk(cudaPeekAtLastError()); - cudaMemcpy (device_embedding1D,embedding1D , N*DimLowSpace*sizeof(double),cudaMemcpyHostToDevice); + cudaMemcpy (device_embedding1D,embedding1D , N*DimLowSpace*sizeof(double),cudaMemcpyHostToDevice); gpuErrchk(cudaPeekAtLastError()); /** * edgeCounts is total number of edges in the high-D space graph - */ + 
*/ int edgeCounts=epochs_per_sample.size(); /** * Converting some vectors to array pointers and transfer them to GPU memory - */ + */ float * pointer_epochs_per_sample = new float [edgeCounts]; int * pointer_head= new int[edgeCounts]; int * pointer_tail= new int[edgeCounts]; @@ -415,28 +415,28 @@ int main(int argc, char ** argv) { int * device_head, *device_tail; cudaMalloc ((void **) &device_head, edgeCounts*sizeof(int)); gpuErrchk(cudaPeekAtLastError()); - cudaMalloc ((void **) &device_tail, edgeCounts*sizeof(int)); + cudaMalloc ((void **) &device_tail, edgeCounts*sizeof(int)); gpuErrchk(cudaPeekAtLastError()); - cudaMemcpy (device_head, pointer_head, edgeCounts*sizeof(int),cudaMemcpyHostToDevice); + cudaMemcpy (device_head, pointer_head, edgeCounts*sizeof(int),cudaMemcpyHostToDevice); gpuErrchk(cudaPeekAtLastError()); - cudaMemcpy (device_tail, pointer_tail, edgeCounts*sizeof(int),cudaMemcpyHostToDevice); + cudaMemcpy (device_tail, pointer_tail, edgeCounts*sizeof(int),cudaMemcpyHostToDevice); gpuErrchk(cudaPeekAtLastError()); - + /** * Allocate memory in GPU for some arrays * device_epoch_of_next_sample is an index of the epoch state of the edges. If it is less than epoch index, we will use the edge in the computation - * device_epoch_of_next_negative_sample is an index of the epoch state of the edges for sampling from non-connected surrounding points. - */ - float* device_epochs_per_sample, *device_epoch_of_next_sample, *device_epochs_per_negative_sample, *device_epoch_of_next_negative_sample; + * device_epoch_of_next_negative_sample is an index of the epoch state of the edges for sampling from non-connected surrounding points. 
+ */ + float* device_epochs_per_sample, *device_epoch_of_next_sample, *device_epochs_per_negative_sample, *device_epoch_of_next_negative_sample; cudaMalloc ((void **) &device_epochs_per_sample, edgeCounts*sizeof(float)); gpuErrchk(cudaPeekAtLastError()); cudaMalloc ((void **) &device_epoch_of_next_sample, edgeCounts*sizeof(float)); gpuErrchk(cudaPeekAtLastError()); cudaMalloc ((void **) &device_epochs_per_negative_sample, edgeCounts*sizeof(float)); gpuErrchk(cudaPeekAtLastError()); - cudaMalloc ((void **) &device_epoch_of_next_negative_sample, edgeCounts*sizeof(float)); + cudaMalloc ((void **) &device_epoch_of_next_negative_sample, edgeCounts*sizeof(float)); gpuErrchk(cudaPeekAtLastError()); - cudaMemcpy (device_epochs_per_sample, pointer_epochs_per_sample, edgeCounts*sizeof(float),cudaMemcpyHostToDevice); + cudaMemcpy (device_epochs_per_sample, pointer_epochs_per_sample, edgeCounts*sizeof(float),cudaMemcpyHostToDevice); gpuErrchk(cudaPeekAtLastError()); /** * requiredBlocks is the required number of thread blocks in GPU computes @@ -452,19 +452,19 @@ int main(int argc, char ** argv) { * Create curand state on GPU for random number generation and initialize it */ curandState *device_state; - cudaMalloc(&device_state, edgeCounts*sizeof(curandState)); + cudaMalloc(&device_state, edgeCounts*sizeof(curandState)); gpuErrchk(cudaPeekAtLastError()); setup_curand<<>>(device_state); gpuErrchk(cudaPeekAtLastError()); - gpuErrchk(cudaDeviceSynchronize()); + gpuErrchk(cudaDeviceSynchronize()); /** * move_other is equal to 1 if not embedding new previously unseen points to low-D space */ - const int move_other=1; + const int move_other=1; /** * The main training loop in SGD solver - */ - for (int n = 1; n <= n_epochs; ++n) { + */ + for (int n = 1; n <= n_epochs; ++n) { if (n%100 == 0){ logFile << "SGD iteration = "<&1 /dev/null"; string outputCmd3 = exec(cmd3.c_str()); diff --git a/dimension_reduction/UMAP/Shared-Memory-OpenMP/Dockerfile 
b/dimension_reduction/UMAP/Shared-Memory-OpenMP/Dockerfile index 2a5518097..c819f7589 100644 --- a/dimension_reduction/UMAP/Shared-Memory-OpenMP/Dockerfile +++ b/dimension_reduction/UMAP/Shared-Memory-OpenMP/Dockerfile @@ -11,21 +11,21 @@ RUN mkdir -p /home/UMAP /home/Inputs /home/Outputs # Specify the working directory WORKDIR /home/UMAP - -# Install Boost Library + +# Install Boost Library RUN wget https://dl.bintray.com/boostorg/release/1.71.0/source/boost_1_71_0.tar.gz -RUN tar xfz boost_1_71_0.tar.gz +RUN tar xfz boost_1_71_0.tar.gz RUN rm boost_1_71_0.tar.gz WORKDIR /home/UMAP/boost_1_71_0 RUN ./bootstrap.sh RUN ./b2 -ENV LD_LIBRARY_PATH="/home/UMAP/boost_1_71_0/stage/lib:${LD_LIBRARY_PATH}" +ENV LD_LIBRARY_PATH="/home/UMAP/boost_1_71_0/stage/lib:${LD_LIBRARY_PATH}" WORKDIR /home/UMAP -# Download Eigen3 Library +# Download Eigen3 Library RUN wget https://gitlab.com/libeigen/eigen/-/archive/3.3.7/eigen-3.3.7.tar.gz -RUN tar xfz eigen-3.3.7.tar.gz +RUN tar xfz eigen-3.3.7.tar.gz RUN rm eigen-3.3.7.tar.gz # Copy the current folder which contains C++ source code to the Docker image @@ -34,4 +34,3 @@ COPY . 
/home/UMAP RUN g++ -I/home/UMAP/boost_1_71_0 -I/home/UMAP/eigen-3.3.7 main.cpp KNN_OpenMP_Code.cpp highDComputes.cpp Initialization.cpp LMOptimization.cpp Metrics.cpp SGD.cpp -o Out.exe -O2 -larmadillo -L/home/UMAP/boost_1_71_0/stage/lib -lboost_iostreams -lboost_system -lboost_filesystem -fopenmp ENTRYPOINT ["./Out.exe"] - diff --git a/dimension_reduction/UMAP/Shared-Memory-OpenMP/Examples/Plotting MNIST Data/plot.py b/dimension_reduction/UMAP/Shared-Memory-OpenMP/Examples/Plotting MNIST Data/plot.py index c32700647..1041308be 100644 --- a/dimension_reduction/UMAP/Shared-Memory-OpenMP/Examples/Plotting MNIST Data/plot.py +++ b/dimension_reduction/UMAP/Shared-Memory-OpenMP/Examples/Plotting MNIST Data/plot.py @@ -1,44 +1,53 @@ import matplotlib -import pandas as pd import matplotlib.pyplot as plt import numpy as np +import pandas as pd +df = pd.read_csv("ProjectedData_EmbeddedSpace.csv") -df = pd.read_csv("ProjectedData_EmbeddedSpace.csv") - -x=df['Dimension1'] -y=df['Dimension2'] -#labels=df['Data'] - -labels = pd.read_csv("mnist_784-Labels.csv") -labels2=np.int32(labels) -labels3=tuple(labels2.reshape(1,-1)[0]) +x = df["Dimension1"] +y = df["Dimension2"] +labels = pd.read_csv("mnist_784-Labels.csv") +labels2 = np.int32(labels) +labels3 = tuple(labels2.reshape(1, -1)[0]) -scatter= plt.scatter(x,y, c = labels3, cmap = matplotlib.colors.ListedColormap(['red','green','blue','purple', 'yellow', 'black','magenta','brown','grey', 'cyan']), s=1) +scatter = plt.scatter( + x, + y, + c=labels3, + cmap=matplotlib.colors.ListedColormap( + [ + "red", + "green", + "blue", + "purple", + "yellow", + "black", + "magenta", + "brown", + "grey", + "cyan", + ], + ), + s=1, +) -classes=['0','1','2','3', '4', '5','6','7','8', '9'] -plt.legend(handles=scatter.legend_elements()[0], labels=classes, loc=7, bbox_to_anchor=(1.13, 0.5)) -plt.savefig('Image.png') +classes = ["0", "1", "2", "3", "4", "5", "6", "7", "8", "9"] +plt.legend( + handles=scatter.legend_elements()[0], + labels=classes, 
+ loc=7, + bbox_to_anchor=(1.13, 0.5), +) +plt.savefig("Image.png") -#colors = ['red','green','blue','purple', 'yellow', 'blank','magenta','brown','grey', 'cyan'] -#colors = ['#1f77b4', '#ff7f0e', '#2ca02c', '#d62728', '#9467bd', '#8c564b', '#e377c2', '#7f7f7f', '#bcbd22', '#17becf'] +# colors = ['#1f77b4', '#ff7f0e', '#2ca02c', '#d62728', '#9467bd', '#8c564b', '#e377c2', '#7f7f7f', '#bcbd22', '#17becf'] -#labels = pd.read_csv("mnist_784_Labels.csv") -#labels2 = np.float32(labels) -#plt.scatter(x,y,c=labels, cmap=matplotlib.colors.ListedColormap(colors)) -#plt.scatter(xx,yy,c=labels, cmap=matplotlib.colors.ListedColormap(colors)) -#plt.scatter(x,y, edgecolors='r') -#plt.xlim(-10, 10) -#plt.ylim(-10, 10) -#plt.savefig('myImage.png') -#plt.show() -#xx=np.float32(x) -#yy=np.float32(y) diff --git a/dimension_reduction/UMAP/Shared-Memory-OpenMP/Initialization.cpp b/dimension_reduction/UMAP/Shared-Memory-OpenMP/Initialization.cpp index e6c01353c..10b864389 100644 --- a/dimension_reduction/UMAP/Shared-Memory-OpenMP/Initialization.cpp +++ b/dimension_reduction/UMAP/Shared-Memory-OpenMP/Initialization.cpp @@ -19,12 +19,12 @@ void Initialization (bool randominitializing, double** embedding, ofstream& logF /** * zero approximation in float precision */ - float epsilon=1e-6; + float epsilon=1e-6; /** - * By deafult, low-D space dimensions are between -10 and 10 + * By deafult, low-D space dimensions are between -10 and 10 */ int minDimLowDSpace=-10; - int maxDimLowDSpace=10; + int maxDimLowDSpace=10; if (!randominitializing){ try{ @@ -32,10 +32,10 @@ void Initialization (bool randominitializing, double** embedding, ofstream& logF cout<<" Spectral Initialization of Data in Lower Space"<::InnerIterator it(graphSM,k); it; ++it) { graph[it.row()][it.col()]=it.value(); } - } + } /** * Removing the small weights in accordance to https://github.com/lmcinnes/umap/blob/master/umap/umap_.py#L1032 */ #pragma omp parallel for - for (int i = 0; i < N; ++i){ + for (int i = 0; i < N; ++i){ 
for (int j = 0; j < N; ++j){ - if (graph[i][j] < epsilon) continue; - if (graph[i][j] < MaxWeight/n_epochs) graph[i][j]=0; + if (graph[i][j] < epsilon) continue; + if (graph[i][j] < MaxWeight/n_epochs) graph[i][j]=0; } - } + } /** - * DegreeMatrix is a diagonal matrix contains information about the degree of each vertex + * DegreeMatrix is a diagonal matrix contains information about the degree of each vertex * sqrtDegreeMatrix transforms the diagonal values of DegreeMatrix by 1.0/sqrt() */ float** sqrtDegreeMatrix = new float*[N]; @@ -76,7 +76,7 @@ void Initialization (bool randominitializing, double** embedding, ofstream& logF for (int i = 0; i < N; ++i) { float sum=0; - for (int j = 0; j < N; ++j) { + for (int j = 0; j < N; ++j) { sum+=graph[i][j]; } sqrtDegreeMatrix[i][i]=1.0/sqrt(sum); @@ -84,10 +84,10 @@ void Initialization (bool randominitializing, double** embedding, ofstream& logF /** * aux_mem is the column-wise transformation of sqrtDegreeMatrix as needed by armadillo function fmat */ - float* aux_mem = new float[N*N]; + float* aux_mem = new float[N*N]; for (int i = 0; i < N; ++i){ for (int j = 0; j < N; ++j){ - aux_mem[j*N+i]=sqrtDegreeMatrix[i][j]; + aux_mem[j*N+i]=sqrtDegreeMatrix[i][j]; } } delete [] sqrtDegreeMatrix; @@ -95,11 +95,11 @@ void Initialization (bool randominitializing, double** embedding, ofstream& logF * Making an armadillo sparse matrix spmatDegreeMatrix from sqrtDegreeMatrix */ fmat matDegreeMatrix(aux_mem,N,N,false,true); - sp_fmat spmatDegreeMatrix(matDegreeMatrix); + sp_fmat spmatDegreeMatrix(matDegreeMatrix); /** * aux_mem2 is the column-wise transformation of adjacencyMatrix as needed by armadillo function fmat */ - float* aux_mem2 = new float[N*N]; + float* aux_mem2 = new float[N*N]; for (int i = 0; i < N; ++i){ for (int j = 0; j < N; ++j){ aux_mem2[j*N+i]=graph[i][j]; //column-wise @@ -114,12 +114,12 @@ void Initialization (bool randominitializing, double** embedding, ofstream& logF fmat 
matadjacencyMatrix(aux_mem2,N,N,false,true); sp_fmat spmatadjacencyMatrix(matadjacencyMatrix); /** - * Making an armadillo sparse matrix of identity - */ - sp_fmat Unity = speye(N,N); + * Making an armadillo sparse matrix of identity + */ + sp_fmat Unity = speye(N,N); /** - * Making an armadillo sparse matrix of Laplacian - */ + * Making an armadillo sparse matrix of Laplacian + */ sp_fmat laplacianMatrix; laplacianMatrix= Unity-spmatDegreeMatrix*spmatadjacencyMatrix*spmatDegreeMatrix; /** @@ -127,29 +127,29 @@ void Initialization (bool randominitializing, double** embedding, ofstream& logF */ fvec eigval; fmat eigvec; - eigs_sym(eigval, eigvec, laplacianMatrix, DimLowSpace+1 , "sm"); + eigs_sym(eigval, eigvec, laplacianMatrix, DimLowSpace+1 , "sm"); /** - * Converting eigenvectors to tmpvector + * Converting eigenvectors to tmpvector * will throw "error: Mat::col(): index out of bounds" if no eigvec was available */ typedef std::vector stdvec; std::vector< std::vector > tmpvector; for (int i = 1; i < DimLowSpace+1; ++i) { - stdvec vectest = arma::conv_to< stdvec >::from(eigvec.col(i)); - tmpvector.push_back(vectest); + stdvec vectest = arma::conv_to< stdvec >::from(eigvec.col(i)); + tmpvector.push_back(vectest); } /** * using tmpvector to intialize the locations of the points in low-D space * embedding should not be outside the chosen dimensions for low-D space */ double maxembedding=0; - for (int j = 0; j < DimLowSpace; ++j) { + for (int j = 0; j < DimLowSpace; ++j) { for (int i = 0; i < N; ++i) { double tmp=tmpvector[j][i]; embedding[i][j]= tmp; - if (abs(tmp) > maxembedding) maxembedding=tmp; + if (abs(tmp) > maxembedding) maxembedding=tmp; } } @@ -158,22 +158,22 @@ void Initialization (bool randominitializing, double** embedding, ofstream& logF // Also adding a noise as prescribed in https://github.com/lmcinnes/umap/blob/master/umap/umap_.py#L1040 unsigned seed = std::chrono::system_clock::now().time_since_epoch().count(); std::default_random_engine generator 
(seed); - std::normal_distribution distribution (0.0,1.0); + std::normal_distribution distribution (0.0,1.0); - for (int i = 0; i < N; ++i) { - for (int j = 0; j < DimLowSpace; ++j) { + for (int i = 0; i < N; ++i) { + for (int j = 0; j < DimLowSpace; ++j) { embedding[i][j] =embedding[i][j]* expansion+ SCALE*distribution(generator); } } } catch(std::exception& e){ - logFile<<" Spectral Initialization Failed. Will proceed with random initialization."< #include -#include -#include -#include +#include +#include +#include #include #include #include #include #include -#include +#include #include "KNN_OpenMP_Code.h" -#include "Metrics.h" +#include "Metrics.h" #include -#include -#include +#include +#include using boost::iostreams::mapped_file_source; -using boost::iostreams::stream; +using boost::iostreams::stream; using namespace std; /** - * Read the output of linux command execution + * Read the output of linux command execution * @param cmd is the linux command to be executed * @return the output from the execution of the linux command */ @@ -40,21 +40,21 @@ std::string exec(const char* cmd) { /** * Compute K-NN following the algorithm for shared-memory K-NN * @param filePath The full path to the input file containig the dataset. - * @param N Size of Dataset without the header (i.e.(#Rows in dataset)-1). - * @param Dim Dimension of Dataset (#Columns) + * @param N Size of Dataset without the header (i.e.(#Rows in dataset)-1). 
+ * @param Dim Dimension of Dataset (#Columns) * @param K the desired number of Nearest Neighbours to be computed * @param sampleRate The rate at which we do sampling * @param convThreshold Convergance Threshold - * @param logFile The errors and informational messages are outputted to the log file + * @param logFile The errors and informational messages are outputted to the log file * @param distanceMetric is the metric to compute the distance between the points in high-D space, by deafult should be euclidean * @param distanceV1 is the first optional variable needed for computing distance in some metrics - * @param distanceV2 is the second optional variable needed for computing distance in some metrics - * @param filePathOptionalArray The full path to optional array for the distance metric computation - * @return B_Index indices of K-NN for each data point - * @return B_Dist corresponding distance for K-NN indices stored in B_Index + * @param distanceV2 is the second optional variable needed for computing distance in some metrics + * @param filePathOptionalArray The full path to optional array for the distance metric computation + * @return B_Index indices of K-NN for each data point + * @return B_Dist corresponding distance for K-NN indices stored in B_Index */ //void computeKNNs(string filePath, const int N, const int Dim, const int K, float sampleRate, const int convThreshold,int** B_Index,double** B_Dist, ofstream& logFile){ -void computeKNNs(string filePath, const int N, const int Dim, const int K, float sampleRate, const int convThreshold,int** B_Index,double** B_Dist, ofstream& logFile, string distanceMetric, float distanceV1, float distanceV2, string filePathOptionalArray){ +void computeKNNs(string filePath, const int N, const int Dim, const int K, float sampleRate, const int convThreshold,int** B_Index,double** B_Dist, ofstream& logFile, string distanceMetric, float distanceV1, float distanceV2, string filePathOptionalArray){ logFile<<"------------Starting 
K-NN Solution------------"< -#include -#include -#include -#include -#include - -#include -using namespace std; - -struct LMFunctor -{ - // 'm' pairs of (x, f(x)) - Eigen::MatrixXf measuredValues; - - // Compute 'm' errors, one for each data point, for the given parameter values in 'x' - int operator()(const Eigen::VectorXf &x, Eigen::VectorXf &fvec) const - { - // 'x' has dimensions n x 1 - // It contains the current estimates for the parameters. - - // 'fvec' has dimensions m x 1 - // It will contain the error for each data point. - - float aParam = x(0); - float bParam = x(1); - - for (int i = 0; i < values(); i++) { - float xValue = measuredValues(i, 0); - float yValue = measuredValues(i, 1); - - fvec(i) = yValue - (1.0 / (1.0+ aParam * pow(xValue, 2*bParam)) ); - } - return 0; - } - - // Compute the jacobian of the errors - int df(const Eigen::VectorXf &x, Eigen::MatrixXf &fjac) const - { - // 'x' has dimensions n x 1 - // It contains the current estimates for the parameters. - - // 'fjac' has dimensions m x n - // It will contain the jacobian of the errors, calculated numerically in this case. - - float epsilon; - epsilon = 1e-5f; - - for (int i = 0; i < x.size(); i++) { - Eigen::VectorXf xPlus(x); - xPlus(i) += epsilon; - Eigen::VectorXf xMinus(x); - xMinus(i) -= epsilon; - - Eigen::VectorXf fvecPlus(values()); - operator()(xPlus, fvecPlus); - - Eigen::VectorXf fvecMinus(values()); - operator()(xMinus, fvecMinus); - - Eigen::VectorXf fvecDiff(values()); - fvecDiff = (fvecPlus - fvecMinus) / (2.0f * epsilon); - - fjac.block(0, i, values(), 1) = fvecDiff; - } - - return 0; - } - - // Number of data points, i.e. values. - int m; - - // Returns 'm', the number of values. - int values() const { return m; } - - // The number of parameters, i.e. inputs. - int n; - - // Returns 'n', the number of inputs. 
- int inputs() const { return n; } - -}; - - - -// -// Goal -// -// Given a non-linear equation: f(x) = 1.0/(1.0+a*pow(x,2*b)) -// and 'm' data points (x1, f(x1)), (x2, f(x2)), ..., (xm, f(xm)) -// our goal is to estimate 'n' parameters (2 in this case: a, b) -// using LM optimization. -// -void estimateParameters(float &a, float &b, float mindist, float spread, ofstream& logFile) -{ - - std::vector x_values; - std::vector y_values; - - /** - * The interval used for data fitting - * The values were adopted from https://github.com/lmcinnes/umap/blob/master/umap/umap_.py#L1138 - */ - const float minInterval=0; - const float maxInterval=3*spread; - const int intervalCounts=300; - - - for (int i = 0; i mindist) y_values.push_back(exp((mindist-tmp)/spread)); - else { - logFile<< "Error: Negative x_values during Parameter Estimation"< lm(functor); - int status = lm.minimize(x); - logFile << "LM optimization status: " << status << std::endl; - cout << "LM optimization status: " << status << std::endl; - // - // Results - // The 'x' vector also contains the results of the optimization. - // - logFile << "Optimization results" << std::endl; - logFile << "\ta: " << x(0) << std::endl; - logFile << "\tb: " << x(1) << std::endl; - cout << "Optimization results" << std::endl; - cout << "\ta: " << x(0) << std::endl; - cout << "\tb: " << x(1) << std::endl; - - a=x(0); - b=x(1); - -} +// This section of program is the Levenberg-Marquardt solution to estimate 2 parameters of a and b. 
+//and was modified from this source: https://github.com/SarvagyaVaish/Eigen-Levenberg-Marquardt-Optimization + +#include +#include +#include +#include +#include +#include + +#include +using namespace std; + +struct LMFunctor +{ + // 'm' pairs of (x, f(x)) + Eigen::MatrixXf measuredValues; + + // Compute 'm' errors, one for each data point, for the given parameter values in 'x' + int operator()(const Eigen::VectorXf &x, Eigen::VectorXf &fvec) const + { + // 'x' has dimensions n x 1 + // It contains the current estimates for the parameters. + + // 'fvec' has dimensions m x 1 + // It will contain the error for each data point. + + float aParam = x(0); + float bParam = x(1); + + for (int i = 0; i < values(); i++) { + float xValue = measuredValues(i, 0); + float yValue = measuredValues(i, 1); + + fvec(i) = yValue - (1.0 / (1.0+ aParam * pow(xValue, 2*bParam)) ); + } + return 0; + } + + // Compute the jacobian of the errors + int df(const Eigen::VectorXf &x, Eigen::MatrixXf &fjac) const + { + // 'x' has dimensions n x 1 + // It contains the current estimates for the parameters. + + // 'fjac' has dimensions m x n + // It will contain the jacobian of the errors, calculated numerically in this case. + + float epsilon; + epsilon = 1e-5f; + + for (int i = 0; i < x.size(); i++) { + Eigen::VectorXf xPlus(x); + xPlus(i) += epsilon; + Eigen::VectorXf xMinus(x); + xMinus(i) -= epsilon; + + Eigen::VectorXf fvecPlus(values()); + operator()(xPlus, fvecPlus); + + Eigen::VectorXf fvecMinus(values()); + operator()(xMinus, fvecMinus); + + Eigen::VectorXf fvecDiff(values()); + fvecDiff = (fvecPlus - fvecMinus) / (2.0f * epsilon); + + fjac.block(0, i, values(), 1) = fvecDiff; + } + + return 0; + } + + // Number of data points, i.e. values. + int m; + + // Returns 'm', the number of values. + int values() const { return m; } + + // The number of parameters, i.e. inputs. + int n; + + // Returns 'n', the number of inputs. 
+ int inputs() const { return n; } + +}; + + + +// +// Goal +// +// Given a non-linear equation: f(x) = 1.0/(1.0+a*pow(x,2*b)) +// and 'm' data points (x1, f(x1)), (x2, f(x2)), ..., (xm, f(xm)) +// our goal is to estimate 'n' parameters (2 in this case: a, b) +// using LM optimization. +// +void estimateParameters(float &a, float &b, float mindist, float spread, ofstream& logFile) +{ + + std::vector x_values; + std::vector y_values; + + /** + * The interval used for data fitting + * The values were adopted from https://github.com/lmcinnes/umap/blob/master/umap/umap_.py#L1138 + */ + const float minInterval=0; + const float maxInterval=3*spread; + const int intervalCounts=300; + + + for (int i = 0; i mindist) y_values.push_back(exp((mindist-tmp)/spread)); + else { + logFile<< "Error: Negative x_values during Parameter Estimation"< lm(functor); + int status = lm.minimize(x); + logFile << "LM optimization status: " << status << std::endl; + cout << "LM optimization status: " << status << std::endl; + // + // Results + // The 'x' vector also contains the results of the optimization. 
+ // + logFile << "Optimization results" << std::endl; + logFile << "\ta: " << x(0) << std::endl; + logFile << "\tb: " << x(1) << std::endl; + cout << "Optimization results" << std::endl; + cout << "\ta: " << x(0) << std::endl; + cout << "\tb: " << x(1) << std::endl; + + a=x(0); + b=x(1); + +} diff --git a/dimension_reduction/UMAP/Shared-Memory-OpenMP/LMOptimization.h b/dimension_reduction/UMAP/Shared-Memory-OpenMP/LMOptimization.h index 1a844b5f8..0630437a1 100644 --- a/dimension_reduction/UMAP/Shared-Memory-OpenMP/LMOptimization.h +++ b/dimension_reduction/UMAP/Shared-Memory-OpenMP/LMOptimization.h @@ -2,4 +2,3 @@ using namespace std; void estimateParameters(float &a, float &b, float min_dist,float spread, ofstream& logFile); - diff --git a/dimension_reduction/UMAP/Shared-Memory-OpenMP/Metrics.cpp b/dimension_reduction/UMAP/Shared-Memory-OpenMP/Metrics.cpp index 0d6e8b753..60448adeb 100644 --- a/dimension_reduction/UMAP/Shared-Memory-OpenMP/Metrics.cpp +++ b/dimension_reduction/UMAP/Shared-Memory-OpenMP/Metrics.cpp @@ -35,7 +35,7 @@ double log_beta(double x, double y){ double value = -log(b); for (int i = 1; i < int(a); ++i) value += log(i) - log(b + i); return value; - } + } else return approx_log_Gamma(x) + approx_log_Gamma(y) - approx_log_Gamma(x + y); } @@ -50,15 +50,15 @@ double log_single_beta(double x){ * @param *it and *it2 indices of the desired points in input dataset * @param Dim is #columns (or features) in input dataset * @param distanceV1 is the first optional variable needed for computing distance in some metrics - * @param distanceV2 is the second optional variable needed for computing distance in some metrics - * @param filePathOptionalArray The full path to optional array for the distance metric computation - * @param logFile The errors and informational messages are outputted to the log file - * @return spatial distance between points two points + * @param distanceV2 is the second optional variable needed for computing distance in some metrics 
+ * @param filePathOptionalArray The full path to optional array for the distance metric computation + * @param logFile The errors and informational messages are outputted to the log file + * @return spatial distance between points two points */ double computeDistance (string distanceKeyword, double** dataPoints, int it, int it2, int Dim, float distanceV1, float distanceV2, string filePathOptionalArray, ofstream & logFile){ /** - * We first focus on computing the distance from a few Metrics that depend on input array from the user + * We first focus on computing the distance from a few Metrics that depend on input array from the user */ if (filePathOptionalArray !=""){ /** @@ -70,14 +70,14 @@ double computeDistance (string distanceKeyword, double** dataPoints, int it, int if (RecordCounts != Dim) { logFile<<"ALERT: The Optional Vector has different length than the number of features in the input data set"< 0.9) self_denom1 += log_single_beta(dataPoints[it][i]); - if (dataPoints[it2][i] > 0.9) self_denom2 += log_single_beta(dataPoints[it2][i]); - } + if (dataPoints[it2][i] > 0.9) self_denom2 += log_single_beta(dataPoints[it2][i]); + } } return sqrt(1.0 / n2 * (log_b - log_beta(n1, n2) - (self_denom2 - log_single_beta(n2))) @@ -252,7 +252,7 @@ double computeDistance (string distanceKeyword, double** dataPoints, int it, int } else if (distanceKeyword =="jaccard") { - int x_true, y_true, num_non_zero=0, num_equal=0; + int x_true, y_true, num_non_zero=0, num_equal=0; for (int i = 0; i < Dim; ++i) { if ( dataPoints[it][i] < epsilon) x_true=0; @@ -262,7 +262,7 @@ double computeDistance (string distanceKeyword, double** dataPoints, int it, int else y_true=1; if (x_true==1 || y_true==1) ++num_non_zero; - if (x_true==1 && y_true==1) ++num_equal; + if (x_true==1 && y_true==1) ++num_equal; } if (num_non_zero == 0) return 0.0; @@ -279,7 +279,7 @@ double computeDistance (string distanceKeyword, double** dataPoints, int it, int else y_true=1; if (x_true==1 && y_true==1) 
++num_true_true; - if (x_true != y_true) ++num_not_equal; + if (x_true != y_true) ++num_not_equal; } if (num_not_equal==0) return 0.0; @@ -316,7 +316,7 @@ double computeDistance (string distanceKeyword, double** dataPoints, int it, int for (int k = lo; k < hi; ++k) { result += k * log_lambda - poisson_lambda - log_k_factorial; log_k_factorial += log(k); - } + } return result/normalisation; } @@ -337,7 +337,7 @@ double computeDistance (string distanceKeyword, double** dataPoints, int it, int float minVal,deletion_cost,insertion_cost; for (int i=0; i max_distance) return float(max_distance)/normalisation; } - return v0[y_len] / normalisation; + return v0[y_len] / normalisation; } else { logFile << "Wrong input for metric name!" << endl; diff --git a/dimension_reduction/UMAP/Shared-Memory-OpenMP/Metrics.h b/dimension_reduction/UMAP/Shared-Memory-OpenMP/Metrics.h index 48456c88d..a8c4a02c7 100644 --- a/dimension_reduction/UMAP/Shared-Memory-OpenMP/Metrics.h +++ b/dimension_reduction/UMAP/Shared-Memory-OpenMP/Metrics.h @@ -3,5 +3,3 @@ using namespace std; double computeDistance(string distanceKeyword, double** dataPoints, int it, int it2, int Dim, float distanceV1, float distanceV2, string filePathOptionalArray, ofstream & logFile); - - diff --git a/dimension_reduction/UMAP/Shared-Memory-OpenMP/SGD.cpp b/dimension_reduction/UMAP/Shared-Memory-OpenMP/SGD.cpp index 2896d5884..43f4289a8 100644 --- a/dimension_reduction/UMAP/Shared-Memory-OpenMP/SGD.cpp +++ b/dimension_reduction/UMAP/Shared-Memory-OpenMP/SGD.cpp @@ -1,11 +1,11 @@ -#include +#include using namespace std; /** * Standard clamping of a value into a fixed range (in this case -4.0 to 4.0) * This function is used in SGD solver - */ + */ double clip(double value){ const double clipLowVal=-4.0; @@ -22,14 +22,14 @@ double clip(double value){ /** * The squared distance between 2 points in the Low-D (embedded) space - */ + */ double rdist(double ** embedding, int Dim, int index1, int index2){ double dist_squared=0; - 
for (int j = 0; j < Dim; ++j) { + for (int j = 0; j < Dim; ++j) { dist_squared += pow(embedding[index1][j]-embedding[index2][j],2); - } + } - return dist_squared; + return dist_squared; } diff --git a/dimension_reduction/UMAP/Shared-Memory-OpenMP/SGD.h b/dimension_reduction/UMAP/Shared-Memory-OpenMP/SGD.h index 00073240f..2e869d9be 100644 --- a/dimension_reduction/UMAP/Shared-Memory-OpenMP/SGD.h +++ b/dimension_reduction/UMAP/Shared-Memory-OpenMP/SGD.h @@ -3,4 +3,3 @@ double clip(double value); double rdist( double ** embedding, int Dim, int index1, int index2); - diff --git a/dimension_reduction/UMAP/Shared-Memory-OpenMP/highDComputes.cpp b/dimension_reduction/UMAP/Shared-Memory-OpenMP/highDComputes.cpp index ede5904ab..b13295a9c 100644 --- a/dimension_reduction/UMAP/Shared-Memory-OpenMP/highDComputes.cpp +++ b/dimension_reduction/UMAP/Shared-Memory-OpenMP/highDComputes.cpp @@ -1,8 +1,8 @@ /** - * Extra Functions needed for computations + * Extra Functions needed for computations */ -#include +#include #include #include @@ -10,7 +10,7 @@ using namespace std; /** - * Compute B_Index and B_Dist for the closest points (K-NNs) + * Compute B_Index and B_Dist for the closest points (K-NNs) */ void findMin(int** B_Index,double** B_Dist, int N,int K,int* B_Index_Min,double* B_Dist_Min){ @@ -40,7 +40,7 @@ void findSigma(double ** B_Dist, double * B_Dist_Min, double * SigmaValues, int double target=log2(K); /** * Design Parameters to estimate SigmaValues - */ + */ const int iterations=640; const double Error=1e-5; @@ -74,7 +74,3 @@ void findSigma(double ** B_Dist, double * B_Dist_Min, double * SigmaValues, int SigmaValues[i] = sigma; } } - - - - diff --git a/dimension_reduction/UMAP/Shared-Memory-OpenMP/highDComputes.h b/dimension_reduction/UMAP/Shared-Memory-OpenMP/highDComputes.h index 0120cf7d3..ad581dda5 100644 --- a/dimension_reduction/UMAP/Shared-Memory-OpenMP/highDComputes.h +++ b/dimension_reduction/UMAP/Shared-Memory-OpenMP/highDComputes.h @@ -2,4 +2,3 @@ void 
findMin(int** B_Index,double** B_Dist, int N,int K,int* B_Index_Min,double* B_Dist_Min); void findSigma(double ** B_Dist, double * B_Dist_Min, double * SigmaValues, int N, int K); - diff --git a/dimension_reduction/UMAP/Shared-Memory-OpenMP/main.cpp b/dimension_reduction/UMAP/Shared-Memory-OpenMP/main.cpp index 678f6e373..d02e0e3b5 100644 --- a/dimension_reduction/UMAP/Shared-Memory-OpenMP/main.cpp +++ b/dimension_reduction/UMAP/Shared-Memory-OpenMP/main.cpp @@ -1,15 +1,15 @@ /** * @author Mahdi Maghrebi - * This code is an implementation of UMAP algorithm for dimension reduction. + * This code is an implementation of UMAP algorithm for dimension reduction. * The reference paper is “UMAP: Uniform Manifold Approximation and Projection for Dimension Reduction“, by McInnes et al., 2018 (https://arxiv.org/abs/1802.03426) * Jan 2020 */ #include #include -#include -#include -#include +#include +#include +#include #include #include #include @@ -31,7 +31,7 @@ using namespace Eigen; int main(int argc, char ** argv) { /** - * The errors and informational messages are outputted to the log file + * The errors and informational messages are outputted to the log file */ ofstream logFile; string logFileName="Setting.txt"; @@ -51,7 +51,7 @@ int main(int argc, char ** argv) { * distanceMetric is the metric to compute the distance between the points in high-D space, by deafult should be euclidean * distanceV1 is the first optional variable needed for computing distance in some metrics * distanceV2 is the second optional variable needed for computing distance in some metrics - * inputPathOptionalArray is the full path to the directory that contains a csv file of the optional array needed for computing distance in some metrics. + * inputPathOptionalArray is the full path to the directory that contains a csv file of the optional array needed for computing distance in some metrics. 
*/ string filePath, filePathOptionalArray="", outputPath, LogoutputPath, inputPath; int K,DimLowSpace,nepochs; @@ -151,29 +151,29 @@ int main(int argc, char ** argv) { logFile<<"The full path to the input file: "<< filePath< N) { logFile<<" The desired number of NN has exceeded the size of dataset "< adjacencyMatrixA(N,N), adjacencyMatrixAT(N,N), graphSM(N,N); + SparseMatrix adjacencyMatrixA(N,N), adjacencyMatrixAT(N,N), graphSM(N,N); typedef Eigen::Triplet T; std::vector tripletList; tripletList.reserve(N*K); for (int i=0; i::InnerIterator it(graphSM,k); it; ++it) { - sum += it.value(); - if (it.value() > MaxWeight) MaxWeight=it.value(); + sum += it.value(); + if (it.value() > MaxWeight) MaxWeight=it.value(); } - } + } logFile<<"------------Setting Low-D Space Design------------"<::InnerIterator it(graphSM,k); it; ++it) { - if (it.value() < MaxWeight/nepochs) continue; + if (it.value() < MaxWeight/nepochs) continue; epochs_per_sample.push_back(MaxWeight/it.value()); head.push_back(it.col()); - tail.push_back(it.row()); + tail.push_back(it.row()); } } @@ -378,17 +378,17 @@ int main(int argc, char ** argv) { * This section was adopted from SGD implementation at https://github.com/lmcinnes/umap/blob/8f2ef23ec835cc5071fe6351a0da8313d8e75706/umap/layouts.py#L136 * edgeCounts is total number of edges in the high-D space graph * epoch_of_next_sample is an index of the epoch state of the edges. If it is less than epoch index, we will use the edge in the computation - * epoch_of_next_negative_sample is an index of the epoch state of the edges for sampling from non-connected surrounding points. - * negative_sample_rate is the rate at which we sample from the non-connected surrounding points as compared to the connected edges. + * epoch_of_next_negative_sample is an index of the epoch state of the edges for sampling from non-connected surrounding points. 
+ * negative_sample_rate is the rate at which we sample from the non-connected surrounding points as compared to the connected edges. * Increasing this value will result in greater repulsive force being applied, greater optimization cost, but slightly more accuracy. - */ + */ int edgeCounts=epochs_per_sample.size(); const int negative_sample_rate=5; int n_neg_samples; //Substituting with Vectors due to Stacksize run-time error -// float epoch_of_next_sample[edgeCounts]; -// float epochs_per_negative_sample[edgeCounts]; -// float epoch_of_next_negative_sample[edgeCounts]; +// float epoch_of_next_sample[edgeCounts]; +// float epochs_per_negative_sample[edgeCounts]; +// float epoch_of_next_negative_sample[edgeCounts]; vector epoch_of_next_sample,epochs_per_negative_sample,epoch_of_next_negative_sample; for (int i = 0; i < edgeCounts; ++i) { @@ -398,77 +398,77 @@ int main(int argc, char ** argv) { epoch_of_next_sample.push_back(epochs_per_sample[i]); epochs_per_negative_sample.push_back(epochs_per_sample[i]/negative_sample_rate); epoch_of_next_negative_sample.push_back(epochs_per_negative_sample[i]); - } + } /** * move_other is equal to 1 if not embedding new previously unseen points to low-D space */ - const int move_other=1; + const int move_other=1; /** * dEpsilon is zero approximation in double precision - */ + */ const double dEpsilon=1e-14; double dist_squared; - // The main training loop + // The main training loop for (int n = 1; n < nepochs; ++n) { - //Loop over all edges of the graph + //Loop over all edges of the graph if (n%100 == 0){ logFile << "SGD iteration = "< 0) { embedding[headIndex][jj] += alpha*clip(grad_coeff*(embedding[headIndex][jj]-embedding[randomIndex][jj])); - } else { - embedding[headIndex][jj] += alpha*4.0; + } else { + embedding[headIndex][jj] += alpha*4.0; } - } - } - epoch_of_next_negative_sample[i] += (n_neg_samples * epochs_per_negative_sample[i]); - } - } - alpha=1.0-((float)n)/nepochs; + } + } + epoch_of_next_negative_sample[i] += 
(n_neg_samples * epochs_per_negative_sample[i]); + } + } + alpha=1.0-((float)n)/nepochs; } logFile<<"------------Starting Outputing the Results------------"<&1 /dev/null"; string outputCmd3 = exec(cmd3.c_str()); return 0; } - - - diff --git a/dimension_reduction/UMAP/Shared-Memory-OpenMP/plugin.json b/dimension_reduction/UMAP/Shared-Memory-OpenMP/plugin.json index c1cfbe573..c5c944754 100644 --- a/dimension_reduction/UMAP/Shared-Memory-OpenMP/plugin.json +++ b/dimension_reduction/UMAP/Shared-Memory-OpenMP/plugin.json @@ -1,169 +1,169 @@ { - "name": "UMAP (Multi-Threaded)", - "version": "openmp-0.1.6", - "title": "UMAP (Multi-Threaded)", - "description": "UMAP Multi-Threaded (Shared-Memory) Code", - "author": "Mahdi Maghrebi", - "institution": "National Center for Advancing Translational Sciences, National Institutes of Health", - "containerId": "labshare/polus-umap-plugin:openmp-0.1.6", - "inputs": [ - { - "name": "inputPath", - "type": "csvCollection", - "description": "Input csv file containing the raw data", - "required": "True" - }, - { - "name": "K", - "type": "number", - "description": "The desired number of Nearest Neighbors (NN) to be computed", - "required": "True" - }, - { - "name": "sampleRate", - "type": "number", - "description": "The rate at which the sampling is conducted. 
The values closer to 1 provides more accurate results but the execution takes longer.", - "required": "True" - }, - { - "name": "DimLowSpace", - "type": "number", - "description": "Dimension of the embedding space (usually 1-3)", - "required": "True" - }, - { - "name": "randomInitializing", - "type": "boolean", - "description": "The method for initialization of data in the embedding space", - "required": "True" - }, - { - "name": "nepochs", - "type": "number", - "description": "The number of training epochs", - "required": "True" - }, - { - "name": "mindist", - "type": "number", - "description": "The variable that controls how tight (to each other) the data are placed in the embedding space", - "required": "True" - }, - { - "name": "distanceMetric", - "type": "enum", - "options": { - "values": [ - "euclidean", - "manhattan", - "minkowski", - "cosine", - "correlation", - "braycurtis", - "lldirichlet", - "jaccard", - "dice", - "categoricaldistance", - "ordinaldistance", - "countdistance", - "levenshtein", - "standardisedEuclidean", - "weightedMinkowski", - "mahalanobis" - ] - }, - "description": "The metric to compute the distance in the original space", - "required": "True" - }, - { - "name": "distanceV1", - "type": "number", - "description": "The optional input needed for computation of some metrics", - "required": "False" - }, - { - "name": "distanceV2", - "type": "number", - "description": "The optional input needed for computation of some metrics", - "required": "False" - }, - { - "name": "inputPathOptionalArray", - "type": "csvCollection", - "description": "The optional csv file representing a vector needed in computation of some metrics", - "required": "False" - } - ], - "outputs": [ - { - "name": "outputPath", - "type": "csvCollection", - "description": "The full path to the output csv collection containing the coordinates of data in the embedding space" - } - ], - "ui": [ - { - "key": "inputs.inputPath", - "title": "Input CSV collection", - "description": 
"Insert the path to csv collection" - }, - { - "key": "inputs.K", - "title": "The desired number of Nearest Neighbours (NN) in the original space to be computed", - "description": "Insert an integer", - "default": 15 - }, - { - "key": "inputs.sampleRate", - "title": "Sampling Rate", - "description": "Insert a value between 0 and 1", - "default": 0.9 - }, - { - "key": "inputs.DimLowSpace", - "title": "Dimension of the embedding space", - "description": "Insert a value (usually 1-3)", - "default": 2 - }, - { - "key": "inputs.randomInitializing", - "title": "Random initialization in the embedded space?", - "default": true - }, - { - "key": "inputs.nepochs", - "title": "The number of training epochs", - "description": "Insert an integer (usually 200-500)", - "default": 500 - }, - { - "key": "inputs.mindist", - "title": "mindist", - "description": "Insert a value between 0 and 1", - "default": 0.01 - }, - { - "key": "inputs.distanceMetric", - "title": "The metric to compute the distance in the original space", - "description": "Select the metric" - }, - { - "key": "inputs.distanceV1", - "title": "The optional input #1 needed for the chosen metric", - "description": "Insert a value", - "condition": "model.inputs.distanceMetric==['weightedMinkowski','minkowski','ordinaldistance','countdistance','levenshtein']" - }, - { - "key": "inputs.distanceV2", - "title": "The optional input #2 needed for the chosen metric", - "description": "Insert a value", - "condition": "model.inputs.distanceMetric==['countdistance','levenshtein']" - }, - { - "key": "inputs.inputPathOptionalArray", - "title": "The optional csv collection representing a vector needed for the chosen metric", - "description": "Insert the Path to csv collection", - "condition": "model.inputs.distanceMetric==['standardisedEuclidean','weightedMinkowski','mahalanobis']" - } - ] + "name": "UMAP (Multi-Threaded)", + "version": "openmp-0.1.6", + "title": "UMAP (Multi-Threaded)", + "description": "UMAP Multi-Threaded 
(Shared-Memory) Code", + "author": "Mahdi Maghrebi", + "institution": "National Center for Advancing Translational Sciences, National Institutes of Health", + "containerId": "labshare/polus-umap-plugin:openmp-0.1.6", + "inputs": [ + { + "name": "inputPath", + "type": "csvCollection", + "description": "Input csv file containing the raw data", + "required": "True" + }, + { + "name": "K", + "type": "number", + "description": "The desired number of Nearest Neighbors (NN) to be computed", + "required": "True" + }, + { + "name": "sampleRate", + "type": "number", + "description": "The rate at which the sampling is conducted. The values closer to 1 provides more accurate results but the execution takes longer.", + "required": "True" + }, + { + "name": "DimLowSpace", + "type": "number", + "description": "Dimension of the embedding space (usually 1-3)", + "required": "True" + }, + { + "name": "randomInitializing", + "type": "boolean", + "description": "The method for initialization of data in the embedding space", + "required": "True" + }, + { + "name": "nepochs", + "type": "number", + "description": "The number of training epochs", + "required": "True" + }, + { + "name": "mindist", + "type": "number", + "description": "The variable that controls how tight (to each other) the data are placed in the embedding space", + "required": "True" + }, + { + "name": "distanceMetric", + "type": "enum", + "options": { + "values": [ + "euclidean", + "manhattan", + "minkowski", + "cosine", + "correlation", + "braycurtis", + "lldirichlet", + "jaccard", + "dice", + "categoricaldistance", + "ordinaldistance", + "countdistance", + "levenshtein", + "standardisedEuclidean", + "weightedMinkowski", + "mahalanobis" + ] + }, + "description": "The metric to compute the distance in the original space", + "required": "True" + }, + { + "name": "distanceV1", + "type": "number", + "description": "The optional input needed for computation of some metrics", + "required": "False" + }, + { + "name": 
"distanceV2", + "type": "number", + "description": "The optional input needed for computation of some metrics", + "required": "False" + }, + { + "name": "inputPathOptionalArray", + "type": "csvCollection", + "description": "The optional csv file representing a vector needed in computation of some metrics", + "required": "False" + } + ], + "outputs": [ + { + "name": "outputPath", + "type": "csvCollection", + "description": "The full path to the output csv collection containing the coordinates of data in the embedding space" + } + ], + "ui": [ + { + "key": "inputs.inputPath", + "title": "Input CSV collection", + "description": "Insert the path to csv collection" + }, + { + "key": "inputs.K", + "title": "The desired number of Nearest Neighbours (NN) in the original space to be computed", + "description": "Insert an integer", + "default": 15 + }, + { + "key": "inputs.sampleRate", + "title": "Sampling Rate", + "description": "Insert a value between 0 and 1", + "default": 0.9 + }, + { + "key": "inputs.DimLowSpace", + "title": "Dimension of the embedding space", + "description": "Insert a value (usually 1-3)", + "default": 2 + }, + { + "key": "inputs.randomInitializing", + "title": "Random initialization in the embedded space?", + "default": true + }, + { + "key": "inputs.nepochs", + "title": "The number of training epochs", + "description": "Insert an integer (usually 200-500)", + "default": 500 + }, + { + "key": "inputs.mindist", + "title": "mindist", + "description": "Insert a value between 0 and 1", + "default": 0.01 + }, + { + "key": "inputs.distanceMetric", + "title": "The metric to compute the distance in the original space", + "description": "Select the metric" + }, + { + "key": "inputs.distanceV1", + "title": "The optional input #1 needed for the chosen metric", + "description": "Insert a value", + "condition": "model.inputs.distanceMetric==['weightedMinkowski','minkowski','ordinaldistance','countdistance','levenshtein']" + }, + { + "key": "inputs.distanceV2", + 
"title": "The optional input #2 needed for the chosen metric", + "description": "Insert a value", + "condition": "model.inputs.distanceMetric==['countdistance','levenshtein']" + }, + { + "key": "inputs.inputPathOptionalArray", + "title": "The optional csv collection representing a vector needed for the chosen metric", + "description": "Insert the Path to csv collection", + "condition": "model.inputs.distanceMetric==['standardisedEuclidean','weightedMinkowski','mahalanobis']" + } + ] } diff --git a/features/pixel-segmentation-eval-tool/src/polus/images/features/pixel_segmentation_eval/__main__.py b/features/pixel-segmentation-eval-tool/src/polus/images/features/pixel_segmentation_eval/__main__.py index beeb3e955..7727448aa 100644 --- a/features/pixel-segmentation-eval-tool/src/polus/images/features/pixel_segmentation_eval/__main__.py +++ b/features/pixel-segmentation-eval-tool/src/polus/images/features/pixel_segmentation_eval/__main__.py @@ -2,14 +2,13 @@ import json import logging import pathlib -from typing import Any, Optional +from typing import Any +from typing import Optional import filepattern as fp import typer -from polus.images.features.pixel_segmentation_eval.evaluate import ( - evaluation, - POLUS_TAB_EXT -) +from polus.images.features.pixel_segmentation_eval.evaluate import POLUS_TAB_EXT +from polus.images.features.pixel_segmentation_eval.evaluate import evaluation # Initialize the logger logging.basicConfig( @@ -36,7 +35,7 @@ def main( ), input_classes: int = typer.Option(1, "--inputClasses", help="Number of Classes"), file_pattern: Optional[str] = typer.Option( - ".+", "--filePattern", help="Filename pattern to filter data." 
+ ".+", "--filePattern", help="Filename pattern to filter data.", ), individual_stats: Optional[bool] = typer.Option( False, @@ -50,7 +49,7 @@ def main( ), out_dir: pathlib.Path = typer.Option(..., "--outDir", help="Output collection"), preview: Optional[bool] = typer.Option( - False, "--preview", help="Output a JSON preview of files" + False, "--preview", help="Output a JSON preview of files", ), ) -> None: """To generate evaluation metrics for pixel-wise comparison of ground truth and predicted images.""" @@ -68,7 +67,7 @@ def main( assert ( gt_dir.exists() - ), f"{gt_dir} does not exist!! Please check input path again" # noqa + ), f"{gt_dir} does not exist!! Please check input path again" assert ( pred_dir.exists() ), f"{pred_dir} does not exist!! Please check input path again" @@ -78,7 +77,6 @@ def main( fps = fp.FilePattern(pred_dir, file_pattern) - if preview: with open(pathlib.Path(out_dir, "preview.json"), "w") as jfile: out_json: dict[str, Any] = { diff --git a/features/pixel-segmentation-eval-tool/src/polus/images/features/pixel_segmentation_eval/evaluate.py b/features/pixel-segmentation-eval-tool/src/polus/images/features/pixel_segmentation_eval/evaluate.py index 24d8d6018..a906340bd 100644 --- a/features/pixel-segmentation-eval-tool/src/polus/images/features/pixel_segmentation_eval/evaluate.py +++ b/features/pixel-segmentation-eval-tool/src/polus/images/features/pixel_segmentation_eval/evaluate.py @@ -3,8 +3,10 @@ import math import os import pathlib +from collections.abc import Sequence from multiprocessing import cpu_count -from typing import List, Optional, Sequence, Union +from typing import Optional +from typing import Union import filepattern import numpy as np @@ -84,7 +86,7 @@ def metrics( - tp: Union[float, int], fp: int, fn: int, tn: Union[float, int] + tp: Union[float, int], fp: int, fn: int, tn: Union[float, int], ) -> Sequence[float]: """Compute evaluation metrics. 
@@ -207,7 +209,7 @@ def metrics( def write_outfile( x, - header: List[str], + header: list[str], out_name: pathlib.Path, chunk_size: int, ) -> None: @@ -267,153 +269,133 @@ def evaluation( tile_grid_size = 1 tile_size = tile_grid_size * 2048 # Set up the BioReader - with BioReader(file_name, max_workers=cpu_count()) as br_pred: - with BioReader( - pathlib.Path(gt_dir, file_name.name), max_workers=cpu_count() - ) as br_gt: - # Loop through z-slices - logger.info(f"Evaluating image {file_name}") - for cl in range(1, input_classes + 1): - tn = 0 - tp = 0 - fp = 0 - fn = 0 - for z in range(br_gt.Z): - # Loop across the length of the image - for y in range(0, br_gt.Y, tile_size): - y_max = min([br_gt.Y, y + tile_size]) - # Loop across the depth of the image - for x in range(0, br_gt.X, tile_size): - x_max = min([br_gt.X, x + tile_size]) - y_true = np.squeeze( - br_gt[y:y_max, x:x_max, z : z + 1, 0, 0] # noqa - ) - y_pred = np.squeeze( - br_pred[ - y:y_max, x:x_max, z : z + 1, 0, 0 # noqa - ] # noqa - ) - if input_classes == 1: - y_true = ( - (y_true > 0).astype("uint8") * 1 - ).flatten() - y_pred = ( - (y_pred > 0).astype("uint8") * 1 - ).flatten() - else: - y_true = y_true.flatten() - y_pred = y_pred.flatten() + with BioReader(file_name, max_workers=cpu_count()) as br_pred, BioReader( + pathlib.Path(gt_dir, file_name.name), max_workers=cpu_count(), + ) as br_gt: + # Loop through z-slices + logger.info(f"Evaluating image {file_name}") + for cl in range(1, input_classes + 1): + tn = 0 + tp = 0 + fp = 0 + fn = 0 + for z in range(br_gt.Z): + # Loop across the length of the image + for y in range(0, br_gt.Y, tile_size): + y_max = min([br_gt.Y, y + tile_size]) + # Loop across the depth of the image + for x in range(0, br_gt.X, tile_size): + x_max = min([br_gt.X, x + tile_size]) + y_true = np.squeeze( + br_gt[y:y_max, x:x_max, z : z + 1, 0, 0] # noqa + ) + y_pred = np.squeeze( + br_pred[ + y:y_max, x:x_max, z : z + 1, 0, 0 # noqa + ] # noqa + ) + if input_classes == 1: + 
y_true = ( + (y_true > 0).astype("uint8") * 1 + ).flatten() + y_pred = ( + (y_pred > 0).astype("uint8") * 1 + ).flatten() + else: + y_true = y_true.flatten() + y_pred = y_pred.flatten() - for i in range(len(y_true)): - if y_true[i] == cl: - if y_true[i] == y_pred[i]: - tp += 1 - else: - fn += 1 + for i in range(len(y_true)): + if y_true[i] == cl: + if y_true[i] == y_pred[i]: + tp += 1 else: - if y_pred[i] == cl: - fp += 1 - else: - tn += 1 - - if tp == 0: - tp_ = 1e-20 - else: - tp_ = tp - if tn == 0: - tn_ = 1e-20 - else: - tn_ = tn - ( - iou, - tpr, - precision, - tnr, - npv, - fnr, - fpr, - fdr, - fr, - prev, - accuracy, - ba, - fscore, - f1_score, - pt, - mcc, - fmi, - bi, - mkn, - ck, - mm, - amm, - ari, - ) = metrics(tp_, fp, fn, tn_) - data = [ - file_name.name, - cl, - tp, - tn, - fp, - fn, - iou, - tpr, - precision, - tnr, - npv, - fnr, - fpr, - fdr, - fr, - prev, - accuracy, - ba, - fscore, - f1_score, - pt, - mcc, - fmi, - bi, - mkn, - ck, - mm, - amm, - ari, - ] + fn += 1 + else: + if y_pred[i] == cl: + fp += 1 + else: + tn += 1 - if individual_stats: - individual_file = pathlib.Path( - out_dir, f"{file_name.name}{POLUS_TAB_EXT}" - ) - write_outfile( - data, - header, - individual_file, - chunk_size - ) + tp_ = 1e-20 if tp == 0 else tp + tn_ = 1e-20 if tn == 0 else tn + ( + iou, + tpr, + precision, + tnr, + npv, + fnr, + fpr, + fdr, + fr, + prev, + accuracy, + ba, + fscore, + f1_score, + pt, + mcc, + fmi, + bi, + mkn, + ck, + mm, + amm, + ari, + ) = metrics(tp_, fp, fn, tn_) + data = [ + file_name.name, + cl, + tp, + tn, + fp, + fn, + iou, + tpr, + precision, + tnr, + npv, + fnr, + fpr, + fdr, + fr, + prev, + accuracy, + ba, + fscore, + f1_score, + pt, + mcc, + fmi, + bi, + mkn, + ck, + mm, + amm, + ari, + ] - result.append(data) - filename = pathlib.Path(out_dir, f"result{POLUS_TAB_EXT}") - write_outfile( - result, header, filename, chunk_size + if individual_stats: + individual_file = pathlib.Path( + out_dir, f"{file_name.name}{POLUS_TAB_EXT}", ) + 
write_outfile(data, header, individual_file, chunk_size) + + result.append(data) + filename = pathlib.Path(out_dir, f"result{POLUS_TAB_EXT}") + write_outfile(result, header, filename, chunk_size) - if total_stats: - TP[cl] += tp - TN[cl] += tn - FP[cl] += fp - FN[cl] += fn + if total_stats: + TP[cl] += tp + TN[cl] += tn + FP[cl] += fp + FN[cl] += fn if total_stats: for cl in range(1, input_classes + 1): - if TP[cl] == 0: - TP_ = 1e-20 - else: - TP_ = TP[cl] - if TN[cl] == 0: - TN_ = 1e-20 - else: - TN_ = TN[cl] + TP_ = 1e-20 if TP[cl] == 0 else TP[cl] + TN_ = 1e-20 if TN[cl] == 0 else TN[cl] ( iou, tpr, @@ -471,9 +453,7 @@ def evaluation( ] overall_file = pathlib.Path(out_dir, f"total_stats_result{POLUS_TAB_EXT}") - write_outfile( - data, totalStats_header, overall_file, chunk_size - ) + write_outfile(data, totalStats_header, overall_file, chunk_size) logger.info(f"total_stats_result{POLUS_TAB_EXT}") finally: diff --git a/features/polus-feature-extraction-plugin/Dockerfile b/features/polus-feature-extraction-plugin/Dockerfile index bdf517b4c..07612f271 100644 --- a/features/polus-feature-extraction-plugin/Dockerfile +++ b/features/polus-feature-extraction-plugin/Dockerfile @@ -1,7 +1,7 @@ FROM polusai/bfio:2.1.9 COPY VERSION / - + ARG EXEC_DIR="/opt/executables" ARG DATA_DIR="/data" @@ -17,4 +17,4 @@ RUN pip3 install -r ${EXEC_DIR}/requirements.txt --no-cache-dir WORKDIR ${EXEC_DIR} -ENTRYPOINT ["python3", "/opt/executables/main.py"] \ No newline at end of file +ENTRYPOINT ["python3", "/opt/executables/main.py"] diff --git a/features/polus-feature-extraction-plugin/README.md b/features/polus-feature-extraction-plugin/README.md index 14b2c79f0..f0b0cd088 100644 --- a/features/polus-feature-extraction-plugin/README.md +++ b/features/polus-feature-extraction-plugin/README.md @@ -2,60 +2,60 @@ The feature extraction plugin extracts shape and intensity based features from images and outputs csv file.The input image should be in OME tiled tiff format. 
These are the features that can be extracted from this plugin: - 1. Area - + 1. Area - Number of pixels in the region. - 2. Perimeter - + 2. Perimeter - The length of the outside boundary of the region. - 3. Orientation - + 3. Orientation - Angle between the 0th axis and the major axis of the ellipse that has same second moments as the region. - 4. Convex area - + 4. Convex area - Number of pixels of convex hull image. - 5. Eccentricity - + 5. Eccentricity - Ratio of focal distance over the major axis length. - 6. Equivalent diameter - + 6. Equivalent diameter - The diameter of a circle with the same area as the region. - 7. Solidity - + 7. Solidity - Ratio of pixels in the region to pixels of convex hull image. - 8. Centroid - + 8. Centroid - The center point of the region. Centroid x and y indicates the (x,y) coordinates. - 9. Neighbors - + 9. Neighbors - The number of neighbors touching the object. - 10. Maximum feret - + 10. Maximum feret - The longest distance between any two points in the region (maximum caliber diameter) is calculated. The feret diameter for number of angles (0-180 degrees) are calculated and their maximum is selected. - 11. Minimum feret - + 11. Minimum feret - The minimum caliber diameter is calculated. The feret diameter for number of angles (0-180 degrees) are calculated and their minimum is selected. - 12. Polygonality score - + 12. Polygonality score - The score ranges from -infinity to 10. Score 10 indicates the object shape is polygon and score -infinity indicates the object shape is not polygon. - 13. Hexagonality score - + 13. Hexagonality score - The score ranges from -infinity to 10. Score 10 indicates the object shape is hexagon and score -infinity indicates the object shape is not hexagon. - 14. Hexagonality standard deviation - + 14. Hexagonality standard deviation - Dispersion of hexagonality_score relative to its mean. - 15. Euler number - + 15. Euler number - Euler characteristic of the region. - 16. 
Major axis length - + 16. Major axis length - The length of major axis of the ellipse that has the same normalized second central moments as the region. - 17. Minor axis length - + 17. Minor axis length - The length of minor axis of the ellipse that has the same normalized second central moments as the region. - 18. Bounding Box - + 18. Bounding Box - Position and size of the smallest box containing the region. - Bounding box xmin and ymin indicates the (x,y) coordinates.Bounding box width and height indicates the width and height of the box respectively. - 18. Mean intensity - + Bounding box xmin and ymin indicates the (x,y) coordinates.Bounding box width and height indicates the width and height of the box respectively. + 18. Mean intensity - Mean intensity value of the region. - 19. Median - + 19. Median - The median value of pixels in the region. - 20. Mode - + 20. Mode - The mode value of pixels in the region. - 21. Maximum intensity - + 21. Maximum intensity - Maximum intensity value in the region. - 22. Minimum intensity - + 22. Minimum intensity - Minimum intensity value in the region. - 23. Skewness - + 23. Skewness - The third order moment about the mean. - 24. Kurtosis - + 24. Kurtosis - The fourth order moment about the mean. - 25. Entropy - + 25. Entropy - Entropy is a measure of randomness. It is the amount of information in the region. - 26. Standard deviation - + 26. Standard deviation - Dispersion of image gray level intensities The features are calculated using scikit-image (https://scikit-image.org/docs/dev/api/skimage.measure.html#skimage.measure.regionprops). @@ -74,14 +74,14 @@ Filepattern will sort alphabetically the files in the labeled image folder and i If specific pattern is mentioned as input, then filepattern will get matches from labeled image folder and intensity image folder based on that pattern. ### Pixel distance: -Enter value for this parameter if neighbors touching cells needs to be calculated. The default value is 5. 
This is an optional parameter. +Enter value for this parameter if neighbors touching cells needs to be calculated. The default value is 5. This is an optional parameter. ### Features: Choose the features that need to be extracted. Multiple features can be selected. If all the 26 features are required, then choose ‘all’ option. ### Csvfile: There are 2 options available under this category. -Separatecsv - Allows to save all the features extracted for each image in separate csv file. +Separatecsv - Allows to save all the features extracted for each image in separate csv file. Singlecsv - Allows to save all the features extracted from all the images in the same csv file. ### Embedded pixel size: diff --git a/features/polus-feature-extraction-plugin/VERSION b/features/polus-feature-extraction-plugin/VERSION index e96a87111..26acbf080 100644 --- a/features/polus-feature-extraction-plugin/VERSION +++ b/features/polus-feature-extraction-plugin/VERSION @@ -1 +1 @@ -0.12.2 \ No newline at end of file +0.12.2 diff --git a/features/polus-feature-extraction-plugin/build-docker.sh b/features/polus-feature-extraction-plugin/build-docker.sh index 98c48be07..64cd72071 100644 --- a/features/polus-feature-extraction-plugin/build-docker.sh +++ b/features/polus-feature-extraction-plugin/build-docker.sh @@ -1,4 +1,4 @@ #!/bin/bash version=$(=0 and k>=0)] + pypdd = [int(py - pixeldistance) for py in pyy] + pxpdd = [int(px - pixeldistance) for px in pxx] + pypd_rnn = [int(py + pixeldistance) for py in pyy] + pxpd_rnn = [int(px + pixeldistance) for px in pxx] + cand = [ + (l, k) + for pypd, pypd_rn, pxpd, pxpd_rn in zip(pypdd, pypd_rnn, pxpdd, pxpd_rnn) + for l in range(pypd, pypd_rn + 1) + for k in range(pxpd, pxpd_rn + 1) + ] + values = [ + lbl_img[l][k] + for l, k in cand + if (l < shape_img[0] and k < shape_img[1] and l >= 0 and k >= 0) + ] nei.append(values) - #Get list of number of neighbors - uniq_nei=np.unique(nei) - num_nei.append(len(uniq_nei)-2) - nei=[] - num_nei = num_nei[0] 
- return num_nei + # Get list of number of neighbors + uniq_nei = np.unique(nei) + num_nei.append(len(uniq_nei) - 2) + nei = [] + return num_nei[0] + def feret_diameter(lbl_img, boxsize, thetastart, thetastop): """Calculate the maximum caliper diamter and minimum caliper diameter of an object at angle(1-180degrees). @@ -183,18 +250,18 @@ def feret_diameter(lbl_img, boxsize, thetastart, thetastop): An array with feret diameters of the corresponding objects at each of the angles in theta. """ - counts_scalar_copy=None + counts_scalar_copy = None - #Convert to radians + # Convert to radians theta = np.arange(thetastart, thetastop + 1) theta = np.asarray(theta) theta = np.radians(theta) - #Get perimeter of objects + # Get perimeter of objects obj_edges = box_border_search(lbl_img, boxsize=3) - #Get indices and label of all pixels - obj_edges_flat = obj_edges.flatten(order = 'F') + # Get indices and label of all pixels + obj_edges_flat = obj_edges.flatten(order="F") obj_edges_reshape = obj_edges_flat.reshape(-1, 1) objnum = obj_edges_reshape[obj_edges_reshape != 0] obj_edges_transpose = obj_edges.T @@ -205,18 +272,34 @@ def feret_diameter(lbl_img, boxsize, thetastart, thetastop): index = list(range(len(objnum))) index = np.asarray(index).reshape(objnum.shape) stack_index_objnum = np.column_stack((index, objnum)) - del obj_edges_flat, obj_edges_reshape, objnum, index, obj_edges, positionx, obj_edges_transpose, positiony - - #Sort pixels by label + del ( + obj_edges_flat, + obj_edges_reshape, + objnum, + index, + obj_edges, + positionx, + obj_edges_transpose, + positiony, + ) + + # Sort pixels by label sort_index_objnum = sorted(stack_index_objnum, key=itemgetter(1)) index_objnum_array = np.asarray(sort_index_objnum) index_split = index_objnum_array[:, 0] objnum_split = index_objnum_array[:, 1] positionx_index = positionx_reshape[index_split] positiony_index = positiony_reshape[index_split] - del positiony_reshape, index_split, stack_index_objnum, sort_index_objnum, 
index_objnum_array, positionx_reshape - - #Get number of pixels for each object + del ( + positiony_reshape, + index_split, + stack_index_objnum, + sort_index_objnum, + index_objnum_array, + positionx_reshape, + ) + + # Get number of pixels for each object objnum_reshape = np.asarray(objnum_split).reshape(-1, 1) difference_objnum = np.diff(objnum_reshape, axis=0) stack_objnum = np.vstack((1, difference_objnum, 1)) @@ -224,11 +307,20 @@ def feret_diameter(lbl_img, boxsize, thetastart, thetastop): objbounds_array = np.asarray(objbounds) objbounds_split = objbounds_array[0, :] objbounds_reshape = objbounds_split.reshape(-1, 1) - objbounds_counts = objbounds_reshape[1:]-objbounds_reshape[:-1] - del objnum_split, difference_objnum, stack_objnum, objbounds, objbounds_array, objbounds_split, objnum_reshape, objbounds_reshape + objbounds_counts = objbounds_reshape[1:] - objbounds_reshape[:-1] + del ( + objnum_split, + difference_objnum, + stack_objnum, + objbounds, + objbounds_array, + objbounds_split, + objnum_reshape, + objbounds_reshape, + ) uniqueindices_list = [] - #Create cell with x, y positions of each objects border + # Create cell with x, y positions of each objects border for counts in objbounds_counts: counts_scalar = np.asscalar(counts) if counts_scalar == objbounds_counts[0]: @@ -237,24 +329,32 @@ def feret_diameter(lbl_img, boxsize, thetastart, thetastop): counts_scalar_copy = counts_scalar if counts_scalar != objbounds_counts[0]: index_range = counts_scalar_copy + counts_scalar - uniqueindices_x = positionx_index[counts_scalar_copy: index_range] - uniqueindices_y = positiony_index[counts_scalar_copy: index_range] + uniqueindices_x = positionx_index[counts_scalar_copy:index_range] + uniqueindices_y = positiony_index[counts_scalar_copy:index_range] counts_scalar_copy = index_range uniqueindices_x_reshape = uniqueindices_x.reshape(-1, 1) uniqueindices_y_reshape = uniqueindices_y.reshape(-1, 1) - uniqueindices_concate = np.concatenate((uniqueindices_x_reshape, 
uniqueindices_y_reshape), axis=1) + uniqueindices_concate = np.concatenate( + (uniqueindices_x_reshape, uniqueindices_y_reshape), axis=1, + ) uniqueindices_list.append(uniqueindices_concate) - del uniqueindices_concate, uniqueindices_x, uniqueindices_y, uniqueindices_x_reshape, uniqueindices_y_reshape - - #Center points based on object centroid + del ( + uniqueindices_concate, + uniqueindices_x, + uniqueindices_y, + uniqueindices_x_reshape, + uniqueindices_y_reshape, + ) + + # Center points based on object centroid uniqueindices_array = np.asarray(uniqueindices_list) meanind_list = [] for indices in uniqueindices_array: - repitations= (len(indices), 2) + repitations = (len(indices), 2) sum_indices0 = np.sum(indices[:, 0]) sum_indices1 = np.sum(indices[:, 1]) - length_indices0 =(sum_indices0 / len(indices)) - length_indices1 =(sum_indices1 / len(indices)) + length_indices0 = sum_indices0 / len(indices) + length_indices1 = sum_indices1 / len(indices) mean_tile0 = np.tile(length_indices0, repitations) sub_mean0_indices = np.subtract(indices, mean_tile0) sub_mean0_indices = sub_mean0_indices[:, 0] @@ -265,18 +365,30 @@ def feret_diameter(lbl_img, boxsize, thetastart, thetastop): meanind1_reshape = sub_mean1_indices.reshape(-1, 1) meanind_concate = np.concatenate((meanind0_reshape, meanind1_reshape), axis=1) meanind_list.append(meanind_concate) - del meanind_concate, sum_indices0, sum_indices1, length_indices0, mean_tile0, repitations, length_indices1, indices, mean_tile1, sub_mean0_indices, sub_mean1_indices - del uniqueindices_array + del ( + meanind_concate, + sum_indices0, + sum_indices1, + length_indices0, + mean_tile0, + repitations, + length_indices1, + indices, + mean_tile1, + sub_mean0_indices, + sub_mean1_indices, + ) + del uniqueindices_array center_point = np.asarray(meanind_list) - #Create transformation matrix + # Create transformation matrix rot_trans = np.array((np.cos(theta), -np.sin(theta))) rot_trans = rot_trans.T rot_list = [] - rot_position=[] - 
sub_rot_list=[] + rot_position = [] + sub_rot_list = [] - #Calculate rotation positions + # Calculate rotation positions for point in center_point: rot_position.clear() for rotation in rot_trans: @@ -290,10 +402,10 @@ def feret_diameter(lbl_img, boxsize, thetastart, thetastop): del point, center_point feretdiam = [] - #Get Ferets diameter + # Get Ferets diameter for rot in rot_list: sub_rot_list.clear() - for rt,trans in zip(rot, rot_trans): + for rt, trans in zip(rot, rot_trans): sub_rot = np.subtract(np.max(rt), np.min(rt)) sub_rot_add = np.add(sub_rot, np.sum(abs(trans))) sub_rot_list.append(sub_rot_add) @@ -307,6 +419,7 @@ def feret_diameter(lbl_img, boxsize, thetastart, thetastop): del feretdiam, rot_list, theta, rot return feret_diameter + def polygonality_hexagonality(area, perimeter, neighbors, solidity, maxferet, minferet): """Calculate the polygonality score, hexagonality score and hexagonality standard deviation of object n. @@ -324,13 +437,13 @@ def polygonality_hexagonality(area, perimeter, neighbors, solidity, maxferet, mi The dispersion of hexagonality_score relative to its mean. 
""" - area_list=[] - perim_list=[] + area_list = [] + perim_list = [] - #Calculate area hull + # Calculate area hull area_hull = area / solidity - #Calculate Perimeter hull + # Calculate Perimeter hull perim_hull = 6 * math.sqrt(area_hull / (1.5 * math.sqrt(3))) if neighbors == 0: @@ -338,15 +451,67 @@ def polygonality_hexagonality(area, perimeter, neighbors, solidity, maxferet, mi elif neighbors > 0: perimeter_neighbors = perimeter / neighbors - #Polygonality metrics calculated based on the number of sides of the polygon + # Polygonality metrics calculated based on the number of sides of the polygon if neighbors > 2: - poly_size_ratio = 1 - math.sqrt((1 - (perimeter_neighbors / (math.sqrt((4 * area) / (neighbors * (1 / (math.tan(math.pi / neighbors)))))))) * (1 -(perimeter_neighbors / (math.sqrt(( 4 * area) / (neighbors * (1 / (math.tan(math.pi / neighbors))))))))) - poly_area_ratio = 1 - math.sqrt((1 - (area / (0.25 * neighbors * perimeter_neighbors * perimeter_neighbors * (1 / (math.tan(math.pi / neighbors)))))) * (1 - (area / (0.25 * neighbors * perimeter_neighbors * perimeter_neighbors * (1 / (math.tan(math.pi / neighbors))))))) - - #Calculate Polygonality Score + poly_size_ratio = 1 - math.sqrt( + ( + 1 + - ( + perimeter_neighbors + / ( + math.sqrt( + (4 * area) + / (neighbors * (1 / (math.tan(math.pi / neighbors)))), + ) + ) + ) + ) + * ( + 1 + - ( + perimeter_neighbors + / ( + math.sqrt( + (4 * area) + / (neighbors * (1 / (math.tan(math.pi / neighbors)))), + ) + ) + ) + ), + ) + poly_area_ratio = 1 - math.sqrt( + ( + 1 + - ( + area + / ( + 0.25 + * neighbors + * perimeter_neighbors + * perimeter_neighbors + * (1 / (math.tan(math.pi / neighbors))) + ) + ) + ) + * ( + 1 + - ( + area + / ( + 0.25 + * neighbors + * perimeter_neighbors + * perimeter_neighbors + * (1 / (math.tan(math.pi / neighbors))) + ) + ) + ), + ) + + # Calculate Polygonality Score poly_ave = 10 * (poly_size_ratio + poly_area_ratio) / 2 - #Hexagonality metrics calculated based on a convex, 
regular, hexagon + # Hexagonality metrics calculated based on a convex, regular, hexagon apoth1 = math.sqrt(3) * perimeter / 12 apoth2 = math.sqrt(3) * maxferet / 4 apoth3 = minferet / 2 @@ -355,7 +520,7 @@ def polygonality_hexagonality(area, perimeter, neighbors, solidity, maxferet, mi side3 = minferet / math.sqrt(3) side4 = perim_hull / 6 - #Unique area calculations from the derived and primary measures above + # Unique area calculations from the derived and primary measures above area1 = 0.5 * (3 * math.sqrt(3)) * side1 * side1 area2 = 0.5 * (3 * math.sqrt(3)) * side2 * side2 area3 = 0.5 * (3 * math.sqrt(3)) * side3 * side3 @@ -368,28 +533,43 @@ def polygonality_hexagonality(area, perimeter, neighbors, solidity, maxferet, mi area10 = area_hull area11 = area - #Create an array of all unique areas - list_area=[area1, area2, area3, area4, area5, area6, area7, area8, area9, area10, area11] + # Create an array of all unique areas + list_area = [ + area1, + area2, + area3, + area4, + area5, + area6, + area7, + area8, + area9, + area10, + area11, + ] area_uniq = np.asarray(list_area, dtype=float) - #Create an array of the ratio of all areas to eachother - for ib in range (0, len(area_uniq)): - for ic in range (ib + 1, len(area_uniq)): - area_ratio = 1 - math.sqrt((1 - (area_uniq[ib] / area_uniq[ic])) * (1 - (area_uniq[ib] / area_uniq[ic]))) + # Create an array of the ratio of all areas to eachother + for ib in range(0, len(area_uniq)): + for ic in range(ib + 1, len(area_uniq)): + area_ratio = 1 - math.sqrt( + (1 - (area_uniq[ib] / area_uniq[ic])) + * (1 - (area_uniq[ib] / area_uniq[ic])), + ) area_list.append(area_ratio) area_array = np.asarray(area_list) stat_value_area = stats.describe(area_array) del area_uniq, list_area, area_array, area_list - #Create Summary statistics of all array ratios + # Create Summary statistics of all array ratios area_ratio_ave = stat_value_area.mean area_ratio_sd = math.sqrt(stat_value_area.variance) - #Set the hexagon area ratio equal 
to the average Area Ratio + # Set the hexagon area ratio equal to the average Area Ratio hex_area_ratio = area_ratio_ave # Perimeter Ratio Calculations - # Two extra apothems are now useful + # Two extra apothems are now useful apoth4 = math.sqrt(3) * perim_hull / 12 apoth5 = math.sqrt(4 * area_hull / (4.5 * math.sqrt(3))) @@ -408,26 +588,44 @@ def polygonality_hexagonality(area, perimeter, neighbors, solidity, maxferet, mi perim13 = 2 * area_hull / (apoth2) perim14 = 2 * area_hull / (apoth3) - #Create an array of all unique Perimeters - list_perim = [perim1, perim2, perim3, perim4, perim5, perim6, perim7, perim8, perim9, perim10, perim11, perim12, perim13, perim14] + # Create an array of all unique Perimeters + list_perim = [ + perim1, + perim2, + perim3, + perim4, + perim5, + perim6, + perim7, + perim8, + perim9, + perim10, + perim11, + perim12, + perim13, + perim14, + ] perim_uniq = np.asarray(list_perim, dtype=float) del list_perim - #Create an array of the ratio of all Perimeters to eachother - for ib in range (0, len(perim_uniq)): - for ic in range (ib + 1, len(perim_uniq)): - perim_ratio = 1 - math.sqrt((1 - (perim_uniq[ib] / perim_uniq[ic])) * (1 - (perim_uniq[ib] / perim_uniq[ic]))) + # Create an array of the ratio of all Perimeters to eachother + for ib in range(0, len(perim_uniq)): + for ic in range(ib + 1, len(perim_uniq)): + perim_ratio = 1 - math.sqrt( + (1 - (perim_uniq[ib] / perim_uniq[ic])) + * (1 - (perim_uniq[ib] / perim_uniq[ic])), + ) perim_list.append(perim_ratio) del perim_ratio perim_array = np.asarray(perim_list) stat_value_perim = stats.describe(perim_array) del perim_uniq, perim_list, perim_array - #Create Summary statistics of all array ratios + # Create Summary statistics of all array ratios perim_ratio_ave = stat_value_perim.mean perim_ratio_sd = math.sqrt(stat_value_perim.variance) - #Set the HSR equal to the average Perimeter Ratio + # Set the HSR equal to the average Perimeter Ratio hex_size_ratio = perim_ratio_ave hex_sd = 
np.sqrt((area_ratio_sd**2 + perim_ratio_sd**2) / 2) @@ -442,19 +640,22 @@ def polygonality_hexagonality(area, perimeter, neighbors, solidity, maxferet, mi hex_area_ratio = "NAN" hex_ave = "NAN" hex_sd = "NAN" - return(poly_ave, hex_ave, hex_sd) - -def feature_extraction(features, - embeddedpixelsize, - unitLength, - pixelsPerunit, - pixelDistance, - channel, - intensity_image=None, - img_emb_unit=None, - label_image=None, - seg_file_names1=None, - int_file_name=None): + return (poly_ave, hex_ave, hex_sd) + + +def feature_extraction( + features, + embeddedpixelsize, + unitLength, + pixelsPerunit, + pixelDistance, + channel, + intensity_image=None, + img_emb_unit=None, + label_image=None, + seg_file_names1=None, + int_file_name=None, +): """Calculate shape and intensity based features. Args: @@ -468,26 +669,26 @@ def feature_extraction(features, intensity_image (ndarray): Intensity image array. pixelDistance (int): Distance between pixels to calculate the neighbors touching the object and default valus is 5. channel (int): Channel of the image. - + Returns: Dataframe containing the features extracted and the filename of the labeled image. 
- """ + """ df_insert = pd.DataFrame([]) boxsize = 3 thetastart = 1 thetastop = 180 if pixelDistance is None: pixelDistance = 5 - + def area(seg_img, units, *args): - """Calculate area for all the regions of interest in the image.""" + """Calculate area for all the regions of interest in the image.""" data_dict1 = [region.area for region in regions] if unitLength and not embeddedpixelsize: data_dict = [dt_pixel / pixelsPerunit**2 for dt_pixel in data_dict1] else: data_dict = data_dict1 - logger.debug('Completed extracting area for ' + seg_file_names1.name) + logger.debug("Completed extracting area for " + seg_file_names1.name) return data_dict def perimeter(seg_img, units, *args): @@ -497,111 +698,124 @@ def perimeter(seg_img, units, *args): data_dict = [dt_pixel / pixelsPerunit for dt_pixel in data_dict1] else: data_dict = data_dict1 - logger.debug('Completed extracting perimeter for ' + seg_file_names1.name) + logger.debug("Completed extracting perimeter for " + seg_file_names1.name) return data_dict - def orientation(seg_img,*args): + def orientation(seg_img, *args): """Calculate orientation for all the regions of interest in the image.""" label = [region.label for region in regions] - data_dict=[] + data_dict = [] + def compute_M(data): cols = np.arange(data.size) """ Limit it (with a safe copy) """ - data_sc = np.empty_like (data) + data_sc = np.empty_like(data) np.copyto(data_sc, data) - data_sc[data_sc>=len(label)+1] = len(label) + data_sc[data_sc >= len(label) + 1] = len(label) """""" - return csr_matrix((cols, (data_sc.ravel(), cols)),shape=(len(label) + 1, data_sc.size)) + return csr_matrix( + (cols, (data_sc.ravel(), cols)), shape=(len(label) + 1, data_sc.size), + ) + def get_indices_sparse(data): M = compute_M(data) return [np.unravel_index(row.data, data.shape) for row in M] + ori_data = get_indices_sparse(seg_img) data_pro = ori_data[1:] for i in data_pro: - x=i[0] - y=i[1] + x = i[0] + y = i[1] xg, yg = x.mean(), y.mean() x = x - xg y = y - yg uyy = 
(y**2).sum() uxx = (x**2).sum() - uxy = (x*y).sum() - if (uyy > uxx): - num = uyy - uxx + np.sqrt((uyy - uxx)**2 + 4*uxy**2) - den = 2*uxy + uxy = (x * y).sum() + if uyy > uxx: + num = uyy - uxx + np.sqrt((uyy - uxx) ** 2 + 4 * uxy**2) + den = 2 * uxy else: - num = 2*uxy - den = uxx - uyy + np.sqrt((uxx - uyy)**2 + 4*uxy**2) - + num = 2 * uxy + den = uxx - uyy + np.sqrt((uxx - uyy) ** 2 + 4 * uxy**2) + if (num == 0) and (den == 0): orientation1 = 0 else: - value = num/den - orientation1 = -(180/math.pi) * math.atan(value) + value = num / den + orientation1 = -(180 / math.pi) * math.atan(value) data_dict.append(orientation1) - logger.debug('Completed extracting orientation for ' + seg_file_names1.name) + logger.debug("Completed extracting orientation for " + seg_file_names1.name) return data_dict def convex_area(seg_img, units, *args): - """Calculate convex_area for all the regions of interest in the image.""" + """Calculate convex_area for all the regions of interest in the image.""" data_dict1 = [region.convex_area for region in regions] if unitLength and not embeddedpixelsize: data_dict = [dt_pixel / pixelsPerunit**2 for dt_pixel in data_dict1] else: data_dict = data_dict1 - logger.debug('Completed extracting convex area for ' + seg_file_names1.name) + logger.debug("Completed extracting convex area for " + seg_file_names1.name) return data_dict - + def bbox_ymin(*args): """Calculate bounding box xmin for all the regions of interest in the image.""" bbox_value = [str(region.bbox) for region in regions] - bbox_all = [value.split(',') for value in bbox_value] - data_dict = [bbox_min[0].replace('(','') for bbox_min in bbox_all] - logger.debug('Completed extracting boundingbox_ymin for ' + seg_file_names1.name) + bbox_all = [value.split(",") for value in bbox_value] + data_dict = [bbox_min[0].replace("(", "") for bbox_min in bbox_all] + logger.debug( + "Completed extracting boundingbox_ymin for " + seg_file_names1.name, + ) return data_dict - + def bbox_xmin(*args): 
"""Calculate bounding box ymin for all the regions of interest in the image.""" bbox_value = [str(region.bbox) for region in regions] - bbox_all = [value.split(',') for value in bbox_value] + bbox_all = [value.split(",") for value in bbox_value] data_dict = [bbox_min[1] for bbox_min in bbox_all] - logger.debug('Completed extracting boundingbox_xmin for ' + seg_file_names1.name) + logger.debug( + "Completed extracting boundingbox_xmin for " + seg_file_names1.name, + ) return data_dict - + def bbox_width(*args): """Calculate bounding box width for all the regions of interest in the image.""" - imgs= [region.image for region in regions] + imgs = [region.image for region in regions] data_dict = [w.shape[1] for w in imgs] - logger.debug('Completed extracting boundingbox_width for ' + seg_file_names1.name) + logger.debug( + "Completed extracting boundingbox_width for " + seg_file_names1.name, + ) return data_dict - + def bbox_height(*args): """Calculate bounding box height for all the regions of interest in the image.""" - imgs= [region.image for region in regions] + imgs = [region.image for region in regions] data_dict = [h.shape[0] for h in imgs] - logger.debug('Completed extracting boundingbox_height for ' + seg_file_names1.name) + logger.debug( + "Completed extracting boundingbox_height for " + seg_file_names1.name, + ) return data_dict def centroid_y(*args): """Calculate centroidy for all the regions of interest in the image.""" centroid_value = [str(region.centroid) for region in regions] - cent_y= [cent.split(',') for cent in centroid_value] - data_dict = [centroid_y[0].replace('(','') for centroid_y in cent_y] - logger.debug('Completed extracting centroid_row for ' + seg_file_names1.name) + cent_y = [cent.split(",") for cent in centroid_value] + data_dict = [centroid_y[0].replace("(", "") for centroid_y in cent_y] + logger.debug("Completed extracting centroid_row for " + seg_file_names1.name) return data_dict def centroid_x(*args): """Calculate centroidx for all 
the regions of interest in the image.""" centroid_value = [str(region.centroid) for region in regions] - cent_x = [cent.split(',') for cent in centroid_value] - data_dict = [centroid_x[1].replace(')','') for centroid_x in cent_x] - logger.debug('Completed extracting centroid_column for ' + seg_file_names1.name) + cent_x = [cent.split(",") for cent in centroid_value] + data_dict = [centroid_x[1].replace(")", "") for centroid_x in cent_x] + logger.debug("Completed extracting centroid_column for " + seg_file_names1.name) return data_dict def eccentricity(*args): """Calculate eccentricity for all the regions of interest in the image.""" data_dict = [region.eccentricity for region in regions] - logger.debug('Completed extracting eccentricity for ' + seg_file_names1.name) + logger.debug("Completed extracting eccentricity for " + seg_file_names1.name) return data_dict def equivalent_diameter(seg_img, units, *args): @@ -611,13 +825,15 @@ def equivalent_diameter(seg_img, units, *args): data_dict = [dt_pixel / pixelsPerunit for dt_pixel in data_dict1] else: data_dict = data_dict1 - logger.debug('Completed extracting equivalent diameter for ' + seg_file_names1.name) + logger.debug( + "Completed extracting equivalent diameter for " + seg_file_names1.name, + ) return data_dict def euler_number(*args): """Calculate euler_number for all the regions of interest in the image.""" data_dict = [region.euler_number for region in regions] - logger.debug('Completed extracting euler number for ' + seg_file_names1.name) + logger.debug("Completed extracting euler number for " + seg_file_names1.name) return data_dict def major_axis_length(seg_img, units, *args): @@ -627,7 +843,9 @@ def major_axis_length(seg_img, units, *args): data_dict = [dt_pixel / pixelsPerunit for dt_pixel in data_dict1] else: data_dict = data_dict1 - logger.debug('Completed extracting major axis length for ' + seg_file_names1.name) + logger.debug( + "Completed extracting major axis length for " + seg_file_names1.name, + 
) return data_dict def minor_axis_length(seg_img, units, *args): @@ -637,13 +855,15 @@ def minor_axis_length(seg_img, units, *args): data_dict = [dt_pixel / pixelsPerunit for dt_pixel in data_dict1] else: data_dict = data_dict1 - logger.debug('Completed extracting minor axis length for ' + seg_file_names1.name) + logger.debug( + "Completed extracting minor axis length for " + seg_file_names1.name, + ) return data_dict def solidity(*args): """Calculate solidity for all the regions of interest in the image.""" data_dict = [region.solidity for region in regions] - logger.debug('Completed extracting solidity for ' + seg_file_names1.name) + logger.debug("Completed extracting solidity for " + seg_file_names1.name) return data_dict def mean_intensity(*args): @@ -651,10 +871,13 @@ def mean_intensity(*args): if label_image is not None: intensity_images = [region.intensity_image for region in regions] imgs = [region.image for region in regions] - data_dict = [(np.mean(intensity[seg])) for intensity, seg in zip(intensity_images, imgs)] + data_dict = [ + (np.mean(intensity[seg])) + for intensity, seg in zip(intensity_images, imgs) + ] else: - data_dict =np.mean(intensity_image.reshape(-1)) - logger.debug('Completed extracting mean intensity for ' + int_file_name) + data_dict = np.mean(intensity_image.reshape(-1)) + logger.debug("Completed extracting mean intensity for " + int_file_name) return data_dict def max_intensity(*args): @@ -662,10 +885,13 @@ def max_intensity(*args): if label_image is not None: intensity_images = [region.intensity_image for region in regions] imgs = [region.image for region in regions] - data_dict = [int((np.max(intensity[seg]))) for intensity, seg in zip(intensity_images, imgs)] + data_dict = [ + int(np.max(intensity[seg])) + for intensity, seg in zip(intensity_images, imgs) + ] else: data_dict = np.max(intensity_image.reshape(-1)) - logger.debug('Completed extracting maximum intensity for ' + int_file_name) + logger.debug("Completed extracting 
maximum intensity for " + int_file_name) return data_dict def min_intensity(*args): @@ -673,10 +899,13 @@ def min_intensity(*args): if label_image is not None: intensity_images = [region.intensity_image for region in regions] imgs = [region.image for region in regions] - data_dict = [int((np.min(intensity[seg]))) for intensity, seg in zip(intensity_images, imgs)] + data_dict = [ + int(np.min(intensity[seg])) + for intensity, seg in zip(intensity_images, imgs) + ] else: data_dict = np.min(intensity_image.reshape(-1)) - logger.debug('Completed extracting minimum intensity for ' + int_file_name) + logger.debug("Completed extracting minimum intensity for " + int_file_name) return data_dict def median(*args): @@ -684,10 +913,13 @@ def median(*args): if label_image is not None: intensity_images = [region.intensity_image for region in regions] imgs = [region.image for region in regions] - data_dict = [int((np.median(intensity[seg]))) for intensity, seg in zip(intensity_images, imgs)] + data_dict = [ + int(np.median(intensity[seg])) + for intensity, seg in zip(intensity_images, imgs) + ] else: data_dict = np.median(intensity_image.reshape(-1)) - logger.debug('Completed extracting median for ' + int_file_name) + logger.debug("Completed extracting median for " + int_file_name) return data_dict def mode(*args): @@ -695,11 +927,14 @@ def mode(*args): if label_image is not None: intensity_images = [region.intensity_image for region in regions] imgs = [region.image for region in regions] - mode_list = [modevalue(intensity[seg])[0] for intensity, seg in zip(intensity_images, imgs)] + mode_list = [ + modevalue(intensity[seg])[0] + for intensity, seg in zip(intensity_images, imgs) + ] data_dict = [str(mode_ls)[1:-1] for mode_ls in mode_list] else: data_dict = modevalue(intensity_image.reshape(-1))[0] - logger.debug('Completed extracting mode for ' + int_file_name) + logger.debug("Completed extracting mode for " + int_file_name) return data_dict def standard_deviation(*args): @@ 
-707,10 +942,13 @@ def standard_deviation(*args): if label_image is not None: intensity_images = [region.intensity_image for region in regions] imgs = [region.image for region in regions] - data_dict = [(np.std(intensity[seg])) for intensity, seg in zip(intensity_images, imgs)] + data_dict = [ + (np.std(intensity[seg])) + for intensity, seg in zip(intensity_images, imgs) + ] else: - data_dict= np.std(intensity_image.reshape(-1)) - logger.debug('Completed extracting standard deviation for ' + int_file_name) + data_dict = np.std(intensity_image.reshape(-1)) + logger.debug("Completed extracting standard deviation for " + int_file_name) return data_dict def skewness(*args): @@ -718,10 +956,13 @@ def skewness(*args): if label_image is not None: intensity_images = [region.intensity_image for region in regions] imgs = [region.image for region in regions] - data_dict = [skew(intensity[seg], axis=0, bias=True) for intensity, seg in zip(intensity_images, imgs)] + data_dict = [ + skew(intensity[seg], axis=0, bias=True) + for intensity, seg in zip(intensity_images, imgs) + ] else: - data_dict= skew(intensity_image.reshape(-1),axis=0, bias=True) - logger.debug('Completed extracting skewness for ' + int_file_name) + data_dict = skew(intensity_image.reshape(-1), axis=0, bias=True) + logger.debug("Completed extracting skewness for " + int_file_name) return data_dict def entropy(*args): @@ -729,10 +970,13 @@ def entropy(*args): if label_image is not None: intensity_images = [region.intensity_image for region in regions] imgs = [region.image for region in regions] - data_dict = [shannon_entropy(intensity[seg]) for intensity, seg in zip(intensity_images, imgs)] + data_dict = [ + shannon_entropy(intensity[seg]) + for intensity, seg in zip(intensity_images, imgs) + ] else: data_dict = shannon_entropy(intensity_image.reshape(-1)) - logger.debug('Completed extracting entropy for ' + int_file_name) + logger.debug("Completed extracting entropy for " + int_file_name) return data_dict def 
kurtosis(*args): @@ -740,239 +984,320 @@ def kurtosis(*args): if label_image is not None: intensity_images = [region.intensity_image for region in regions] imgs = [region.image for region in regions] - data_dict = [kurto(intensity[seg], axis=0, fisher=False, bias=True) for intensity, seg in zip(intensity_images, imgs)] + data_dict = [ + kurto(intensity[seg], axis=0, fisher=False, bias=True) + for intensity, seg in zip(intensity_images, imgs) + ] else: - data_dict= kurto(intensity_image.reshape(-1),axis=0, fisher=False, bias=True) - logger.debug('Completed extracting kurtosis for ' + int_file_name) + data_dict = kurto( + intensity_image.reshape(-1), axis=0, fisher=False, bias=True, + ) + logger.debug("Completed extracting kurtosis for " + int_file_name) return data_dict def neighbors(seg_img, *args): """Calculate neighbors for all the regions of interest in the image.""" - data_dict=[] - label=[region.label for region in regions] - executor = concurrent.futures.ThreadPoolExecutor(max_workers = multiprocessing.cpu_count()) - results = executor.map(neighbors_find, repeat(seg_img), label,repeat(pixelDistance)) + data_dict = [] + label = [region.label for region in regions] + executor = concurrent.futures.ThreadPoolExecutor( + max_workers=multiprocessing.cpu_count(), + ) + results = executor.map( + neighbors_find, repeat(seg_img), label, repeat(pixelDistance), + ) data_dict = list(results) - logger.debug('Completed extraction neighbors for ' + seg_file_names1.name) + logger.debug("Completed extraction neighbors for " + seg_file_names1.name) return data_dict def maxferet(seg_img, *args): """Calculate maxferet for all the regions of interest in the image.""" - edges= box_border_search(seg_img, boxsize) + edges = box_border_search(seg_img, boxsize) feretdiam = feret_diameter(edges, boxsize, thetastart, thetastop) maxferet1 = [np.max(feret) for feret in feretdiam] if unitLength and not embeddedpixelsize: maxferet = [dt_pixel / pixelsPerunit for dt_pixel in maxferet1] else: 
maxferet = maxferet1 - logger.debug('Completed extracting maxferet for ' + seg_file_names1.name) + logger.debug("Completed extracting maxferet for " + seg_file_names1.name) return maxferet def minferet(seg_img, *args): """Calculate minferet for all the regions of interest in the image.""" - edges= box_border_search(seg_img, boxsize) + edges = box_border_search(seg_img, boxsize) feretdiam = feret_diameter(edges, boxsize, thetastart, thetastop) minferet1 = [np.min(feret) for feret in feretdiam] if unitLength and not embeddedpixelsize: minferet = [dt_pixel / pixelsPerunit for dt_pixel in minferet1] else: minferet = minferet1 - logger.debug('Completed extracting minferet for ' + seg_file_names1.name) + logger.debug("Completed extracting minferet for " + seg_file_names1.name) return minferet def poly_hex_score(seg_img, units): - """Calculate polygonality and hexagonality score for all the regions of interest in the image""" + """Calculate polygonality and hexagonality score for all the regions of interest in the image.""" poly_area = area(seg_img, units) poly_peri = perimeter(seg_img, units) poly_neighbor = neighbors(seg_img) poly_solidity = solidity(seg_img) poly_maxferet = maxferet(seg_img, units) poly_minferet = minferet(seg_img, units) - poly_hex= [polygonality_hexagonality(area_metric, perimeter_metric, int(neighbor_metric), solidity_metric, maxferet_metric, minferet_metric) for area_metric, perimeter_metric, neighbor_metric, solidity_metric, maxferet_metric, minferet_metric in zip(poly_area, poly_peri, poly_neighbor, poly_solidity, poly_maxferet, poly_minferet)] - return poly_hex + return [ + polygonality_hexagonality( + area_metric, + perimeter_metric, + int(neighbor_metric), + solidity_metric, + maxferet_metric, + minferet_metric, + ) + for area_metric, perimeter_metric, neighbor_metric, solidity_metric, maxferet_metric, minferet_metric in zip( + poly_area, + poly_peri, + poly_neighbor, + poly_solidity, + poly_maxferet, + poly_minferet, + ) + ] def 
polygonality_score(seg_img, units, *args): """Get polygonality score for all the regions of interest in the image.""" - if features == 'all': + if features == "all": poly_hex = poly_hex_score(seg_img, units) poly_hex = poly_hex_score(seg_img, units) polygonality_score = [poly[0] for poly in poly_hex] - logger.debug('Completed extracting polygonality score for ' + seg_file_names1.name) + logger.debug( + "Completed extracting polygonality score for " + seg_file_names1.name, + ) return polygonality_score def hexagonality_score(seg_img, units, *args): """Get hexagonality score for all the regions of interest in the image.""" poly_hex = poly_hex_score(seg_img, units) hexagonality_score = [poly[1] for poly in poly_hex] - logger.debug('Completed extracting hexagonality score for ' + seg_file_names1.name) + logger.debug( + "Completed extracting hexagonality score for " + seg_file_names1.name, + ) return hexagonality_score def hexagonality_sd(seg_img, units, *args): """Get hexagonality standard deviation for all the regions of interest in the image.""" poly_hex = poly_hex_score(seg_img, units) hexagonality_sd = [poly[2] for poly in poly_hex] - logger.debug('Completed extracting hexagonality standard deviation for ' + seg_file_names1.name) + logger.debug( + "Completed extracting hexagonality standard deviation for " + + seg_file_names1.name, + ) return hexagonality_sd - + def all(seg_img, units, int_img): """Calculate all features for all the regions of interest in the image.""" - #calculate area + # calculate area all_area = area(seg_img, units) - #calculate perimeter + # calculate perimeter all_peri = perimeter(seg_img, units) - #calculate neighbors + # calculate neighbors all_neighbor = neighbors(seg_img) - #calculate maxferet - edges= box_border_search(seg_img, boxsize) + # calculate maxferet + edges = box_border_search(seg_img, boxsize) feretdiam = feret_diameter(edges, boxsize, thetastart, thetastop) all_maxferet = [np.max(feret) for feret in feretdiam] - #calculate 
minferet - all_minferet = [np.min(feret) for feret in feretdiam] + # calculate minferet + all_minferet = [np.min(feret) for feret in feretdiam] if unitLength and not embeddedpixelsize: - maxferet = [dt_pixel / pixelsPerunit for dt_pixel in all_maxferet] - minferet = [dt_pixel / pixelsPerunit for dt_pixel in all_minferet] + [dt_pixel / pixelsPerunit for dt_pixel in all_maxferet] + [dt_pixel / pixelsPerunit for dt_pixel in all_minferet] else: - minferet = all_minferet - maxferet = all_maxferet - #calculate convex area + pass + # calculate convex area all_convex = convex_area(seg_img, units) - #calculate solidity - all_solidity = np.array(all_area)/np.array(all_convex) - #calculate orientation + # calculate solidity + all_solidity = np.array(all_area) / np.array(all_convex) + # calculate orientation all_orientation = orientation(seg_img) - #calculate centroid row value + # calculate centroid row value all_centroidx = centroid_x(seg_img) - #calculate centroid column value + # calculate centroid column value all_centroidy = centroid_y(seg_img) - #Calculate bounding box xmin + # Calculate bounding box xmin all_bboxxmin = bbox_xmin(seg_img) - #Calculate bounding box ymin + # Calculate bounding box ymin all_bboxymin = bbox_ymin(seg_img) - #Calculate bounding box width + # Calculate bounding box width all_bboxwidth = bbox_width(seg_img) - #Calculate bounding box height + # Calculate bounding box height all_bboxheight = bbox_height(seg_img) - #calculate eccentricity + # calculate eccentricity all_eccentricity = eccentricity(seg_img) - #calculate equivalent diameter + # calculate equivalent diameter all_equivalent_diameter = equivalent_diameter(seg_img, units) - #calculate euler number + # calculate euler number all_euler_number = euler_number(seg_img) - #calculate major axis length + # calculate major axis length all_major_axis_length = major_axis_length(seg_img, units) - #calculate minor axis length + # calculate minor axis length all_minor_axis_length = 
minor_axis_length(seg_img, units) - #calculate polygonality_score - all_polygon_score = [polygonality_hexagonality(area_metric, perimeter_metric, int(neighbor_metric), solidity_metric, maxferet_metric, minferet_metric) for area_metric, perimeter_metric, neighbor_metric, solidity_metric, maxferet_metric, minferet_metric in zip(all_area, all_peri, all_neighbor, all_solidity, all_maxferet, all_minferet)]#seg_img, units) + # calculate polygonality_score + all_polygon_score = [ + polygonality_hexagonality( + area_metric, + perimeter_metric, + int(neighbor_metric), + solidity_metric, + maxferet_metric, + minferet_metric, + ) + for area_metric, perimeter_metric, neighbor_metric, solidity_metric, maxferet_metric, minferet_metric in zip( + all_area, + all_peri, + all_neighbor, + all_solidity, + all_maxferet, + all_minferet, + ) + ] # seg_img, units) all_polygonality_score = [poly[0] for poly in all_polygon_score] - #calculate hexagonality_score + # calculate hexagonality_score all_hexagonality_score = [poly[1] for poly in all_polygon_score] - #calculate hexagonality standarddeviation + # calculate hexagonality standarddeviation all_hexagonality_sd = [poly[2] for poly in all_polygon_score] - #calculate mean intensity - all_mean_intensity = mean_intensity(seg_img, int_img) - #calculate maximum intensity value + # calculate mean intensity + all_mean_intensity = mean_intensity(seg_img, int_img) + # calculate maximum intensity value all_max_intensity = max_intensity(seg_img, int_img) - #calculate minimum intensity value + # calculate minimum intensity value all_min_intensity = min_intensity(seg_img, int_img) - #calculate median + # calculate median all_median = median(seg_img, int_img) - #calculate mode + # calculate mode all_mode = mode(seg_img, int_img) - #calculate standard deviation + # calculate standard deviation all_sd = standard_deviation(seg_img, int_img) - #calculate skewness - all_skewness= skewness(seg_img, int_img) - #calculate kurtosis + # calculate skewness + 
all_skewness = skewness(seg_img, int_img) + # calculate kurtosis all_kurtosis = kurtosis(seg_img, int_img) - logger.debug('Completed extracting all features for ' + seg_file_names1.name) - return (all_area, all_centroidx, all_centroidy, all_bboxxmin, all_bboxymin, all_bboxwidth, all_bboxheight, all_major_axis_length, all_minor_axis_length, all_eccentricity, all_orientation, all_convex, all_euler_number, all_equivalent_diameter, all_solidity, all_peri, all_maxferet, all_minferet, all_neighbor, all_polygonality_score, all_hexagonality_score, all_hexagonality_sd, all_kurtosis, all_max_intensity, all_mean_intensity, all_median, all_min_intensity, all_mode, all_sd, all_skewness) - - #Dictionary of input features - FEAT = {'area': area, - 'bbox_xmin': bbox_xmin, - 'bbox_ymin': bbox_ymin, - 'bbox_width': bbox_width, - 'bbox_height': bbox_height, - 'perimeter': perimeter, - 'orientation': orientation, - 'convex_area': convex_area, - 'centroid_x': centroid_x, - 'centroid_y': centroid_y, - 'eccentricity': eccentricity, - 'equivalent_diameter': equivalent_diameter, - 'euler_number': euler_number, - 'major_axis_length': major_axis_length, - 'minor_axis_length': minor_axis_length, - 'solidity': solidity, - 'mean_intensity': mean_intensity, - 'max_intensity': max_intensity, - 'min_intensity': min_intensity, - 'median': median, - 'mode': mode, - 'standard_deviation': standard_deviation, - 'skewness': skewness, - 'entropy': entropy, - 'kurtosis': kurtosis, - 'neighbors': neighbors, - 'maxferet': maxferet, - 'minferet': minferet, - 'polygonality_score': polygonality_score, - 'hexagonality_score': hexagonality_score, - 'hexagonality_sd': hexagonality_sd, - 'all': all} - + logger.debug("Completed extracting all features for " + seg_file_names1.name) + return ( + all_area, + all_centroidx, + all_centroidy, + all_bboxxmin, + all_bboxymin, + all_bboxwidth, + all_bboxheight, + all_major_axis_length, + all_minor_axis_length, + all_eccentricity, + all_orientation, + all_convex, + 
all_euler_number, + all_equivalent_diameter, + all_solidity, + all_peri, + all_maxferet, + all_minferet, + all_neighbor, + all_polygonality_score, + all_hexagonality_score, + all_hexagonality_sd, + all_kurtosis, + all_max_intensity, + all_mean_intensity, + all_median, + all_min_intensity, + all_mode, + all_sd, + all_skewness, + ) + + # Dictionary of input features + FEAT = { + "area": area, + "bbox_xmin": bbox_xmin, + "bbox_ymin": bbox_ymin, + "bbox_width": bbox_width, + "bbox_height": bbox_height, + "perimeter": perimeter, + "orientation": orientation, + "convex_area": convex_area, + "centroid_x": centroid_x, + "centroid_y": centroid_y, + "eccentricity": eccentricity, + "equivalent_diameter": equivalent_diameter, + "euler_number": euler_number, + "major_axis_length": major_axis_length, + "minor_axis_length": minor_axis_length, + "solidity": solidity, + "mean_intensity": mean_intensity, + "max_intensity": max_intensity, + "min_intensity": min_intensity, + "median": median, + "mode": mode, + "standard_deviation": standard_deviation, + "skewness": skewness, + "entropy": entropy, + "kurtosis": kurtosis, + "neighbors": neighbors, + "maxferet": maxferet, + "minferet": minferet, + "polygonality_score": polygonality_score, + "hexagonality_score": hexagonality_score, + "hexagonality_sd": hexagonality_sd, + "all": all, + } + if label_image is not None: - #Calculate features given as input for all images + # Calculate features given as input for all images regions = measure.regionprops(label_image, intensity_image) - #Remove the cells touching the border + # Remove the cells touching the border cleared = clear_border(label_image) - #pass the filename in csv + # pass the filename in csv title = seg_file_names1.name if label_image is None: title = int_file_name - - #If features parameter left empty then raise value error + + # If features parameter left empty then raise value error if not features: - raise ValueError('Select features for extraction.') - - #Check whether the 
pixels per unit contain values when embeddedpixelsize is not required and metric for unitlength is entered - if unitLength and not embeddedpixelsize: - if not pixelsPerunit: - raise ValueError('Enter pixels per unit value.') - if label_image is not None: - logger.info('Extracting features for ' + seg_file_names1.name) + msg = "Select features for extraction." + raise ValueError(msg) + + # Check whether the pixels per unit contain values when embeddedpixelsize is not required and metric for unitlength is entered + if unitLength and not embeddedpixelsize and not pixelsPerunit: + raise ValueError("Enter pixels per unit value.") + if label_image is not None: + logger.info("Extracting features for " + seg_file_names1.name) else: - logger.info('Extracting features for ' + int_file_name) - - #Centroid values shown separately in output as centroid_x and centroid_y - if 'centroid' in features: - features.remove('centroid') - features.append('centroid_x') - features.append('centroid_y') - #Bounding box location values shown separately in output as bbox_xmin and bbox_ymin - if 'boundingbox_location' in features: - features.remove('boundingbox_location') - features.append('bbox_xmin') - features.append('bbox_ymin') - #Bounding box dimension values shown separately in output as bbox_width and bbox_height - if 'boundingbox_dimension' in features: - features.remove('boundingbox_dimension') - features.append('bbox_width') - features.append('bbox_height') - + logger.info("Extracting features for " + int_file_name) + + # Centroid values shown separately in output as centroid_x and centroid_y + if "centroid" in features: + features.remove("centroid") + features.append("centroid_x") + features.append("centroid_y") + # Bounding box location values shown separately in output as bbox_xmin and bbox_ymin + if "boundingbox_location" in features: + features.remove("boundingbox_location") + features.append("bbox_xmin") + features.append("bbox_ymin") + # Bounding box dimension values shown 
separately in output as bbox_width and bbox_height + if "boundingbox_dimension" in features: + features.remove("boundingbox_dimension") + features.append("bbox_width") + features.append("bbox_height") + for each_feature in features: - #Dynamically call the function based on the features required + # Dynamically call the function based on the features required if label_image is not None: - feature_value = FEAT[each_feature](label_image,unitLength,intensity_image) + feature_value = FEAT[each_feature](label_image, unitLength, intensity_image) else: feature_value = FEAT[each_feature](intensity_image) - #get all features - if each_feature == 'all': - #create dataframe for all features - df=pd.DataFrame(feature_value) + # get all features + if each_feature == "all": + # create dataframe for all features + df = pd.DataFrame(feature_value) df = df.T # Change the units depending on selection @@ -984,56 +1309,66 @@ def all(seg_img, units, int_img): units = "pixels" columns = [ - f'area_{units}', - 'centroid_x', - 'centroid_y', - 'bbox_xmin', - 'bbox_ymin', - 'bbox_width', - 'bbox_height', - f'major_axis_length_{units}', - f'minor_axis_length_{units}', - 'eccentricity', - 'orientation', - f'convex_area_{units}', - 'euler_number', - f'equivalent_diameter_{units}', - 'solidity', - f'perimeter_{units}', - f'maxferet_{units}', - f'minferet_{units}', - 'neighbors', - 'polygonality_score', - 'hexagonality_score', - 'hexagonality_sd', - 'kurtosis', - 'maximum_intensity', - 'mean_intensity', - 'median', - 'minimum_intensity', - 'mode', - 'standard_deviation', - 'skewness', - + f"area_{units}", + "centroid_x", + "centroid_y", + "bbox_xmin", + "bbox_ymin", + "bbox_width", + "bbox_height", + f"major_axis_length_{units}", + f"minor_axis_length_{units}", + "eccentricity", + "orientation", + f"convex_area_{units}", + "euler_number", + f"equivalent_diameter_{units}", + "solidity", + f"perimeter_{units}", + f"maxferet_{units}", + f"minferet_{units}", + "neighbors", + "polygonality_score", + 
"hexagonality_score", + "hexagonality_sd", + "kurtosis", + "maximum_intensity", + "mean_intensity", + "median", + "minimum_intensity", + "mode", + "standard_deviation", + "skewness", ] - df.columns = [c+f'' for c in columns] + df.columns = [c + "" for c in columns] if unitLength and not embeddedpixelsize: - check_cols = [col for col in df.columns if 'area' in col] - df.columns = [x + '^2' if x in check_cols else x for x in df] - #Show channel values only when there is more than 1 channel + check_cols = [col for col in df.columns if "area" in col] + df.columns = [x + "^2" if x in check_cols else x for x in df] + # Show channel values only when there is more than 1 channel if channel is None: - df.columns = [c+f'' for c in df.columns] + df.columns = [c + "" for c in df.columns] else: - df.columns = [c+f'_channel{channel}' for c in df.columns] + df.columns = [c + f"_channel{channel}" for c in df.columns] df.columns = map(str.lower, df.columns) else: - #create dataframe for features selected + # create dataframe for features selected if label_image is None: - df = pd.DataFrame({each_feature: feature_value},index=[0]) + df = pd.DataFrame({each_feature: feature_value}, index=[0]) else: df = pd.DataFrame({each_feature: feature_value}) - if any({'area', 'convex_area', 'equivalent_diameter', 'major_axis_length', 'maxferet', 'minor_axis_length', 'minferet', 'perimeter'}.intersection (df.columns)): - #Change the units depending on selection + if any( + { + "area", + "convex_area", + "equivalent_diameter", + "major_axis_length", + "maxferet", + "minor_axis_length", + "minferet", + "perimeter", + }.intersection(df.columns), + ): + # Change the units depending on selection if embeddedpixelsize: units = img_emb_unit elif unitLength and not embeddedpixelsize: @@ -1041,336 +1376,447 @@ def all(seg_img, units, int_img): else: units = "pixels" - df.rename({ - "area": f"area_{units}", - "convex_area": f"convex_area_{units}", - "equivalent_diameter": f"equivalent_diameter_{units}", - 
"major_axis_length": f"major_axis_length_{units}", - "minor_axis_length": f"minor_axis_length_{units}", - "maxferet": f"maxferet_{units}", - "minferet": f"minferet_{units}", - "perimeter": f"perimeter_{units}" - }, axis='columns', inplace=True) - columns = [c+f'' for c in df.columns] + df.rename( + { + "area": f"area_{units}", + "convex_area": f"convex_area_{units}", + "equivalent_diameter": f"equivalent_diameter_{units}", + "major_axis_length": f"major_axis_length_{units}", + "minor_axis_length": f"minor_axis_length_{units}", + "maxferet": f"maxferet_{units}", + "minferet": f"minferet_{units}", + "perimeter": f"perimeter_{units}", + }, + axis="columns", + inplace=True, + ) + columns = [c + "" for c in df.columns] if unitLength and not embeddedpixelsize: - check_cols = [col for col in df.columns if 'Area' in col] + check_cols = [col for col in df.columns if "Area" in col] if check_cols: - df.columns = [col+'^2'for col in check_cols] + df.columns = [col + "^2" for col in check_cols] if channel is None: - df.columns = [c+f'' for c in df.columns] + df.columns = [c + "" for c in df.columns] else: - df.columns = [c+f'_channel{channel}' for c in df.columns] + df.columns = [c + f"_channel{channel}" for c in df.columns] df.columns = map(str.lower, df.columns) df_insert = pd.concat([df_insert, df], axis=1) - + if label_image is not None: - #Lists all the labels in the image + # Lists all the labels in the image label = [r.label for r in regions] - #Measure region props for only the object not touching the border + # Measure region props for only the object not touching the border regions1 = np.unique(cleared)[1:] - #List of labels for only objects that are not touching the border - label_nt_touching = regions1-1 - #Find whether the object is touching border or not - border_cells = np.full((len(regions)),True,dtype=bool) - label_nt_touching[label_nt_touching>=len(border_cells)] = len(border_cells)-1 # Limit it - border_cells[label_nt_touching]=False + # List of labels for 
only objects that are not touching the border + label_nt_touching = regions1 - 1 + # Find whether the object is touching border or not + border_cells = np.full((len(regions)), True, dtype=bool) + label_nt_touching[label_nt_touching >= len(border_cells)] = ( + len(border_cells) - 1 + ) # Limit it + border_cells[label_nt_touching] = False if intensity_image is None: - #Create column label and image - data = { 'mask_image':title, - 'label': label} - data1 = {'touching_border': border_cells} - df1 = pd.DataFrame(data,columns=['mask_image','label']) - df_values= ['mask_image','label'] + # Create column label and image + data = {"mask_image": title, "label": label} + data1 = {"touching_border": border_cells} + df1 = pd.DataFrame(data, columns=["mask_image", "label"]) + df_values = ["mask_image", "label"] else: - data = { 'mask_image':title, - 'intensity_image':int_file_name, - 'label': label} - data1 = {'touching_border': border_cells} - df1 = pd.DataFrame(data,columns=['mask_image','intensity_image','label']) - df_values= ['mask_image','intensity_image','label'] - #Create column touching border - df2 = pd.DataFrame(data1,columns=['touching_border']) - df_insert1 = pd.concat([df1,df_insert,df2],ignore_index=True, axis=1) + data = { + "mask_image": title, + "intensity_image": int_file_name, + "label": label, + } + data1 = {"touching_border": border_cells} + df1 = pd.DataFrame(data, columns=["mask_image", "intensity_image", "label"]) + df_values = ["mask_image", "intensity_image", "label"] + # Create column touching border + df2 = pd.DataFrame(data1, columns=["touching_border"]) + df_insert1 = pd.concat([df1, df_insert, df2], ignore_index=True, axis=1) dfch = df_insert.columns.tolist() - - df_values1 = ['touching_border'] - joinedlist= df_values + dfch + df_values1 + + df_values1 = ["touching_border"] + joinedlist = df_values + dfch + df_values1 df_insert = df_insert1 - df_insert.columns =joinedlist + df_insert.columns = joinedlist if label_image is None: - #Insert 
filename as 1st column - df_insert.insert(0, 'intensity_image', int_file_name) + # Insert filename as 1st column + df_insert.insert(0, "intensity_image", int_file_name) return df_insert, title + def labeling_is_blank(label_image): """Check if the label image is trivial (blank, missing, non-informative). - + Args: label_image (ndarray): Labeled image array. - + Returns: True if the labeling is non-informative - + """ - return (label_image.min()==0 and label_image.max()==0) + return label_image.min() == 0 and label_image.max() == 0 + # Setup the argument parsing def main(): logger.info("Parsing arguments...") - parser = argparse.ArgumentParser(prog='main', description='Everything you need to start a Feature Extraction plugin.') - parser.add_argument('--features', dest='features', type=str, - help='Features to calculate', required=True) - parser.add_argument('--filePattern', dest='filePattern', type=str, - help='The filepattern used to match files with each other.', required=True) - parser.add_argument('--csvfile', dest='csvfile', type=str, - help='Save csv as separate or single file', required=True) - parser.add_argument('--embeddedpixelsize', dest='embeddedpixelsize', type=str, - help='Embedded pixel size if present', required=False) - parser.add_argument('--pixelsPerunit', dest='pixelsPerunit', type=float, - help='Pixels per unit', required= False) - parser.add_argument('--unitLength', dest='unitLength', type=str, - help='Required units for features extracted', required= False) - parser.add_argument('--intDir', dest='intDir', type=str, - help='Intensity image collection', required=False) - parser.add_argument('--pixelDistance', dest='pixelDistance', type=int, - help='Pixel distance to calculate the neighbors touching cells', required=False) - parser.add_argument('--segDir', dest='segDir', type=str, - help='Segmented image collection', required=False) - parser.add_argument('--outDir', dest='outDir', type=str, - help='Output collection', required=True) + parser = 
argparse.ArgumentParser( + prog="main", + description="Everything you need to start a Feature Extraction plugin.", + ) + parser.add_argument( + "--features", + dest="features", + type=str, + help="Features to calculate", + required=True, + ) + parser.add_argument( + "--filePattern", + dest="filePattern", + type=str, + help="The filepattern used to match files with each other.", + required=True, + ) + parser.add_argument( + "--csvfile", + dest="csvfile", + type=str, + help="Save csv as separate or single file", + required=True, + ) + parser.add_argument( + "--embeddedpixelsize", + dest="embeddedpixelsize", + type=str, + help="Embedded pixel size if present", + required=False, + ) + parser.add_argument( + "--pixelsPerunit", + dest="pixelsPerunit", + type=float, + help="Pixels per unit", + required=False, + ) + parser.add_argument( + "--unitLength", + dest="unitLength", + type=str, + help="Required units for features extracted", + required=False, + ) + parser.add_argument( + "--intDir", + dest="intDir", + type=str, + help="Intensity image collection", + required=False, + ) + parser.add_argument( + "--pixelDistance", + dest="pixelDistance", + type=int, + help="Pixel distance to calculate the neighbors touching cells", + required=False, + ) + parser.add_argument( + "--segDir", + dest="segDir", + type=str, + help="Segmented image collection", + required=False, + ) + parser.add_argument( + "--outDir", dest="outDir", type=str, help="Output collection", required=True, + ) # Parse the arguments args = parser.parse_args() - #Parse the filepattern + # Parse the filepattern pattern = args.filePattern - logger.info('filePattern = {}'.format(pattern)) + logger.info(f"filePattern = {pattern}") - #List of features to be extracted - features = args.features.split(',') - logger.info('features = {}'.format(features)) + # List of features to be extracted + features = args.features.split(",") + logger.info(f"features = {features}") - #Save the features extracted (as single file for all 
images or 1 file for each image) in csvfile + # Save the features extracted (as single file for all images or 1 file for each image) in csvfile csvfile = args.csvfile - logger.info('csvfile = {}'.format(csvfile)) + logger.info(f"csvfile = {csvfile}") - #Embedded pixel size if true, get units from metadata + # Embedded pixel size if true, get units from metadata embeddedpixelsize = args.embeddedpixelsize - logger.info('embeddedpixelsize = {}'.format(embeddedpixelsize)) + logger.info(f"embeddedpixelsize = {embeddedpixelsize}") - #Required units for the features extracted + # Required units for the features extracted unitLength = args.unitLength - logger.info('unitLength = {}'.format(unitLength)) + logger.info(f"unitLength = {unitLength}") - #Pixels per unit vaue for the units mentined in unitLength + # Pixels per unit vaue for the units mentined in unitLength pixelsPerunit = args.pixelsPerunit - logger.info('pixels per unit = {}'.format(pixelsPerunit)) + logger.info(f"pixels per unit = {pixelsPerunit}") - #Path to intensity image directory + # Path to intensity image directory intDir = args.intDir - logger.info('intDir = {}'.format(intDir)) + logger.info(f"intDir = {intDir}") - #Pixel distance to calculate neighbors + # Pixel distance to calculate neighbors pixelDistance = args.pixelDistance - logger.info('pixelDistance = {}'.format(pixelDistance)) + logger.info(f"pixelDistance = {pixelDistance}") - #Path to labeled image directory + # Path to labeled image directory segDir = args.segDir - logger.info('segDir = {}'.format(segDir)) + logger.info(f"segDir = {segDir}") - #Path to save output csv files + # Path to save output csv files outDir = args.outDir - logger.info('outDir = {}'.format(outDir)) + logger.info(f"outDir = {outDir}") logger.info("Started") df_csv = pd.DataFrame([]) - - if not segDir and not intDir: - raise ValueError('No input image specified.') - intensity_features = 
['mean_intensity','max_intensity','min_intensity','median','mode','skewness','kurtosis','standard_deviation','entropy'] + if not segDir and not intDir: + msg = "No input image specified." + raise ValueError(msg) + + intensity_features = [ + "mean_intensity", + "max_intensity", + "min_intensity", + "median", + "mode", + "skewness", + "kurtosis", + "standard_deviation", + "entropy", + ] if intDir and not segDir: - if 'all' in features: - raise ValueError('No labeled/segmented image specified.') - elif (all(fe not in intensity_features for fe in features)): - raise ValueError('No labeled/segmented image specified.') - elif (any(fe not in intensity_features for fe in features)): - logger.warning('No labeled/segmented image specified.') + if "all" in features: + msg = "No labeled/segmented image specified." + raise ValueError(msg) + elif all(fe not in intensity_features for fe in features): + msg = "No labeled/segmented image specified." + raise ValueError(msg) + elif any(fe not in intensity_features for fe in features): + logger.warning("No labeled/segmented image specified.") features = [i for i in features if i in intensity_features] elif segDir and not intDir: - if 'all' in features: - raise ValueError('No intensity image specified.') + if "all" in features: + msg = "No intensity image specified." + raise ValueError(msg) features = [i for i in features if i in intensity_features] - elif (all(fe in intensity_features for fe in features)): - raise ValueError('No intensity image specified.') - elif (any(fe in intensity_features for fe in features)): - logger.warning('No intensity image specified.') + elif all(fe in intensity_features for fe in features): + msg = "No intensity image specified." 
+ raise ValueError(msg) + elif any(fe in intensity_features for fe in features): + logger.warning("No intensity image specified.") features = [i for i in features if i not in intensity_features] - - #Get list of .ome.tif files in the directory including sub folders for labeled images + + # Get list of .ome.tif files in the directory including sub folders for labeled images # Try to infer a filepattern from the files on disk for faster matching later if segDir: - configfiles_seg = filepattern.FilePattern(segDir,pattern) + configfiles_seg = filepattern.FilePattern(segDir, pattern) files_seg = list(configfiles_seg()) else: label_image = None - - files_int=[] - #Get list of .ome.tif files in the directory including sub folders for intensity images - if intDir: - configfiles_int = filepattern.FilePattern(intDir,pattern) + + files_int = [] + # Get list of .ome.tif files in the directory including sub folders for intensity images + if intDir: + configfiles_int = filepattern.FilePattern(intDir, pattern) files_int = list(configfiles_int()) else: intensity_image = None - #Check for matching filepattern + # Check for matching filepattern if segDir and intDir: - if len(files_seg) == 0 and len(files_int)==0 : - raise ValueError("Could not find files matching filepattern") + if len(files_seg) == 0 and len(files_int) == 0: + msg = "Could not find files matching filepattern" + raise ValueError(msg) elif segDir and not intDir: if len(files_seg) == 0: - raise ValueError("Could not find labeled/segmented image files matching filepattern") - elif intDir and not segDir: - if len(files_int) == 0: - raise ValueError("Could not find intensity image files matching filepattern") - - #Only intensity image as input + msg = "Could not find labeled/segmented image files matching filepattern" + raise ValueError( + msg, + ) + elif intDir and not segDir and len(files_int) == 0: + raise ValueError( + "Could not find intensity image files matching filepattern", + ) + + # Only intensity image as 
input if not segDir: for intfile in files_int: - df=None - channel=None - intensity_image,img_emb_unit = read(intfile[0]['file']) - int_name = intfile[0]['file'].name - df,title = feature_extraction(features, - embeddedpixelsize, - unitLength, - pixelsPerunit, - pixelDistance, - channel, - intensity_image, - img_emb_unit, - label_image=None, - seg_file_names1=None, - int_file_name=int_name) + df = None + channel = None + intensity_image, img_emb_unit = read(intfile[0]["file"]) + int_name = intfile[0]["file"].name + df, title = feature_extraction( + features, + embeddedpixelsize, + unitLength, + pixelsPerunit, + pixelDistance, + channel, + intensity_image, + img_emb_unit, + label_image=None, + seg_file_names1=None, + int_file_name=int_name, + ) os.chdir(outDir) - if csvfile == 'separatecsv': - logger.info('Saving dataframe to csv for ' + intfile[0]['file'].name) - export_csv = df.to_csv(r'%s.csv'%title, index=None, header=True, encoding='utf-8-sig') + if csvfile == "separatecsv": + logger.info("Saving dataframe to csv for " + intfile[0]["file"].name) + df.to_csv( + r"%s.csv" % title, index=None, header=True, encoding="utf-8-sig", + ) else: df_csv = df_csv.append(df) - elif segDir: - #Run analysis for each labeled image in the list - for img_file in itertools.zip_longest(files_seg,files_int): - label_image,img_emb_unit = read(img_file[0][0]['file']) + elif segDir: + # Run analysis for each labeled image in the list + for img_file in itertools.zip_longest(files_seg, files_int): + label_image, img_emb_unit = read(img_file[0][0]["file"]) - #Skip feature calculation and saving results for an image having trivial/blank/missing segmentation + # Skip feature calculation and saving results for an image having trivial/blank/missing segmentation if labeling_is_blank(label_image): - continue; + continue df = None - files='' - channel='' - #Both intensity and labeled image passed as input + files = "" + channel = "" + # Both intensity and labeled image passed as input if intDir: 
- #Get matching files - files = configfiles_int.get_matching(**{k.upper():v for k,v in img_file[0][0].items() if k not in ['file','c']}) - if files is not None: - if len(files) == 0 and(all(fe not in intensity_features for fe in features)): - intensity_image=None - - elif len(files) == 0 and(any(fe in intensity_features for fe in features)): - logger.warning(f"Could not find intensity files matching label image, {img_file[0][0]['file'].name}. Skipping...") - if df==None: - continue + # Get matching files + files = configfiles_int.get_matching( + **{ + k.upper(): v + for k, v in img_file[0][0].items() + if k not in ["file", "c"] + }, + ) + if files is not None: + if len(files) == 0 and ( + all(fe not in intensity_features for fe in features) + ): + intensity_image = None + + elif len(files) == 0 and ( + any(fe in intensity_features for fe in features) + ): + logger.warning( + f"Could not find intensity files matching label image, {img_file[0][0]['file'].name}. Skipping...", + ) + if df is None: + continue else: - intensity_image,img_emb_unit = read(files[0]['file']) - int_filename = files[0]['file'].name - - #Check length of files to mention channels in output only when there is more than one channel - if len(files)==1: + intensity_image, img_emb_unit = read(files[0]["file"]) + int_filename = files[0]["file"].name + + # Check length of files to mention channels in output only when there is more than one channel + if len(files) == 1: channel = None for file in files: - if channel != None: - channel=file['c'] - dfc,title = feature_extraction(features, - embeddedpixelsize, - unitLength, - pixelsPerunit, - pixelDistance, - channel, - intensity_image, - img_emb_unit, - label_image, - img_file[0][0]['file'], - int_filename) + if channel is not None: + channel = file["c"] + dfc, title = feature_extraction( + features, + embeddedpixelsize, + unitLength, + pixelsPerunit, + pixelDistance, + channel, + intensity_image, + img_emb_unit, + label_image, + img_file[0][0]["file"], + 
int_filename, + ) if df is None: df = dfc else: - df = pd.concat([df, dfc.iloc[:,2:]], axis=1,sort=False) + df = pd.concat([df, dfc.iloc[:, 2:]], axis=1, sort=False) - if csvfile == 'singlecsv': + if csvfile == "singlecsv": df_csv = df_csv.append(df) - + else: - if len(files_seg) != len(files_int) : - raise ValueError("Number of labeled/segmented images is not equal to number of intensity images") - #Read intensity image - intensity_image,img_emb_unit = read(img_file[1][0]['file']) - int_file = img_file[1][0]['file'].name - channel=None - - #Dataframe contains the features extracted from images - if not intDir or files==[] or files==None: - channel=None + if len(files_seg) != len(files_int): + msg = "Number of labeled/segmented images is not equal to number of intensity images" + raise ValueError( + msg, + ) + # Read intensity image + intensity_image, img_emb_unit = read(img_file[1][0]["file"]) + int_file = img_file[1][0]["file"].name + channel = None + + # Dataframe contains the features extracted from images + if not intDir or files == [] or files is None: + channel = None int_filename = None - if intDir and files==None: + if intDir and files is None: int_filename = int_file - df,title = feature_extraction(features, - embeddedpixelsize, - unitLength, - pixelsPerunit, - pixelDistance, - channel, - intensity_image, - img_emb_unit, - label_image, - seg_file_names1=img_file[0][0]['file'], - int_file_name=int_filename - ) - - #Save each csv file separately + df, title = feature_extraction( + features, + embeddedpixelsize, + unitLength, + pixelsPerunit, + pixelDistance, + channel, + intensity_image, + img_emb_unit, + label_image, + seg_file_names1=img_file[0][0]["file"], + int_file_name=int_filename, + ) + + # Save each csv file separately os.chdir(outDir) - - if csvfile == 'singlecsv' and (files ==''or files==None or files==[]): - df_csv = df_csv.append(df) - elif csvfile == 'separatecsv': + + if csvfile == "singlecsv" and (files == "" or files is None or files == 
[]): + df_csv = df_csv.append(df) + elif csvfile == "separatecsv": if df.empty: - raise ValueError('No output to save as csv files') + msg = "No output to save as csv files" + raise ValueError(msg) else: - logger.info('Saving dataframe to csv for ' + img_file[0][0]['file'].name) - df = df.loc[:,~df.columns.duplicated()] - if 'touching_border' in df.columns: - last_column = df.pop('touching_border') - df.insert(len(df.columns), 'touching_border', last_column) - export_csv = df.to_csv(r'%s.csv'%title, index=None, header=True, encoding='utf-8-sig') - - #Save values for all images in single csv - if csvfile == 'singlecsv': - if df_csv.empty: - raise ValueError('No output to save as csv files') - else: - logger.info('Saving dataframe to csv file for all images in {}'.format(outDir)) - df_csv.dropna(inplace=True, axis=1, how='all') - df_csv = df_csv.loc[:,~df_csv.columns.duplicated()] - if 'touching_border' in df_csv.columns: - last_column = df_csv.pop('touching_border') - df_csv.insert(len(df_csv.columns), 'touching_border', last_column) - export_csv = df_csv.to_csv(r'Feature_Extraction.csv', index=None, header=True, encoding='utf-8-sig') + logger.info( + "Saving dataframe to csv for " + img_file[0][0]["file"].name, + ) + df = df.loc[:, ~df.columns.duplicated()] + if "touching_border" in df.columns: + last_column = df.pop("touching_border") + df.insert(len(df.columns), "touching_border", last_column) + df.to_csv( + r"%s.csv" % title, index=None, header=True, encoding="utf-8-sig", + ) + + # Save values for all images in single csv + if csvfile == "singlecsv": + if df_csv.empty: + msg = "No output to save as csv files" + raise ValueError(msg) + else: + logger.info( + f"Saving dataframe to csv file for all images in {outDir}", + ) + df_csv.dropna(inplace=True, axis=1, how="all") + df_csv = df_csv.loc[:, ~df_csv.columns.duplicated()] + if "touching_border" in df_csv.columns: + last_column = df_csv.pop("touching_border") + df_csv.insert(len(df_csv.columns), 
"touching_border", last_column) + df_csv.to_csv( + r"Feature_Extraction.csv", index=None, header=True, encoding="utf-8-sig", + ) + if __name__ == "__main__": main() diff --git a/features/polus-imagenet-model-featurization-plugin/Dockerfile b/features/polus-imagenet-model-featurization-plugin/Dockerfile index 466dfac00..23c680609 100644 --- a/features/polus-imagenet-model-featurization-plugin/Dockerfile +++ b/features/polus-imagenet-model-featurization-plugin/Dockerfile @@ -1,7 +1,7 @@ FROM tensorflow/tensorflow:2.11.0-gpu COPY VERSION / - + ARG EXEC_DIR="/opt/executables" ARG DATA_DIR="/data" @@ -15,4 +15,4 @@ WORKDIR ${EXEC_DIR} RUN pip3 install "bfio[all]==2.1.9" RUN pip3 install -r ${EXEC_DIR}/requirements.txt --no-cache-dir -ENTRYPOINT ["python3", "/opt/executables/main.py"] \ No newline at end of file +ENTRYPOINT ["python3", "/opt/executables/main.py"] diff --git a/features/polus-imagenet-model-featurization-plugin/README.md b/features/polus-imagenet-model-featurization-plugin/README.md index 6e7ab3bd5..d3b8f38a2 100644 --- a/features/polus-imagenet-model-featurization-plugin/README.md +++ b/features/polus-imagenet-model-featurization-plugin/README.md @@ -41,4 +41,3 @@ This plugin takes one input argument and one output argument: | `--model` | Pre-trained ImageNet model to use for featurization | Input | enum | | `--resolution` | Resolution to which the input images are scaled | Input | string | | `--outDir` | Output collection | Output | csvCollection | - diff --git a/features/polus-imagenet-model-featurization-plugin/VERSION b/features/polus-imagenet-model-featurization-plugin/VERSION index 7693c96bf..b1e80bb24 100644 --- a/features/polus-imagenet-model-featurization-plugin/VERSION +++ b/features/polus-imagenet-model-featurization-plugin/VERSION @@ -1 +1 @@ -0.1.3 \ No newline at end of file +0.1.3 diff --git a/features/polus-imagenet-model-featurization-plugin/build-docker.sh b/features/polus-imagenet-model-featurization-plugin/build-docker.sh index 
c0de187e2..61004e005 100755 --- a/features/polus-imagenet-model-featurization-plugin/build-docker.sh +++ b/features/polus-imagenet-model-featurization-plugin/build-docker.sh @@ -1,4 +1,4 @@ #!/bin/bash version=$(=1.19.2 pandas>=1.1.4 tqdm>=4.52.0 -scikit-image==0.17.2 \ No newline at end of file +scikit-image==0.17.2 diff --git a/features/polus-object-spectral-featurization-plugin/Dockerfile b/features/polus-object-spectral-featurization-plugin/Dockerfile index 049a4df5a..c884b8bcb 100644 --- a/features/polus-object-spectral-featurization-plugin/Dockerfile +++ b/features/polus-object-spectral-featurization-plugin/Dockerfile @@ -1,7 +1,7 @@ FROM polusai/bfio:2.1.9 COPY VERSION / - + ARG EXEC_DIR="/opt/executables" ARG DATA_DIR="/data" @@ -14,4 +14,4 @@ WORKDIR ${EXEC_DIR} RUN pip3 install -r ${EXEC_DIR}/requirements.txt --no-cache-dir -ENTRYPOINT ["python3", "/opt/executables/main.py"] \ No newline at end of file +ENTRYPOINT ["python3", "/opt/executables/main.py"] diff --git a/features/polus-object-spectral-featurization-plugin/README.md b/features/polus-object-spectral-featurization-plugin/README.md index 6e3311bc8..0b6fcb6b6 100644 --- a/features/polus-object-spectral-featurization-plugin/README.md +++ b/features/polus-object-spectral-featurization-plugin/README.md @@ -1,14 +1,14 @@ # Object Spectral Featurization -This plugin uses [Laplace-Beltrami](https://www.sciencedirect.com/science/article/abs/pii/S0010448509000463) [eigenvalues](https://www.mdpi.com/1999-4893/12/8/171) as shape descriptors for 3D objects. The advantage of these spectral features over traditional ones is that they are isometric, optionally scale invariant, and are robust to noise. +This plugin uses [Laplace-Beltrami](https://www.sciencedirect.com/science/article/abs/pii/S0010448509000463) [eigenvalues](https://www.mdpi.com/1999-4893/12/8/171) as shape descriptors for 3D objects. 
The advantage of these spectral features over traditional ones is that they are isometric, optionally scale invariant, and are robust to noise. -To use the spectral features plugin, you must specify the number of features you want to compute. Keep in mind that the features are in ordered by length scale, with the 50th capturing finer details compared to the 2nd feature. You also have the ability to specify if you want to calculate shape invariant features. Those are useful if you want to compare the same shapes at different sizes. +To use the spectral features plugin, you must specify the number of features you want to compute. Keep in mind that the features are ordered by length scale, with the 50th capturing finer details compared to the 2nd feature. You also have the ability to specify if you want to calculate shape invariant features. Those are useful if you want to compare the same shapes at different sizes. -## Known limitations +## Known limitations -The current implementation of spectral featurization works by first meshing the object of interest. This means that the voxels comprising each individual object must fit into memory. Also, because meshes can get quite large and slow down the eigenvalue decomposition, there is an option to decimate the mesh to a fixed upper bound. A good number here is 10,000 or so faces. +The current implementation of spectral featurization works by first meshing the object of interest. This means that the voxels comprising each individual object must fit into memory. Also, because meshes can get quite large and slow down the eigenvalue decomposition, there is an option to decimate the mesh to a fixed upper bound. A good number here is 10,000 or so faces. -Another issue is that in some instances the graph Laplacian might be singular. In that case, this plugin automatically perturbs it and attempts to resolve the problem.
Althought this often succeeds, it limit the ability to resolve the smallest eigenvalues which can affect the quality of the features. +Another issue is that in some instances the graph Laplacian might be singular. In that case, this plugin automatically perturbs it and attempts to resolve the problem. Although this often succeeds, it limits the ability to resolve the smallest eigenvalues which can affect the quality of the features. Finally, because this plugin relies on meshing for feature generation, it currently does not support nested or hierarchical objects. Support for this will be added in the future. diff --git a/features/polus-object-spectral-featurization-plugin/VERSION b/features/polus-object-spectral-featurization-plugin/VERSION index 8294c1843..d917d3e26 100644 --- a/features/polus-object-spectral-featurization-plugin/VERSION +++ b/features/polus-object-spectral-featurization-plugin/VERSION @@ -1 +1 @@ -0.1.2 \ No newline at end of file +0.1.2 diff --git a/features/polus-object-spectral-featurization-plugin/build-docker.sh b/features/polus-object-spectral-featurization-plugin/build-docker.sh index 76f39dbf6..5532b46df 100755 --- a/features/polus-object-spectral-featurization-plugin/build-docker.sh +++ b/features/polus-object-spectral-featurization-plugin/build-docker.sh @@ -1,4 +1,4 @@ #!/bin/bash version=$( np.ndarray: - """ Generate spectral features for a mesh, given mesh vertices and faces. - - A triangular mesh, described by vertices and faces, is featurized using the - Laplace-Beltrami eigenvalues of the graph Laplacian of the geometry. The featurization - can be scale invariant if desired. The user can request a specific number of - features. The algorithm attempts to iteratively identify the top `k` non-zero - eigenvalues. If the graph is singular, it will be perturbed, and this may - introduce artifacts in the final result.
+ vertices: np.ndarray, faces: np.ndarray, k: int = 50, scale_invariant: bool = False, +) -> np.ndarray: + """Generate spectral features for a mesh, given mesh vertices and faces. + + A triangular mesh, described by vertices and faces, is featurized using the + Laplace-Beltrami eigenvalues of the graph Laplacian of the geometry. The featurization + can be scale invariant if desired. The user can request a specific number of + features. The algorithm attempts to iteratively identify the top `k` non-zero + eigenvalues. If the graph is singular, it will be perturbed, and this may + introduce artifacts in the final result. Inputs: vertices - Nx3 Numpy array of mesh vertex coordinates. - faces - Mx3 Numpy array of vertex indices making up mesh faces. + faces - Mx3 Numpy array of vertex indices making up mesh faces. k - Number of requested features. scale_invariant - Specify if features should be scale invariant. Outputs: eigvals - k eigenvalues representing the featurized mesh. """ - i = np.concatenate([faces[:,0], faces[:,0], faces[:,1], faces[:,1], faces[:,2], faces[:,2]]) - j = np.concatenate([faces[:,1], faces[:,2], faces[:,0], faces[:,2], faces[:,0], faces[:,1]]) - - idx = np.c_[i,j] + i = np.concatenate( + [faces[:, 0], faces[:, 0], faces[:, 1], faces[:, 1], faces[:, 2], faces[:, 2]], + ) + j = np.concatenate( + [faces[:, 1], faces[:, 2], faces[:, 0], faces[:, 2], faces[:, 0], faces[:, 1]], + ) + + idx = np.c_[i, j] idx = np.unique(idx, axis=0) - - xi = vertices[idx[:,0]] - xj = vertices[idx[:,1]] - - dij = np.sum((xi - xj)**2, axis=1) - t = 2*dij.max() - - v = np.exp(-dij/t) - W = scipy.sparse.csr_matrix((v, (idx[:,0], idx[:,1]))) + + xi = vertices[idx[:, 0]] + xj = vertices[idx[:, 1]] + + dij = np.sum((xi - xj) ** 2, axis=1) + t = 2 * dij.max() + + v = np.exp(-dij / t) + W = scipy.sparse.csr_matrix((v, (idx[:, 0], idx[:, 1]))) D = scipy.sparse.diags(np.asarray(W.sum(axis=1)).flatten(), 0) L = D - W - - # Make sure we calculate enough eigenvalues. 
First we calculate some extra - # eigenvalues in case there are many separate components. + + # Make sure we calculate enough eigenvalues. First we calculate some extra + # eigenvalues in case there are many separate components. max_iter = 10 enough_eigvals = False k_orig = k n_singular = 0 etol = 1e-3 while enough_eigvals is not True and max_iter > 0: - try: - # Even though we're after the smallest eigenvalues (which='SM'), we use - # shift-invert mode (sigma=0) and solve for the largest. This is more efficient. + # Even though we're after the smallest eigenvalues (which='SM'), we use + # shift-invert mode (sigma=0) and solve for the largest. This is more efficient. # See https://docs.scipy.org/doc/scipy/reference/generated/scipy.sparse.linalg.eigsh.html - eigvals = scipy.sparse.linalg.eigsh(L, sigma=0, which='LM', k=k + 10, return_eigenvectors=False) + eigvals = scipy.sparse.linalg.eigsh( + L, sigma=0, which="LM", k=k + 10, return_eigenvectors=False, + ) except RuntimeError: - # Runtime error is most likely due to singularity. Add small delta to matrix and try again. + # Runtime error is most likely due to singularity. Add small delta to matrix and try again. # This will burn through an iteration since we don't want to do this forever. eps = scipy.sparse.diags(np.zeros((W.shape[0],)) + etol, 0) L += eps n_singular += 1 - logger.warning('Graph laplacian likely singular. Perturbing matrix, but this may introduce inaccuracies.') + logger.warning( + "Graph laplacian likely singular. Perturbing matrix, but this may introduce inaccuracies.", + ) max_iter -= 1 continue # Remove zero eigenvalues. - eigvals = eigvals[~np.isclose(eigvals, 0, atol=1e-5 + etol*n_singular)] + eigvals = eigvals[~np.isclose(eigvals, 0, atol=1e-5 + etol * n_singular)] eigvals.sort() - + if len(eigvals) >= k_orig: eigvals = eigvals[:k_orig] enough_eigvals = True else: max_iter -= 1 - k += k - len(eigvals) + 1 # Add one for good measure. + k += k - len(eigvals) + 1 # Add one for good measure. 
if max_iter <= 0: - logger.error('Could not solve for the desired number of eigenvalues. Please check the number of connected components in your graph.') - + logger.error( + "Could not solve for the desired number of eigenvalues. Please check the number of connected components in your graph.", + ) + if scale_invariant: eigvals /= eigvals[0] @@ -98,19 +107,20 @@ def mesh_spectral_features( def mesh_and_featurize_image( image: BioReader, - chunk_size: Tuple[int, int, int] = (256, 256, 256), - num_features: int = 50, + chunk_size: tuple[int, int, int] = (256, 256, 256), + num_features: int = 50, scale_invariant: bool = False, - limit_mesh_size: int = None) -> Tuple[List[int], np.ndarray]: - """ Mesh and generate spectral features for all ROIs in a 3D image. + limit_mesh_size: Optional[int] = None, +) -> tuple[list[int], np.ndarray]: + """Mesh and generate spectral features for all ROIs in a 3D image. - Image is initially scanned for all ROIs and corresponding bounding boxes. - Each ROI is then loaded in its entirety and meshed. The mesh is then used - to generate spectral features using the graph Laplacian. + Image is initially scanned for all ROIs and corresponding bounding boxes. + Each ROI is then loaded in its entirety and meshed. The mesh is then used + to generate spectral features using the graph Laplacian. Inputs: image - BioReader handle to the image - chunk_size - Size of chunks used for image traversal + chunk_size - Size of chunks used for image traversal num_features - Number of spectral features to calculate scale_invariant - Specify if the calculated features should be scale invariant limit_mesh_size - If specified, the number of faces in generated meshes are limited @@ -119,56 +129,67 @@ def mesh_and_featurize_image( labels - The label IDs of each ROI features - An N x num_features matrix containing the spectral features for each ROI """ - # Store minimum and maximum bounds of every object. 
min_bounds = {} max_bounds = {} - - # Go through the image in chunks and determine extents of each ROI. + + # Go through the image in chunks and determine extents of each ROI. for y in range(0, image.Y, chunk_size[1]): for x in range(0, image.X, chunk_size[0]): for z in range(0, image.Z, chunk_size[2]): - x_step = np.min([x + chunk_size[0], image.X]) y_step = np.min([y + chunk_size[1], image.Y]) z_step = np.min([z + chunk_size[2], image.Z]) - chunk = np.squeeze(image[y:y_step, x:x_step, z:z_step]) - - labels = np.unique(chunk[chunk > 0]) - + chunk = np.squeeze(image[y:y_step, x:x_step, z:z_step]) + + labels = np.unique(chunk[chunk > 0]) + for label in labels: coords = np.argwhere(chunk == label) - # Add a one pixel padding so long as we're not on a boundary. + # Add a one pixel padding so long as we're not on a boundary. curr_min = np.min(coords, axis=0) + np.array([y, x, z]) - 1 curr_min = np.maximum([0, 0, 0], curr_min) - + curr_max = np.max(coords, axis=0) + np.array([y, x, z]) + 1 curr_max = np.minimum([image.Y, image.X, image.Z], curr_max) - - if label not in min_bounds: + + if label not in min_bounds: min_bounds[label] = curr_min else: - min_bounds[label] = np.min(np.stack([min_bounds[label], curr_min]), axis=0) - + min_bounds[label] = np.min( + np.stack([min_bounds[label], curr_min]), axis=0, + ) + if label not in max_bounds: max_bounds[label] = curr_max else: - max_bounds[label] = np.max(np.stack([max_bounds[label], curr_max]), axis=0) - + max_bounds[label] = np.max( + np.stack([max_bounds[label], curr_max]), axis=0, + ) - # Get subvolume for each ROI and generate mesh. + # Get subvolume for each ROI and generate mesh. # Note: we assume labels in min and max dicts are the same. 
labels = list(min_bounds.keys()) features = np.zeros((len(labels), num_features)) - for i, (label, min_bounds, max_bounds) in enumerate(zip(labels, min_bounds.values(), max_bounds.values())): - subvol = image[min_bounds[0]:max_bounds[0], min_bounds[1]:max_bounds[1], min_bounds[2]:max_bounds[2]] + for i, (label, min_bounds, max_bounds) in enumerate( + zip(labels, min_bounds.values(), max_bounds.values()), + ): + subvol = image[ + min_bounds[0] : max_bounds[0], + min_bounds[1] : max_bounds[1], + min_bounds[2] : max_bounds[2], + ] subvol = np.squeeze(subvol) - verts, faces, _, _ = measure.marching_cubes((subvol == label).astype(np.uint8), 0, allow_degenerate=False) + verts, faces, _, _ = measure.marching_cubes( + (subvol == label).astype(np.uint8), 0, allow_degenerate=False, + ) - if limit_mesh_size is not None and faces.shape[0] > limit_mesh_size: - mesh_obj = trimesh.Trimesh(verts, faces).simplify_quadratic_decimation(limit_mesh_size) + if limit_mesh_size is not None and faces.shape[0] > limit_mesh_size: + mesh_obj = trimesh.Trimesh(verts, faces).simplify_quadratic_decimation( + limit_mesh_size, + ) mesh_obj.remove_degenerate_faces() mesh_obj.remove_duplicate_faces() mesh_obj.remove_unreferenced_vertices() @@ -178,9 +199,13 @@ def mesh_and_featurize_image( verts = np.asarray(mesh_obj.vertices) faces = np.asarray(mesh_obj.faces) - logger.info(f'Featurizing ROI {label} ({i + 1}/{len(labels)}) with {verts.shape[0]} vertices.') + logger.info( + f"Featurizing ROI {label} ({i + 1}/{len(labels)}) with {verts.shape[0]} vertices.", + ) - feats = mesh_spectral_features(verts, faces, k=num_features, scale_invariant=scale_invariant) + feats = mesh_spectral_features( + verts, faces, k=num_features, scale_invariant=scale_invariant, + ) features[i] = feats return labels, features diff --git a/features/polus-object-spectral-featurization-plugin/src/requirements.txt b/features/polus-object-spectral-featurization-plugin/src/requirements.txt index 41403bb89..979aa5239 100644 --- 
a/features/polus-object-spectral-featurization-plugin/src/requirements.txt +++ b/features/polus-object-spectral-featurization-plugin/src/requirements.txt @@ -2,4 +2,4 @@ scipy>=1.3.3 scikit-image>=0.17.2 pandas>=1.1.4 trimesh==3.8.19 -open3d>=0.9.0 \ No newline at end of file +open3d>=0.9.0 diff --git a/features/polus-object-spectral-featurization-plugin/tests/featurization_test.py b/features/polus-object-spectral-featurization-plugin/tests/featurization_test.py index fcee7d4b2..fabf9a3fc 100644 --- a/features/polus-object-spectral-featurization-plugin/tests/featurization_test.py +++ b/features/polus-object-spectral-featurization-plugin/tests/featurization_test.py @@ -1,36 +1,41 @@ -import os, sys +import os +import sys import unittest + import numpy as np from bfio import BioReader from skimage import measure - dir_path = os.path.dirname(os.path.realpath(__file__)) -sys.path.append(os.path.join(dir_path, '../src')) +sys.path.append(os.path.join(dir_path, "../src")) import mesh class TestScalableFeaturization(unittest.TestCase): def test_scalable_features(self): - # First, load entire image and calculate features. - bunny_file = os.path.join(dir_path, 'test_data/bunny.ome.tif') + # First, load entire image and calculate features. + bunny_file = os.path.join(dir_path, "test_data/bunny.ome.tif") with BioReader(bunny_file) as br: ref_img = np.squeeze(br[:]) - - verts, faces, _, _ = measure.marching_cubes((ref_img == 255).astype(np.uint8), 0, allow_degenerate=False) - ref_features = mesh.mesh_spectral_features(verts, faces, k=50, scale_invariant=False) - self.assertEquals(len(ref_features), 50) + verts, faces, _, _ = measure.marching_cubes( + (ref_img == 255).astype(np.uint8), 0, allow_degenerate=False, + ) + ref_features = mesh.mesh_spectral_features( + verts, faces, k=50, scale_invariant=False, + ) + + assert len(ref_features) == 50 - # Now calculate features using chunking. + # Now calculate features using chunking. 
with BioReader(bunny_file) as br: - labels, features = mesh.mesh_and_featurize_image(br, chunk_size=(20, 20, 20), num_features=50, scale_invariant=False) - - self.assertTrue( - np.allclose(ref_features, features.flatten(), atol=1.e-3) - ) - + labels, features = mesh.mesh_and_featurize_image( + br, chunk_size=(20, 20, 20), num_features=50, scale_invariant=False, + ) + + assert np.allclose(ref_features, features.flatten(), atol=0.001) + -if __name__ == '__main__': - unittest.main() \ No newline at end of file +if __name__ == "__main__": + unittest.main() diff --git a/features/region-segmentation-eval-tool/src/polus/images/features/region_segmentation_eval/__main__.py b/features/region-segmentation-eval-tool/src/polus/images/features/region_segmentation_eval/__main__.py index 3c6812ee6..900fe7100 100644 --- a/features/region-segmentation-eval-tool/src/polus/images/features/region_segmentation_eval/__main__.py +++ b/features/region-segmentation-eval-tool/src/polus/images/features/region_segmentation_eval/__main__.py @@ -2,12 +2,13 @@ import json import logging import pathlib -from typing import Any, Optional +from typing import Any +from typing import Optional import filepattern as fp import typer +from polus.images.features.region_segmentation_eval import evaluate from polus.images.features.region_segmentation_eval.evaluate import POLUS_TAB_EXT -from polus.images.features.region_segmentation_eval import evaluate as evaluate # Initialize the logger logging.basicConfig( @@ -59,11 +60,11 @@ def main( ), iou_score: Optional[float] = typer.Option(0.0, "--iouScore", help="IoU theshold"), file_pattern: Optional[str] = typer.Option( - ".+", "--filePattern", help="Filename pattern to filter data." 
+ ".+", "--filePattern", help="Filename pattern to filter data.", ), out_dir: pathlib.Path = typer.Option(..., "--outDir", help="Output collection"), preview: Optional[bool] = typer.Option( - False, "--preview", help="Output a JSON preview of files" + False, "--preview", help="Output a JSON preview of files", ), ) -> None: """Convert bioformat supported image datatypes conversion to ome.tif or ome.zarr file format.""" @@ -125,7 +126,7 @@ def main( total_summary, radius_factor, iou_score, - file_pattern + file_pattern, ) diff --git a/features/region-segmentation-eval-tool/src/polus/images/features/region_segmentation_eval/evaluate.py b/features/region-segmentation-eval-tool/src/polus/images/features/region_segmentation_eval/evaluate.py index 751c81679..5a83fb798 100644 --- a/features/region-segmentation-eval-tool/src/polus/images/features/region_segmentation_eval/evaluate.py +++ b/features/region-segmentation-eval-tool/src/polus/images/features/region_segmentation_eval/evaluate.py @@ -1,11 +1,13 @@ """Region segmentation eval package.""" -import enum import logging import math import os import pathlib +from collections.abc import Sequence from multiprocessing import cpu_count -from typing import Any, Dict, List, Optional, Sequence, Tuple, Union +from typing import Any +from typing import Optional +from typing import Union import cv2 import filepattern @@ -87,7 +89,7 @@ ] -def ccl(img: np.ndarray) -> Tuple[np.ndarray, int, float, float]: +def ccl(img: np.ndarray) -> tuple[np.ndarray, int, float, float]: """Run connected component labeling function of opencv on input image. Args: @@ -105,7 +107,7 @@ def ccl(img: np.ndarray) -> Tuple[np.ndarray, int, float, float]: def get_image( - im: np.ndarray, tile_size: int, X: int, Y: int, x_max: int, y_max: int + im: np.ndarray, tile_size: int, X: int, Y: int, x_max: int, y_max: int, ) -> np.ndarray: """Get tiled images based on tile size and set all right and lower border cells to 0. 
@@ -176,8 +178,8 @@ def metrics(tp: Union[float, int], fp: int, fn: int) -> Sequence[float]: def find_over_under( - dict_result: Dict, data: List[List[Union[str, int, float]]] -) -> Tuple[List[List[Union[str, int, float]]], int, int]: + dict_result: dict, data: list[list[Union[str, int, float]]], +) -> tuple[list[list[Union[str, int, float]]], int, int]: """Find number of over and under segmented cells. Args: @@ -250,7 +252,8 @@ def evaluation( else: radius_factor = 1 else: - raise ValueError("radius_factor not provided") + msg = "radius_factor not provided" + raise ValueError(msg) total_files = 0 result = [] @@ -280,271 +283,238 @@ def evaluation( file_name = file[1][0] tile_grid_size = 1 tile_size = tile_grid_size * 2048 - with BioReader(file_name, max_workers=cpu_count()) as br_pred: - with BioReader( - pathlib.Path(gt_dir / file_name.name), - max_workers=cpu_count(), - ) as br_gt: - logger.info(f"Evaluating image {file_name}") - total_files += 1 - - if individual_summary: - mean_centroid = [0] * (input_classes + 1) - mean_iou = [0] * (input_classes + 1) - - totalCells = [0] * (input_classes + 1) - tp = [0] * (input_classes + 1) - fp = [0] * (input_classes + 1) - fn = [0] * (input_classes + 1) - over_segmented = [0] * (input_classes + 1) - under_segmented = [0] * (input_classes + 1) - for z in range(br_gt.Z): - # Loop across the length of the image - for y in range(0, br_gt.Y, tile_size): - y_max = min([br_gt.Y, y + tile_size]) - for x in range(0, br_gt.X, tile_size): # noqa - x_max = min([br_gt.X, x + tile_size]) - im_gt = np.squeeze( - br_gt[y:y_max, x:x_max, z : z + 1, 0, 0] # noqa - ) - im_pred = np.squeeze( - br_pred[y:y_max, x:x_max, z : z + 1, 0, 0] # noqa - ) + with BioReader(file_name, max_workers=cpu_count()) as br_pred, BioReader( + pathlib.Path(gt_dir / file_name.name), + max_workers=cpu_count(), + ) as br_gt: + logger.info(f"Evaluating image {file_name}") + total_files += 1 + + if individual_summary: + mean_centroid = [0] * (input_classes + 1) + 
mean_iou = [0] * (input_classes + 1) + + totalCells = [0] * (input_classes + 1) + tp = [0] * (input_classes + 1) + fp = [0] * (input_classes + 1) + fn = [0] * (input_classes + 1) + over_segmented = [0] * (input_classes + 1) + under_segmented = [0] * (input_classes + 1) + for z in range(br_gt.Z): + # Loop across the length of the image + for y in range(0, br_gt.Y, tile_size): + y_max = min([br_gt.Y, y + tile_size]) + for x in range(0, br_gt.X, tile_size): + x_max = min([br_gt.X, x + tile_size]) + im_gt = np.squeeze( + br_gt[y:y_max, x:x_max, z : z + 1, 0, 0] # noqa + ) + im_pred = np.squeeze( + br_pred[y:y_max, x:x_max, z : z + 1, 0, 0] # noqa + ) - if input_classes > 1: - classes = np.unique(im_gt) + classes = np.unique(im_gt) if input_classes > 1 else [1] + for cl in classes: + if len(classes) > 1: + im_pred = np.where(im_pred == cl, cl, 0) + im_gt = np.where(im_gt == cl, cl, 0) + im_pred, _, _, _ = ccl(np.uint8(im_pred)) + im_gt, _, _, _ = ccl(np.uint8(im_gt)) + + im_gt = get_image( + im_gt, + tile_size, + br_gt.X, + br_gt.Y, + x_max, + y_max, + ).astype(int) + im_pred = get_image( + im_pred, + tile_size, + br_pred.X, + br_pred.Y, + x_max, + y_max, + ).astype(int) + props = skimage.measure.regionprops(im_pred) + numLabels_pred = np.unique(im_pred) + + if numLabels_pred[0] != 0: + numLabels_pred = np.insert(numLabels_pred, 0, 0) + centroids_pred = np.zeros((len(numLabels_pred), 2)) + i = 1 + for prop in props: + centroids_pred[i] = prop.centroid[::-1] + i += 1 + + list_matches = [] + props = skimage.measure.regionprops(im_gt) + numLabels_gt = np.unique(im_gt) + + if numLabels_gt[0] != 0: + numLabels_gt = np.insert(numLabels_gt, 0, 0) + centroids_gt = np.zeros((len(numLabels_gt), 2)) + diameters = np.zeros(len(numLabels_gt)) + i = 1 + for prop in props: + centroids_gt[i] = prop.centroid[::-1] + diameters[i] = prop.minor_axis_length + i += 1 + + dict_result: dict[str, Any] = {} + data = [None] * (numLabels_gt.max() + 1) + + if len(centroids_pred) > 4: + 
numberofNeighbors = 5 else: - classes = [1] - for cl in classes: - if len(classes) > 1: - im_pred = np.where(im_pred == cl, cl, 0) - im_gt = np.where(im_gt == cl, cl, 0) - im_pred, _, _, _ = ccl(np.uint8(im_pred)) - im_gt, _, _, _ = ccl(np.uint8(im_gt)) - - im_gt = get_image( - im_gt, - tile_size, - br_gt.X, - br_gt.Y, - x_max, - y_max, - ).astype(int) - im_pred = get_image( - im_pred, - tile_size, - br_pred.X, - br_pred.Y, - x_max, - y_max, - ).astype(int) - props = skimage.measure.regionprops(im_pred) - numLabels_pred = np.unique(im_pred) - - if numLabels_pred[0] != 0: - numLabels_pred = np.insert(numLabels_pred, 0, 0) - centroids_pred = np.zeros((len(numLabels_pred), 2)) - i = 1 - for prop in props: - centroids_pred[i] = prop.centroid[::-1] - i += 1 - - list_matches = [] - props = skimage.measure.regionprops(im_gt) - numLabels_gt = np.unique(im_gt) - - if numLabels_gt[0] != 0: - numLabels_gt = np.insert(numLabels_gt, 0, 0) - centroids_gt = np.zeros((len(numLabels_gt), 2)) - diameters = np.zeros(len(numLabels_gt)) - i = 1 - for prop in props: - centroids_gt[i] = prop.centroid[::-1] - diameters[i] = prop.minor_axis_length - i += 1 - - dict_result: dict[str, Any] = {} - data = [None] * (numLabels_gt.max() + 1) - - if len(centroids_pred) > 4: - numberofNeighbors = 5 - else: - numberofNeighbors = len(centroids_pred) - nbrs = NearestNeighbors( - n_neighbors=numberofNeighbors, - algorithm="ball_tree", - ).fit(centroids_pred) - for i in range(1, len(centroids_gt)): - distance, index = nbrs.kneighbors( - np.array([centroids_gt[i]]) - ) - index = index.flatten() - componentMask_gt = ( - im_gt == numLabels_gt[i] - ).astype("uint8") * 1 - dict_result.setdefault(numLabels_gt[i], []) - for idx in index: - componentMask_pred_ = ( - im_pred == numLabels_pred[idx] - ).astype("uint8") * 1 - if ( - componentMask_pred_ > 0 - ).sum() > 2 and idx != 0: - if ( - componentMask_gt[ - int(centroids_pred[idx][1]), - int(centroids_pred[idx][0]), - ] - == 1 - or componentMask_pred_[ - 
int(centroids_gt[i][1]), - int(centroids_gt[i][0]), - ] - == 1 - ): - dict_result[numLabels_gt[i]].append( - numLabels_pred[idx] - ) - - for i in range(1, len(centroids_gt)): - distance, index = nbrs.kneighbors( - np.array([centroids_gt[i]]) - ) - index = index.flatten() - componentMask_gt = ( - im_gt == numLabels_gt[i] - ).astype("uint8") * 1 - match = index[0] - dis = distance.flatten()[0] - componentMask_pred = ( - im_pred == numLabels_pred[match] + numberofNeighbors = len(centroids_pred) + nbrs = NearestNeighbors( + n_neighbors=numberofNeighbors, + algorithm="ball_tree", + ).fit(centroids_pred) + for i in range(1, len(centroids_gt)): + distance, index = nbrs.kneighbors( + np.array([centroids_gt[i]]), + ) + index = index.flatten() + componentMask_gt = ( + im_gt == numLabels_gt[i] + ).astype("uint8") * 1 + dict_result.setdefault(numLabels_gt[i], []) + for idx in index: + componentMask_pred_ = ( + im_pred == numLabels_pred[idx] ).astype("uint8") * 1 - - intersection = np.logical_and( - componentMask_pred, componentMask_gt - ) - union = np.logical_or( - componentMask_pred, componentMask_gt - ) - iou_score_cell = np.sum(intersection) / np.sum( - union - ) if ( - dis < (diameters[i] / 2) * radius_factor - and match not in list_matches - and iou_score_cell > iou_score + componentMask_pred_ > 0 + ).sum() > 2 and idx != 0 and ( + componentMask_gt[ + int(centroids_pred[idx][1]), + int(centroids_pred[idx][0]), + ] + == 1 + or componentMask_pred_[ + int(centroids_gt[i][1]), + int(centroids_gt[i][0]), + ] + == 1 ): - tp[cl] += 1 - list_matches.append(match) - condition = "TP" - centroids_pred[match] = [0.0, 0.0] - totalCells[cl] += 1 - else: - fn[cl] += 1 - condition = "FN" - - if condition == "TP" and individual_summary: - mean_centroid[cl] += dis - mean_iou[cl] += iou_score_cell - - data[numLabels_gt[i]] = [ - dis, - cl, - iou_score_cell, - numLabels_gt[i], - dict_result.get(numLabels_gt[i]), - condition, # type: ignore - ] - - ( - data, - over_segmented_, - 
under_segmented_, - ) = find_over_under( # type: ignore - dict_result, data # type: ignore - ) - - over_segmented[cl] += over_segmented_ - under_segmented[cl] += under_segmented_ - - if individual_data: - ind_data: List[Any] = [] - for i in range(0, numLabels_gt.max() + 1): - if data[i] is not None: - ind_data.append(data[i]) - df_ind_data = pd.DataFrame(ind_data) - if df_ind_data.shape[1] == 6: - df_ind_data = pd.DataFrame( - ind_data, - columns=header_individual_data[:-1], - ) - else: - df_ind_data = pd.DataFrame( - ind_data, - columns=header_individual_data, + dict_result[numLabels_gt[i]].append( + numLabels_pred[idx], ) - vf_ind_data = vaex.from_pandas(df_ind_data) - outname_ind_data = pathlib.Path( - out_dir, - f"cells_{file_name.name}{POLUS_TAB_EXT}", + for i in range(1, len(centroids_gt)): + distance, index = nbrs.kneighbors( + np.array([centroids_gt[i]]), + ) + index = index.flatten() + componentMask_gt = ( + im_gt == numLabels_gt[i] + ).astype("uint8") * 1 + match = index[0] + dis = distance.flatten()[0] + componentMask_pred = ( + im_pred == numLabels_pred[match] + ).astype("uint8") * 1 + + intersection = np.logical_and( + componentMask_pred, componentMask_gt, + ) + union = np.logical_or( + componentMask_pred, componentMask_gt, + ) + iou_score_cell = np.sum(intersection) / np.sum( + union, + ) + if ( + dis < (diameters[i] / 2) * radius_factor + and match not in list_matches + and iou_score_cell > iou_score + ): + tp[cl] += 1 + list_matches.append(match) + condition = "TP" + centroids_pred[match] = [0.0, 0.0] + totalCells[cl] += 1 + else: + fn[cl] += 1 + condition = "FN" + + if condition == "TP" and individual_summary: + mean_centroid[cl] += dis + mean_iou[cl] += iou_score_cell + + data[numLabels_gt[i]] = [ + dis, + cl, + iou_score_cell, + numLabels_gt[i], + dict_result.get(numLabels_gt[i]), + condition, # type: ignore + ] + + ( + data, + over_segmented_, + under_segmented_, + ) = find_over_under( # type: ignore + dict_result, data, # type: ignore + ) + + 
over_segmented[cl] += over_segmented_ + under_segmented[cl] += under_segmented_ + + if individual_data: + ind_data: list[Any] = [] + for i in range(0, numLabels_gt.max() + 1): + if data[i] is not None: + ind_data.append(data[i]) + df_ind_data = pd.DataFrame(ind_data) + if df_ind_data.shape[1] == 6: + df_ind_data = pd.DataFrame( + ind_data, + columns=header_individual_data[:-1], ) - if f"{POLUS_TAB_EXT}" in [ - ".feather", - ".arrow", - ]: - vf_ind_data.export_feather(outname_ind_data) - else: - vf_ind_data.export_csv( - path=outname_ind_data, - chunk_size=chunk_size, - ) - logger.info( - f"cells_{file_name.name}{POLUS_TAB_EXT}" + else: + df_ind_data = pd.DataFrame( + ind_data, + columns=header_individual_data, ) - for i in range(1, len(centroids_pred)): - if ( - centroids_pred[i][0] != 0.0 - and centroids_pred[i][1] != 0.0 - ): - componentMask_pred = ( - im_pred == numLabels_pred[i] - ).astype("uint8") * 1 - if (componentMask_pred > 0).sum() > 2: - fp[cl] += 1 + vf_ind_data = vaex.from_pandas(df_ind_data) + outname_ind_data = pathlib.Path( + out_dir, + f"cells_{file_name.name}{POLUS_TAB_EXT}", + ) + if f"{POLUS_TAB_EXT}" in [ + ".feather", + ".arrow", + ]: + vf_ind_data.export_feather(outname_ind_data) + else: + vf_ind_data.export_csv( + path=outname_ind_data, + chunk_size=chunk_size, + ) + logger.info( + f"cells_{file_name.name}{POLUS_TAB_EXT}", + ) - for cl in range(1, input_classes + 1): - if tp[cl] == 0: - ( - iou, - tpr, - precision, - fnr, - fdr, - fscore, - f1_score, - fmi, - ) = metrics(1e-20, fp[cl], fn[cl]) - else: - ( - iou, - tpr, - precision, - fnr, - fdr, - fscore, - f1_score, - fmi, - ) = metrics(tp[cl], fp[cl], fn[cl]) - data_result = [ - file_name.name, - cl, - tp[cl], - fp[cl], - fn[cl], - over_segmented[cl], - under_segmented[cl], + for i in range(1, len(centroids_pred)): + if ( + centroids_pred[i][0] != 0.0 + and centroids_pred[i][1] != 0.0 + ): + componentMask_pred = ( + im_pred == numLabels_pred[i] + ).astype("uint8") * 1 + if 
(componentMask_pred > 0).sum() > 2: + fp[cl] += 1 + + for cl in range(1, input_classes + 1): + if tp[cl] == 0: + ( iou, tpr, precision, @@ -553,72 +523,100 @@ def evaluation( fscore, f1_score, fmi, - ] - - result.append(data_result) - df_result = pd.DataFrame(result, columns=header) - vf_result = vaex.from_pandas(df_result) - filename = pathlib.Path(out_dir, f"result{POLUS_TAB_EXT}") - if f"{POLUS_TAB_EXT}" in [".feather", ".arrow"]: - vf_result.export_feather(filename) + ) = metrics(1e-20, fp[cl], fn[cl]) + else: + ( + iou, + tpr, + precision, + fnr, + fdr, + fscore, + f1_score, + fmi, + ) = metrics(tp[cl], fp[cl], fn[cl]) + data_result = [ + file_name.name, + cl, + tp[cl], + fp[cl], + fn[cl], + over_segmented[cl], + under_segmented[cl], + iou, + tpr, + precision, + fnr, + fdr, + fscore, + f1_score, + fmi, + ] + + result.append(data_result) + df_result = pd.DataFrame(result, columns=header) + vf_result = vaex.from_pandas(df_result) + filename = pathlib.Path(out_dir, f"result{POLUS_TAB_EXT}") + if f"{POLUS_TAB_EXT}" in [".feather", ".arrow"]: + vf_result.export_feather(filename) + else: + vf_result.export_csv(path=filename, chunk_size=chunk_size) + logger.info(f"Saving result{POLUS_TAB_EXT}") + + if total_summary: + total_iou[cl] += iou + total_tpr[cl] += tpr + total_precision[cl] += precision + total_fnr[cl] += fnr + total_fdr[cl] += fdr + total_fscore[cl] += fscore + total_f1_score[cl] += f1_score + total_fmi[cl] += fmi + + if total_stats: + TP[cl] += tp[cl] + FP[cl] += fp[cl] + FN[cl] += fn[cl] + total_over_segmented[cl] += over_segmented[cl] + total_under_segmented[cl] += under_segmented[cl] + + if individual_summary: + for cl in range(1, input_classes + 1): + if totalCells[cl] == 0: + data_individualSummary = [ + file_name.name, + cl, + 0, + 0, + ] + ind_sum.append(data_individualSummary) else: - vf_result.export_csv(path=filename, chunk_size=chunk_size) - logger.info(f"Saving result{POLUS_TAB_EXT}") - - if total_summary: - total_iou[cl] += iou - total_tpr[cl] 
+= tpr - total_precision[cl] += precision - total_fnr[cl] += fnr - total_fdr[cl] += fdr - total_fscore[cl] += fscore - total_f1_score[cl] += f1_score - total_fmi[cl] += fmi - - if total_stats: - TP[cl] += tp[cl] - FP[cl] += fp[cl] - FN[cl] += fn[cl] - total_over_segmented[cl] += over_segmented[cl] - total_under_segmented[cl] += under_segmented[cl] - - if individual_summary: - for cl in range(1, input_classes + 1): - if totalCells[cl] == 0: - data_individualSummary = [ - file_name.name, - cl, - 0, - 0, - ] - ind_sum.append(data_individualSummary) - else: - mean_centroid[cl] = mean_centroid[cl] / totalCells[cl] - mean_iou[cl] = mean_iou[cl] / totalCells[cl] - data_individualSummary = [ - file_name.name, - cl, - mean_centroid[cl], - mean_iou[cl], - ] - ind_sum.append(data_individualSummary) - df_ind_sum = pd.DataFrame( - ind_sum, columns=header_individual_summary + mean_centroid[cl] = mean_centroid[cl] / totalCells[cl] + mean_iou[cl] = mean_iou[cl] / totalCells[cl] + data_individualSummary = [ + file_name.name, + cl, + mean_centroid[cl], + mean_iou[cl], + ] + ind_sum.append(data_individualSummary) + df_ind_sum = pd.DataFrame( + ind_sum, columns=header_individual_summary, + ) + vf_ind_sum = vaex.from_pandas(df_ind_sum) + outname_individualSummary = pathlib.Path( + out_dir, f"individual_image_summary{POLUS_TAB_EXT}", + ) + if f"{POLUS_TAB_EXT}" in [".feather", ".arrow"]: + vf_ind_sum.export_feather(outname_individualSummary) + logger.info( + f"Saving individual_image_summary{POLUS_TAB_EXT}", ) - vf_ind_sum = vaex.from_pandas(df_ind_sum) - outname_individualSummary = pathlib.Path( - out_dir, f"individual_image_summary{POLUS_TAB_EXT}" + else: + vf_ind_sum.export_csv( + path=outname_individualSummary, + chunk_size=chunk_size, ) - if f"{POLUS_TAB_EXT}" in [".feather", ".arrow"]: - vf_ind_sum.export_feather(outname_individualSummary) - logger.info( - f"Saving individual_image_summary{POLUS_TAB_EXT}" - ) - else: - vf_ind_sum.export_csv( - path=outname_individualSummary, - 
chunk_size=chunk_size, - ) if total_summary and total_files != 0: for cl in range(1, input_classes + 1): @@ -696,7 +694,7 @@ def evaluation( df_total_stats.columns = header_total_stats vf_total_stats = vaex.from_pandas(df_total_stats) overall_file = pathlib.Path( - out_dir, f"total_stats_result{POLUS_TAB_EXT}" + out_dir, f"total_stats_result{POLUS_TAB_EXT}", ) if f"{POLUS_TAB_EXT}" in [".feather", ".arrow"]: vf_total_stats.export_feather(overall_file) diff --git a/features/rt-cetsa-intensity-extraction-tool/build-docker.sh b/features/rt-cetsa-intensity-extraction-tool/build-docker.sh index f1604762b..92d1121a7 100755 --- a/features/rt-cetsa-intensity-extraction-tool/build-docker.sh +++ b/features/rt-cetsa-intensity-extraction-tool/build-docker.sh @@ -36,4 +36,4 @@ echo "build docker image : $build_cmd" docker $build_cmd # # # clean up staging directory -rm -rf ${repo_root}/docker_build \ No newline at end of file +rm -rf ${repo_root}/docker_build diff --git a/formats/polus-czi-extract-plugin/Dockerfile b/formats/polus-czi-extract-plugin/Dockerfile index 377ec4c13..56a7456af 100644 --- a/formats/polus-czi-extract-plugin/Dockerfile +++ b/formats/polus-czi-extract-plugin/Dockerfile @@ -15,4 +15,4 @@ COPY src ${EXEC_DIR}/ RUN pip3 install -r ${EXEC_DIR}/requirements.txt --no-cache-dir # Default command. 
Additional arguments are provided through the command line -ENTRYPOINT ["python3", "/opt/executables/main.py"] \ No newline at end of file +ENTRYPOINT ["python3", "/opt/executables/main.py"] diff --git a/formats/polus-czi-extract-plugin/VERSION b/formats/polus-czi-extract-plugin/VERSION index 8cfbc905b..524cb5524 100644 --- a/formats/polus-czi-extract-plugin/VERSION +++ b/formats/polus-czi-extract-plugin/VERSION @@ -1 +1 @@ -1.1.1 \ No newline at end of file +1.1.1 diff --git a/formats/polus-czi-extract-plugin/build-docker.sh b/formats/polus-czi-extract-plugin/build-docker.sh index b8ce5f75f..f236c611a 100755 --- a/formats/polus-czi-extract-plugin/build-docker.sh +++ b/formats/polus-czi-extract-plugin/build-docker.sh @@ -1,4 +1,4 @@ #!/bin/bash version=$( None: - - logger.info('Extracting tiffs and saving as ome.tif...') - files = [f for f in Path(input_dir).iterdir() if f.suffix=='.czi'] + + write_thread(out_file_path, data, metadata, chan_names[C]) + + +def main(input_dir: Path, output_dir: Path) -> None: + logger.info("Extracting tiffs and saving as ome.tif...") + files = [f for f in Path(input_dir).iterdir() if f.suffix == ".czi"] if not files: - logger.error('No CZI files found.') - raise ValueError('No CZI files found.') - + logger.error("No CZI files found.") + msg = "No CZI files found." 
+ raise ValueError(msg) + ProcessManager.init_processes() - + for file in files: - ProcessManager.submit_process(extract_fovs,file,output_dir) - + ProcessManager.submit_process(extract_fovs, file, output_dir) + ProcessManager.join_processes() + if __name__ == "__main__": # Setup the Argument parsing logger.info("Parsing arguments...") - parser = argparse.ArgumentParser(prog='main', description='Extract individual fields of view from a czi file.') - - parser.add_argument('--inpDir', dest='input_dir', type=str, - help='Path to folder with CZI files', required=True) - parser.add_argument('--outDir', dest='output_dir', type=str, - help='The output directory for ome.tif files', required=True) - + parser = argparse.ArgumentParser( + prog="main", description="Extract individual fields of view from a czi file.", + ) + + parser.add_argument( + "--inpDir", + dest="input_dir", + type=str, + help="Path to folder with CZI files", + required=True, + ) + parser.add_argument( + "--outDir", + dest="output_dir", + type=str, + help="The output directory for ome.tif files", + required=True, + ) args = parser.parse_args() input_dir = Path(args.input_dir) output_dir = Path(args.output_dir) - logger.info('input_dir = {}'.format(input_dir)) - logger.info('output_dir = {}'.format(output_dir)) - - main(input_dir, - output_dir) \ No newline at end of file + logger.info(f"input_dir = {input_dir}") + logger.info(f"output_dir = {output_dir}") + + main(input_dir, output_dir) diff --git a/formats/polus-czi-extract-plugin/src/requirements.txt b/formats/polus-czi-extract-plugin/src/requirements.txt index ed31f8806..cd6a52d58 100644 --- a/formats/polus-czi-extract-plugin/src/requirements.txt +++ b/formats/polus-czi-extract-plugin/src/requirements.txt @@ -1,2 +1,2 @@ czifile==2019.7.2 -preadator==0.2.0 \ No newline at end of file +preadator==0.2.0 diff --git a/formats/polus-imaris-parser-plugin/Dockerfile b/formats/polus-imaris-parser-plugin/Dockerfile index 0306aa237..c884b8bcb 100644 --- 
a/formats/polus-imaris-parser-plugin/Dockerfile +++ b/formats/polus-imaris-parser-plugin/Dockerfile @@ -14,4 +14,4 @@ WORKDIR ${EXEC_DIR} RUN pip3 install -r ${EXEC_DIR}/requirements.txt --no-cache-dir -ENTRYPOINT ["python3", "/opt/executables/main.py"] \ No newline at end of file +ENTRYPOINT ["python3", "/opt/executables/main.py"] diff --git a/formats/polus-imaris-parser-plugin/README.md b/formats/polus-imaris-parser-plugin/README.md index 4d6d5a833..f43e9cb7f 100644 --- a/formats/polus-imaris-parser-plugin/README.md +++ b/formats/polus-imaris-parser-plugin/README.md @@ -1,13 +1,13 @@ # Imaris Parser -This WIPP plugin automatically extracts tracking statistics from the metadata of Imaris .ims files and organizes the data into a .csv format. This cleaned format enables quick visualization of statistical features in plotting software tools such as FlowJo and WIPP Plots as well as reduces the time spent on the manual formatting of Imaris files. +This WIPP plugin automatically extracts tracking statistics from the metadata of Imaris .ims files and organizes the data into a .csv format. This cleaned format enables quick visualization of statistical features in plotting software tools such as FlowJo and WIPP Plots as well as reduces the time spent on the manual formatting of Imaris files. It also outputs summary statistics as an .xlsx file, formatted similar to the Overall.csv file exported by the Imaris application. ## Run the script 1. Add Imaris .ims files to an image collection in WIPP. -2. Build a workflow in WIPP using the created image collection and the Imaris Parser Plugin. +2. Build a workflow in WIPP using the created image collection and the Imaris Parser Plugin. 3. Upon execution of the workflow, the Imaris file is read and needed data is extracted and stored in temporary csv files. Track ID and Object ID data get linked together and also stored in temporary csv files. 
Last, data within all temporary csv files is linked to produce the final output: a csv file for each channel and an xlsx file for the overall summary statistics. After the code runs completely, a complete message is logged, and the data is stored in both a csv collection and metadata collection in WIPP. Contact [Melanie Parham](mailto:melanie.parham@nih.gov) for more information. @@ -32,4 +32,3 @@ This plugin takes one input argument and one output argument: | `--inpdir` | Input image collection to be processed by this plugin | Input | collection | | `--metaoutdir` | Metadata directory that stores overall data | Output | collection | | `--outdir` | Output collection | Output | csvCollection | - diff --git a/formats/polus-imaris-parser-plugin/VERSION b/formats/polus-imaris-parser-plugin/VERSION index 87a087111..1c09c74e2 100644 --- a/formats/polus-imaris-parser-plugin/VERSION +++ b/formats/polus-imaris-parser-plugin/VERSION @@ -1 +1 @@ -0.3.3 \ No newline at end of file +0.3.3 diff --git a/formats/polus-imaris-parser-plugin/build-docker.sh b/formats/polus-imaris-parser-plugin/build-docker.sh index 5d2956cd0..1c8f739d9 100644 --- a/formats/polus-imaris-parser-plugin/build-docker.sh +++ b/formats/polus-imaris-parser-plugin/build-docker.sh @@ -1,4 +1,4 @@ #!/bin/bash version=$( None: """Open .ims file for reading. Args: ims_filename (:obj:`str`): Selected .ims filename dir_name (:obj:`str`): Output csv collection """ - self.ims_filename = ims_filename self.dir_name = dir_name - self.f = h5py.File(self.ims_filename, 'r') + self.f = h5py.File(self.ims_filename, "r") #: Set up the logger logging.basicConfig( - format='%(asctime)s-%(name)s-%(levelname)s-%(message)s', - datefmt='%b-%d-%y %H:%M:%S') + format="%(asctime)s-%(name)s-%(levelname)s-%(message)s", + datefmt="%b-%d-%y %H:%M:%S", + ) self.logger = logging.getLogger(__name__) self.logger.setLevel(logging.INFO) - + def get_factor(self, h5py_file, chan_name): """Save Factor data to dataframes. 
- - The h5py_file is a file object that serves as entry point into - Imaris file. The chan_name variable represents each attribute - name, located in the keys of the ``Scene8/Content`` folder in - the .ims file. Surface channel types are located in attributes - prefixed with 'MegaSurfaces' and Spot channel types are in - attributes prefixed with 'Points'. Within each chan_name might - be a ``Factor`` attribute. This function returns a Pandas - dataframe containing required data within the ``Factor`` - attribute of the Imaris metadata. Specifically, this opens the - ims file and places the contents of the attribute - ``Scene8/Content/.../Factor`` into a dataframe. - If ``Factor`` does not exist, an empty dataframe for factor is - created. The Factor df is rearranged so that the first column - ('ID List' becomes the index and items in the 'Name' column + + The h5py_file is a file object that serves as entry point into + Imaris file. The chan_name variable represents each attribute + name, located in the keys of the ``Scene8/Content`` folder in + the .ims file. Surface channel types are located in attributes + prefixed with 'MegaSurfaces' and Spot channel types are in + attributes prefixed with 'Points'. Within each chan_name might + be a ``Factor`` attribute. This function returns a Pandas + dataframe containing required data within the ``Factor`` + attribute of the Imaris metadata. Specifically, this opens the + ims file and places the contents of the attribute + ``Scene8/Content/.../Factor`` into a dataframe. + If ``Factor`` does not exist, an empty dataframe for factor is + created. The Factor df is rearranged so that the first column + ('ID List' becomes the index and items in the 'Name' column (Collection, Channel, Image, Overall) become new columns. 
Args: @@ -66,17 +62,17 @@ def get_factor(self, h5py_file, chan_name): Pandas dataframe containing ``Factor`` data """ #: Navigate to the Factor attribute of the hdf5 file - factors = h5py_file['Scene8']['Content'][chan_name]['Factor'][()] - + factors = h5py_file["Scene8"]["Content"][chan_name]["Factor"][()] + #: Create df of Factor data - factor_cols = ['ID_List', 'Name', 'Level'] - df_factor = pd.DataFrame.from_records(factors, columns=factor_cols) - + factor_cols = ["ID_List", "Name", "Level"] + df_factor = pd.DataFrame.from_records(factors, columns=factor_cols) + #: Create empty factor df if Factor data not in channel - if df_factor.empty: + if df_factor.empty: df_factor = pd.DataFrame( - [[0, 'Overall', 'Overall']], - columns = ['ID_List', 'Name', 'Level']) + [[0, "Overall", "Overall"]], columns=["ID_List", "Name", "Level"], + ) df_factor.index = df_factor.index + 1 #: Create empty final row in df_factor if factor df has data @@ -84,91 +80,97 @@ def get_factor(self, h5py_file, chan_name): df_factor.loc[-1] = [0, None, None] df_factor.index = df_factor.index + 1 df_factor = df_factor.sort_index() - + # Pivot w/ ID as index to make image/channel/collection new cols - df_factor = df_factor.pivot(index='ID_List', columns='Name')['Level'] + df_factor = df_factor.pivot(index="ID_List", columns="Name")["Level"] #: Drop empty column created by pivoting - df_factor.dropna(axis=1, how='all', inplace=True) + df_factor.dropna(axis=1, how="all", inplace=True) #: Add 'Image', 'Channel', or 'Collection' col if not in Factor - if str.encode('Image') not in df_factor: - df_factor[str.encode('Image')] = None - if str.encode('Channel') not in df_factor: - df_factor[str.encode('Channel')] = None - if str.encode('Collection') not in df_factor: - df_factor[str.encode('Collection')] = None + if str.encode("Image") not in df_factor: + df_factor[str.encode("Image")] = None + if str.encode("Channel") not in df_factor: + df_factor[str.encode("Channel")] = None + if 
str.encode("Collection") not in df_factor: + df_factor[str.encode("Collection")] = None return df_factor - + def get_statisticstype(self, h5py_file, chan): """Combine ``StatisticsType`` and ``Category`` and return as df. - This function reads the attributes 'Category' and - 'StatisticsType' of the .ims file. Then, it associates feature - information, which is stored in the StatisticsType attribute, - with data in the Category attribute, which contains info data - types (Surface, Track, or Overall). These data types are useful - for later understanding which values should be stored in + This function reads the attributes 'Category' and + 'StatisticsType' of the .ims file. Then, it associates feature + information, which is stored in the StatisticsType attribute, + with data in the Category attribute, which contains info data + types (Surface, Track, or Overall). These data types are useful + for later understanding which values should be stored in Overall.xlsx. Args: h5py_file: Opened .ims file object chan (:obj:`str`): attribute name in ``Scene8/Content`` - + Returns: Pandas df with ``StatisticsType`` and ``Category`` data """ - stat_types=h5py_file['Scene8']['Content'][chan]['StatisticsType'][()] - + stat_types = h5py_file["Scene8"]["Content"][chan]["StatisticsType"][()] + df_statistics_type = pd.DataFrame.from_records( - stat_types, - columns=['ID', 'ID_Category', 'ID_FactorList', 'Name', 'Unit']) + stat_types, columns=["ID", "ID_Category", "ID_FactorList", "Name", "Unit"], + ) df_statistics_type.drop_duplicates( - ['ID', 'ID_Category', 'ID_FactorList', 'Name', 'Unit'], - keep='first', inplace=True) - - df_statistics_type.sort_values( - by=['Name'], ascending=True, inplace=True) + ["ID", "ID_Category", "ID_FactorList", "Name", "Unit"], + keep="first", + inplace=True, + ) + + df_statistics_type.sort_values(by=["Name"], ascending=True, inplace=True) #: Store 'Category' in df (indicates Surface, Track, or Overall) - categories = 
h5py_file['Scene8']['Content'][chan]['Category'][()] + categories = h5py_file["Scene8"]["Content"][chan]["Category"][()] df_category = pd.DataFrame.from_records( - categories, columns=['ID', 'CategoryName', 'Name']) - + categories, columns=["ID", "CategoryName", "Name"], + ) + #: 'Name' and 'CategoryName' columns are copies in the file df_category.rename( - columns={'ID':'catID', 'Name':'redundant_cat_name'}, - inplace=True) - + columns={"ID": "catID", "Name": "redundant_cat_name"}, inplace=True, + ) + #: Associate category names to features (in statistics_type) df_statistics_type = pd.merge( - df_category, df_statistics_type, - left_on = 'catID', right_on = 'ID_Category', how = 'outer') - + df_category, + df_statistics_type, + left_on="catID", + right_on="ID_Category", + how="outer", + ) + #: Remove unneeded cols df_statistics_type.drop( - ['catID', 'redundant_cat_name', 'ID_Category'] , - axis=1, inplace=True) + ["catID", "redundant_cat_name", "ID_Category"], axis=1, inplace=True, + ) return df_statistics_type def convert_byte_to_string_and_format(self, f_st): """Clean/decode/format ``Factor``/``StatisticsType``/``Cat``. - - This function formats a merged dataframe containing ``Factor``, - ``StatisticsType``, and ``Category`` data by decoding byte data, - replacing special characters, and joining data from `Channel`, - `Image`, `Unit`, and `Name` columns into a single column. It - takes in input_df as an argument, which is a dataframe - containing data from the ``Factor``, ``Category``, and - ``StatisticsType`` attributes of the .ims file. It adds + + This function formats a merged dataframe containing ``Factor``, + ``StatisticsType``, and ``Category`` data by decoding byte data, + replacing special characters, and joining data from `Channel`, + `Image`, `Unit`, and `Name` columns into a single column. 
It + takes in input_df as an argument, which is a dataframe + containing data from the ``Factor``, ``Category``, and + ``StatisticsType`` attributes of the .ims file. It adds substrings that enable the data to be readable when the columns - are combined in a later step. It returns a dataframe containing - ``Factor`` and ``StatisticsType`` data with UTF-8 formatting, - no special characters, no excess columns, and an updated `Name` - column that includes units of measurement, channel and image + are combined in a later step. It returns a dataframe containing + ``Factor`` and ``StatisticsType`` data with UTF-8 formatting, + no special characters, no excess columns, and an updated `Name` + column that includes units of measurement, channel and image information. Args: @@ -177,60 +179,68 @@ def convert_byte_to_string_and_format(self, f_st): Returns: Pandas df with cleaned feature and data type information """ - #: Convert cols from Factors/StatisticsType df to string - f_st[str.encode('Channel')]=f_st[str.encode('Channel')].str.decode( - 'utf-8') + f_st[str.encode("Channel")] = f_st[str.encode("Channel")].str.decode("utf-8") + + f_st[str.encode("Image")] = f_st[str.encode("Image")].str.decode("utf-8") - f_st[str.encode('Image')]=f_st[str.encode('Image')].str.decode('utf-8') - #: Convert b'Channel' col from byte to string; prepend _Channel_ - f_st[str.encode('Channel')]='_Channel_'+f_st[str.encode('Channel')] + f_st[str.encode("Channel")] = "_Channel_" + f_st[str.encode("Channel")] #: Add substrings so cols have separators when combined later - f_st[str.encode('Image')] = "_" + f_st[str.encode('Image')] - f_st['Name'] = f_st['Name'].str.decode("utf-8") - f_st['Unit'] = "_" + f_st['Unit'].str.decode("utf-8") + f_st[str.encode("Image")] = "_" + f_st[str.encode("Image")] + f_st["Name"] = f_st["Name"].str.decode("utf-8") + f_st["Unit"] = "_" + f_st["Unit"].str.decode("utf-8") #: Convert np.nan to empty string to combine feature names/units - f_st.replace(np.nan, '', 
regex=True, inplace=True) + f_st.replace(np.nan, "", regex=True, inplace=True) #: Append unit, channel, and image information to 'Name' column - f_st['Name']=f_st['Name']+f_st['Unit']+f_st[str.encode( - 'Image')]+f_st[str.encode('Channel')] + f_st["Name"] = ( + f_st["Name"] + + f_st["Unit"] + + f_st[str.encode("Image")] + + f_st[str.encode("Channel")] + ) #: Remove columns that are no longer required. f_st.drop( - ['Unit', str.encode('Channel'), str.encode('Image'), - str.encode('Collection')], axis=1, inplace=True) + [ + "Unit", + str.encode("Channel"), + str.encode("Image"), + str.encode("Collection"), + ], + axis=1, + inplace=True, + ) #: Replace special characters in feature names - f_st.replace({' ': '_', '°': '_deg_'}, inplace=True) - f_st.replace(r'\/', '_per_', regex=True, inplace=True) - f_st.replace(r'\^2', '_sqd_', regex=True, inplace=True) - f_st.replace(r'\^3', '_cubed_', regex=True, inplace=True) - f_st.replace('[^0-9a-zA-z]+', '_', regex=True, inplace=True) - f_st.replace('', np.nan, regex=True, inplace=True) + f_st.replace({" ": "_", "°": "_deg_"}, inplace=True) + f_st.replace(r"\/", "_per_", regex=True, inplace=True) + f_st.replace(r"\^2", "_sqd_", regex=True, inplace=True) + f_st.replace(r"\^3", "_cubed_", regex=True, inplace=True) + f_st.replace("[^0-9a-zA-z]+", "_", regex=True, inplace=True) + f_st.replace("", np.nan, regex=True, inplace=True) return f_st def merge_stat_value(self, df, channel_name): - """ - Merge/organize StatisticsValue, StatisticsType/Cat, and Factor. + """Merge/organize StatisticsValue, StatisticsType/Cat, and Factor. - This function combines data from the ``StatisticsValue`` - attribute of the .ims file with the dataframe containing merged - ``Factor``, ``StatisticsType``, and ``Category`` data. Then, it + This function combines data from the ``StatisticsValue`` + attribute of the .ims file with the dataframe containing merged + ``Factor``, ``StatisticsType``, and ``Category`` data. 
Then, it isolates and organizes overall, track, and object datatypes by - ID. Track IDs get shifted to a new `TrackID` column, and - object IDs remain in `ID_Object` column. Overall data remains - in `ID_Object` column but gets assigned an `ID_Object` value of - -1. Merge StatisticsValue with feature names, which are located - in the Factor/StatisticsType df; In this step, Track IDs and - Object IDs get separated using factor_statisticstype column - labeled 'CategoryName'. It returns a Pandas dataframe containing - ``StatisticsValue``, ``Factor``, ``Category``, and - ``StatisticsType`` data, with separation of track, object, and + ID. Track IDs get shifted to a new `TrackID` column, and + object IDs remain in `ID_Object` column. Overall data remains + in `ID_Object` column but gets assigned an `ID_Object` value of + -1. Merge StatisticsValue with feature names, which are located + in the Factor/StatisticsType df; In this step, Track IDs and + Object IDs get separated using factor_statisticstype column + labeled 'CategoryName'. It returns a Pandas dataframe containing + ``StatisticsValue``, ``Factor``, ``Category``, and + ``StatisticsType`` data, with separation of track, object, and overall data. Args: @@ -240,86 +250,89 @@ def merge_stat_value(self, df, channel_name): Returns: Dataframe with separation of track, object, and overall. 
""" - - #: Get StatisticsValue + #: Get StatisticsValue statistics_value = pd.DataFrame.from_records( - self.f['Scene8']['Content'][channel_name]['StatisticsValue'][()], - columns=['ID_Time', 'ID_Object', 'ID_StatisticsType', 'Value']) - - #: Merge StatisticsValue with remaining data, join on ID (index) + self.f["Scene8"]["Content"][channel_name]["StatisticsValue"][()], + columns=["ID_Time", "ID_Object", "ID_StatisticsType", "Value"], + ) + + #: Merge StatisticsValue with remaining data, join on ID (index) df = pd.merge( - statistics_value, df.set_index('ID'), - left_on='ID_StatisticsType', right_index=True) - + statistics_value, + df.set_index("ID"), + left_on="ID_StatisticsType", + right_index=True, + ) + #: Kiss&Run ext. stores in 'Overall' instead of 'Category' col - if str.encode('Overall') in df.columns: - - #: Set ID equal to -1 for 'Overall' rows - df.loc[df[str.encode( - 'Overall')] == str.encode('Overall'), 'ID_Object'] = -1 - + if str.encode("Overall") in df.columns: + #: Set ID equal to -1 for 'Overall' rows + df.loc[df[str.encode("Overall")] == str.encode("Overall"), "ID_Object"] = -1 + #: Make 'TrackID' column, even if track data absent df["TrackID"] = None - + #: Move IDs from 'ID_Object' to 'TrackID' col if CatName==Track - df.loc[df['CategoryName'] == str.encode( - 'Track'), 'TrackID'] = df['ID_Object'] + df.loc[df["CategoryName"] == str.encode("Track"), "TrackID"] = df["ID_Object"] #: If CatName==b'Track, remove TrackID data from ID_Object col - df.loc[df['CategoryName'] == str.encode('Track'), 'ID_Object'] = None + df.loc[df["CategoryName"] == str.encode("Track"), "ID_Object"] = None #: Set ID_Object to -1 where CategoryName == Overall. 
- df.loc[df['CategoryName'] == str.encode('Overall'), 'ID_Object'] = -1 + df.loc[df["CategoryName"] == str.encode("Overall"), "ID_Object"] = -1 return df - + def create_object_csv(self, df, chan): - """ - After moving track data IDs to the `TrackID` column, this - isolates non-track data by copying rows where `TrackID` is null - into a new dataframe and storing the result in an intermediate - file called ``objectdf_channel_name.csv``. The arguments are df, - which is a dataframe of data merged from ``StatisticsValue``, - ``StatisticsType``, and ``Factor`` attributes of Imaris file, - and chan, which represents each attribute located within the - keys of the ``Scene8/Content`` groups of the Imaris file. - For example, ``MegaSurfaces0`` or ``Points0`` + """After moving track data IDs to the `TrackID` column, this + isolates non-track data by copying rows where `TrackID` is null + into a new dataframe and storing the result in an intermediate + file called ``objectdf_channel_name.csv``. The arguments are df, + which is a dataframe of data merged from ``StatisticsValue``, + ``StatisticsType``, and ``Factor`` attributes of Imaris file, + and chan, which represents each attribute located within the + keys of the ``Scene8/Content`` groups of the Imaris file. + For example, ``MegaSurfaces0`` or ``Points0``. 
Args: df: DF of StatisticsValue, StatisticsType, Category, Factor chan (:obj:`str`): attribute name in ``Scene8/Content`` """ - #: Move rows where TrackID is empty to object_df - object_df=df.loc[df.TrackID.isnull()].copy() + object_df = df.loc[df.TrackID.isnull()].copy() object_df.drop( - columns=['ID_StatisticsType', 'ID_FactorList'], - axis=1, inplace=True) - + columns=["ID_StatisticsType", "ID_FactorList"], axis=1, inplace=True, + ) + #: Select 1st row where ID_Object (-1)/Name/Time/TrackID is same object_df.drop_duplicates( - subset=['Name', 'ID_Object', 'ID_Time', 'TrackID'], - keep='first', inplace=True) + subset=["Name", "ID_Object", "ID_Time", "TrackID"], + keep="first", + inplace=True, + ) object_df = object_df.pivot_table( - index=['ID_Object', 'ID_Time'], columns='Name', - values='Value', fill_value=None) + index=["ID_Object", "ID_Time"], + columns="Name", + values="Value", + fill_value=None, + ) object_df.reset_index(inplace=True) - + #: Convert to csv temp_filename = "objectdf_" + chan + ".csv" - temp_path = self.dir_name/temp_filename + temp_path = self.dir_name / temp_filename object_df.to_csv(temp_path) def create_track_csv(self, df, chan): """Stores Track data in trackdf_chan.csv. - + After moving track data IDs to the `TrackID` column, - this function corrects isolates track data by copying rows - where `TrackID` is not null into a new dataframe. Then, - because TrackID dataframe lacks the correct time (all values are - set to -1 seconds), the function drops -1. These times are - then replaced using data that has object ids linked to - correct time. The result is stored in an + this function corrects isolates track data by copying rows + where `TrackID` is not null into a new dataframe. Then, + because TrackID dataframe lacks the correct time (all values are + set to -1 seconds), the function drops -1. These times are + then replaced using data that has object ids linked to + correct time. 
The result is stored in an intermediate file called ``trackdf_chan.csv`` Args: @@ -327,96 +340,102 @@ def create_track_csv(self, df, chan): chan (:obj:`str`): attribute name in ``Scene8/Content`` """ #: Move rows where TrackID is non-empty to track_df dataframe - track_df=df.loc[df.TrackID.notnull()].copy() - + track_df = df.loc[df.TrackID.notnull()].copy() + #: Fill track_df with missing empty ID_Object values - track_df = track_df.drop('ID_Object',1) - + track_df = track_df.drop("ID_Object", 1) + #: Correct Track data time - track_df.drop('ID_Time', axis=1, inplace=True) + track_df.drop("ID_Time", axis=1, inplace=True) track_df.drop( - columns=['ID_StatisticsType', 'ID_FactorList'], - axis=1, inplace=True) + columns=["ID_StatisticsType", "ID_FactorList"], axis=1, inplace=True, + ) + + track_df = track_df.pivot_table(index="TrackID", columns="Name", values="Value") - track_df = track_df.pivot_table( - index='TrackID', columns='Name', values='Value') - #: Convert to csv temp_filename = "trackdf_" + chan + ".csv" - temp_path = self.dir_name/temp_filename + temp_path = self.dir_name / temp_filename track_df.to_csv(temp_path) def link_data_fun(self): - """Main function that makes Track and Overall intermediate csv - - This is the main function that calls all remaining functions - within this class. It extracts data from the Imaris file, - cleans and organizes data using Pandas dataframes, and outputs - two intermediate csv files for both track and non-track info + """Main function that makes Track and Overall intermediate csv. + + This is the main function that calls all remaining functions + within this class. It extracts data from the Imaris file, + cleans and organizes data using Pandas dataframes, and outputs + two intermediate csv files for both track and non-track info that gets read in by the next module, ``link_ims_ids.py``. 
Note - that channel_names can start with either 'Points' - (signifies spot channel types) or 'MegaSurfaces' (signifies + that channel_names can start with either 'Points' + (signifies spot channel types) or 'MegaSurfaces' (signifies surface types). """ - #: Store attribute names (contain channel info) in channel_names self.logger.debug("Counting channels in Scene8/Content...") self.logger.info( - 'Processing file {} stage 1/3...'.format(str(self.ims_filename))) - channel_names_all = list(self.f['Scene8']['Content'].keys()) + f"Processing file {self.ims_filename!s} stage 1/3...", + ) + channel_names_all = list(self.f["Scene8"]["Content"].keys()) channel_names = [] #: Ignore non-spot, non-surface channels. for channel in channel_names_all: - if channel.startswith('Points') or channel.startswith( - 'MegaSurfaces'): + if channel.startswith("Points") or channel.startswith("MegaSurfaces"): channel_names.append(channel) - for i in range(0,len(channel_names)): + for i in range(0, len(channel_names)): #: Loop through each attribute in Scene8/Content/ self.logger.debug( - '\n\nITERATION {}/{} OF FILE {}'.format( - i+1, len(channel_names), self.ims_filename)) - + "\n\nITERATION {}/{} OF FILE {}".format( + i + 1, len(channel_names), self.ims_filename, + ), + ) + current_channel = channel_names[i] reading_chan = "Reading " + current_channel + "..." 
self.logger.debug(reading_chan) #: Check if attributes StatisticsType/StatisticsValue exist contains_stat_type = ( - self.f['Scene8']['Content'][current_channel]).__contains__( - 'StatisticsType') + self.f["Scene8"]["Content"][current_channel] + ).__contains__("StatisticsType") contains_stat_val = ( - self.f['Scene8']['Content'][current_channel]).__contains__( - 'StatisticsValue') - - if contains_stat_type == True and contains_stat_val == True: + self.f["Scene8"]["Content"][current_channel] + ).__contains__("StatisticsValue") + if contains_stat_type is True and contains_stat_val is True: #: Merge Rearranged Factors and StatisticsType - statisticstype_df = self.get_statisticstype( - self.f, current_channel) + statisticstype_df = self.get_statisticstype(self.f, current_channel) factor_df = self.get_factor(self.f, current_channel) factor_statisticstype = pd.merge( - factor_df, statisticstype_df, - left_on='ID_List', right_on='ID_FactorList', how='outer') + factor_df, + statisticstype_df, + left_on="ID_List", + right_on="ID_FactorList", + how="outer", + ) factor_statisticstype = self.convert_byte_to_string_and_format( - factor_statisticstype) + factor_statisticstype, + ) #: Separate Track IDs/Object IDs using CategoryName col - statisticsvalue_statisticstype_factor=self.merge_stat_value( - factor_statisticstype, current_channel) - + statisticsvalue_statisticstype_factor = self.merge_stat_value( + factor_statisticstype, current_channel, + ) + #: Create csvs for Track data and Non-Track ("object") self.create_object_csv( - statisticsvalue_statisticstype_factor, current_channel) + statisticsvalue_statisticstype_factor, current_channel, + ) self.create_track_csv( - statisticsvalue_statisticstype_factor, current_channel) - + statisticsvalue_statisticstype_factor, current_channel, + ) + #: Skip attrs w/o StatisticsType/StatisticsValue (no data) else: - pass \ No newline at end of file + pass diff --git a/formats/polus-imaris-parser-plugin/src/link_ims_ids.py 
b/formats/polus-imaris-parser-plugin/src/link_ims_ids.py index 113e735c6..5b0c86b39 100644 --- a/formats/polus-imaris-parser-plugin/src/link_ims_ids.py +++ b/formats/polus-imaris-parser-plugin/src/link_ims_ids.py @@ -1,26 +1,25 @@ -import csv -import h5py import logging -import numpy as np -import os + +import h5py import pandas as pd + def link_trackid_objectid(ims_filename, dir_name): """Link TrackIDs with correct Object IDs. - This function extracts and links track and object IDs from the .ims - file using metadata within the ``Track0`` & ``TrackObject0`` attrs. - located in 'Scene8' (note that there are 6 outer groups in the file: - 'DataSet', 'DataSetInfo', 'DataSetTimes', 'Scene', 'Scene8', + This function extracts and links track and object IDs from the .ims + file using metadata within the ``Track0`` & ``TrackObject0`` attrs. + located in 'Scene8' (note that there are 6 outer groups in the file: + 'DataSet', 'DataSetInfo', 'DataSetTimes', 'Scene', 'Scene8', 'Thumbnail'). It does this by linking TrackID to ID_Object - that if the TrackObject0 columns IndexBegin:IndexEnd are 0:3, then - TrackID gets inserted into trackobject0_df at indexes 0 through 2 - (IndexEnd - 1). The starting index and ending index are determined + that if the TrackObject0 columns IndexBegin:IndexEnd are 0:3, then + TrackID gets inserted into trackobject0_df at indexes 0 through 2 + (IndexEnd - 1). The starting index and ending index are determined using the last two columns of the track0_df. - It returns a temporary csv for each channel, named after dir_name - and channel_name. The file is a table linking `TrackID` data with - corresponding `ID_Object` data for each channel. A file is created - for each channel, even if there is no data for TrackID and + It returns a temporary csv for each channel, named after dir_name + and channel_name. The file is a table linking `TrackID` data with + corresponding `ID_Object` data for each channel. 
A file is created + for each channel, even if there is no data for TrackID and ObjectIDs, for consistency. Args: @@ -30,68 +29,69 @@ def link_trackid_objectid(ims_filename, dir_name): Returns: 1 CSV per channel linking `TrackID` to Object ID (`ID_Object`) """ - #: Open file - f = h5py.File(ims_filename, 'r') + f = h5py.File(ims_filename, "r") # Examine the 'Scene8' group as a Dataset object. - channel_names = list(f['Scene8']['Content'].keys()) + channel_names = list(f["Scene8"]["Content"].keys()) id_dict = {} for ims_channel in channel_names: #: Create Track0 df - contains_trackobject0 = ( - f['Scene8']['Content'][ims_channel]).__contains__('TrackObject0') - - contains_track0 = ( - f['Scene8']['Content'][ims_channel]).__contains__('Track0') - - if contains_trackobject0 == True and contains_track0 == True: - + contains_trackobject0 = (f["Scene8"]["Content"][ims_channel]).__contains__( + "TrackObject0", + ) + + contains_track0 = (f["Scene8"]["Content"][ims_channel]).__contains__("Track0") + + if contains_trackobject0 is True and contains_track0 is True: #: Get Track0 attribute from .ims file - track0_df = pd.DataFrame( - f['Scene8']['Content'][ims_channel]['Track0'][()]) - + track0_df = pd.DataFrame(f["Scene8"]["Content"][ims_channel]["Track0"][()]) + #: Drop unneeded columns track0_df = track0_df.drop( - columns=['IndexTrackEdgeBegin', 'IndexTrackEdgeEnd']) + columns=["IndexTrackEdgeBegin", "IndexTrackEdgeEnd"], + ) #: Get TrackObject0 attribute from .ims file trackobject0 = pd.DataFrame( - f['Scene8']['Content'][ims_channel]['TrackObject0'][()]) + f["Scene8"]["Content"][ims_channel]["TrackObject0"][()], + ) #: Add ID column from Track0 using indexes of TrackObject0 - track_object_ids = (pd.merge_asof( - trackobject0.reset_index(), track0_df, left_on='index', - right_on='IndexTrackObjectBegin').reindex( - ['ID_Object', 'ID'], axis=1)) - + track_object_ids = pd.merge_asof( + trackobject0.reset_index(), + track0_df, + left_on="index", + 
right_on="IndexTrackObjectBegin", + ).reindex(["ID_Object", "ID"], axis=1) + #: Col containing TrackID is labeled "ID"; change to TrackID - track_object_ids.rename(columns = {'ID':'TrackID'}, inplace = True) + track_object_ids.rename(columns={"ID": "TrackID"}, inplace=True) #: Dict key=channel name, value=df linking ObjectID/TrackID id_dict[ims_channel] = track_object_ids - + else: #: Create empty output file for each channel if no data - empty_track_object_ids_df = pd.DataFrame( - columns = ['ID_Object', 'TrackID']) + empty_track_object_ids_df = pd.DataFrame(columns=["ID_Object", "TrackID"]) id_dict[ims_channel] = empty_track_object_ids_df #: Set up the logger logging.basicConfig( - format='%(asctime)s-%(name)s-%(levelname)s-%(message)s', - datefmt='%b-%d-%y %H:%M:%S') + format="%(asctime)s-%(name)s-%(levelname)s-%(message)s", + datefmt="%b-%d-%y %H:%M:%S", + ) logger = logging.getLogger(__name__) logger.setLevel(logging.INFO) logger.debug("Creating intermediate csv for link_ims_ids module...") logger.info("Linking IDs (stage 2/3)...") - + #: Create ID_Object, Track ID .csv file using data above for channel_name in id_dict: temp_filename = channel_name + ".csv" - temp_path = dir_name/temp_filename + temp_path = dir_name / temp_filename id_dict[channel_name].to_csv(temp_path, index=False) - - logger.debug("Done with level 2") \ No newline at end of file + + logger.debug("Done with level 2") diff --git a/formats/polus-imaris-parser-plugin/src/main.py b/formats/polus-imaris-parser-plugin/src/main.py index 075c25990..976ccccb7 100644 --- a/formats/polus-imaris-parser-plugin/src/main.py +++ b/formats/polus-imaris-parser-plugin/src/main.py @@ -1,84 +1,93 @@ import argparse +import logging import pathlib + import extract_ims_data import link_ims_ids import merge_ids_to_features -import logging -import time -from pathlib import Path -import os - -if __name__=="__main__": +if __name__ == "__main__": #: Initialize the logger logging.basicConfig( - format='%(asctime)s - 
%(name)-8s - %(levelname)-8s - %(message)s', - datefmt='%d-%b-%y %H:%M:%S') + format="%(asctime)s - %(name)-8s - %(levelname)-8s - %(message)s", + datefmt="%d-%b-%y %H:%M:%S", + ) logger = logging.getLogger("main") logger.setLevel(logging.INFO) #: Setup the argument parsing logger.info("Parsing arguments...") - + parser = argparse.ArgumentParser( - prog='main', - description='Parses .ims file metadata; organizes features in .csv.') - - parser.add_argument('--inpdir', - dest='inpdir', type=str, help='Input collection of ims files', - required=True) + prog="main", + description="Parses .ims file metadata; organizes features in .csv.", + ) + + parser.add_argument( + "--inpdir", + dest="inpdir", + type=str, + help="Input collection of ims files", + required=True, + ) parser.add_argument( - '--metaoutdir', dest='metaoutdir', type=str, - help='Output metadata collection that will hold overall .xlsx file', - required=True) + "--metaoutdir", + dest="metaoutdir", + type=str, + help="Output metadata collection that will hold overall .xlsx file", + required=True, + ) parser.add_argument( - '--outdir', dest='outdir', type=str, help='Output csv collection', - required=True) + "--outdir", dest="outdir", type=str, help="Output csv collection", required=True, + ) #: Parse the arguments args = parser.parse_args() #: Check for subfolders named images and switch to that subfolder inpdir = args.inpdir - logger.debug("Old input directory: {}".format(inpdir)) + logger.debug(f"Old input directory: {inpdir}") inpdir = pathlib.Path(inpdir) - parent_meta_path = inpdir / 'metadata_files' - + parent_meta_path = inpdir / "metadata_files" + try: # If given input directory points to images folder - if inpdir.name == 'images': + if inpdir.name == "images": logger.info("Searching parent subdirectories for metadata_files") # Check if a subdirectory of the parent contains /metadata_files p = inpdir.parent - q = p / 'metadata_files' - if q.exists() == True: + q = p / "metadata_files" + if q.exists() 
is True: # Then switch inpdir to that metadata_files directory logger.info("Switching to subdirectory metadata_files") inpdir = inpdir.with_name("metadata_files") - + # If given input directory points to parent of /metadata_files - elif parent_meta_path.exists() == True: + elif parent_meta_path.exists() is True: logger.info("Switching to subdirectory metadata_files") # Switch inpdir to metadata_files directory - inpdir = inpdir / 'metadata_files' - + inpdir = inpdir / "metadata_files" + else: - logger.error("Directory not found. Please check that the input \ - directory is an image collection with at least one .ims file") - raise FileNotFoundError("metadata_files directory not found.") - - logger.info('Navigated to metadata_files directory...') - logger.debug("New input directory (inpdir): {}".format(inpdir)) + logger.error( + "Directory not found. Please check that the input \ + directory is an image collection with at least one .ims file", + ) + msg = "metadata_files directory not found." 
+ raise FileNotFoundError(msg) + + logger.info("Navigated to metadata_files directory...") + logger.debug(f"New input directory (inpdir): {inpdir}") #: outdir is the csv collection outdir = args.outdir - logger.debug('outdir = {}'.format(outdir)) + logger.debug(f"outdir = {outdir}") #: Overall.xlsx is stored in metadata collection metaoutdir = args.metaoutdir - logger.debug('metaoutdir = {}'.format(metaoutdir)) + logger.debug(f"metaoutdir = {metaoutdir}") logger.debug("Defining paths...") #: Define the path currentDirectory = inpdir @@ -86,14 +95,13 @@ metadataDirectory = pathlib.Path(metaoutdir) ims_exists = False for currentFile in currentDirectory.iterdir(): - - if currentFile.suffix == '.ims': + if currentFile.suffix == ".ims": ims_exists = True - logger.info("Parsing {}".format(currentFile)) - + logger.info(f"Parsing {currentFile}") + outputDirName = outputDirectory metadataDirectory = metadataDirectory - + #: ``extract_ims_data`` extracts metadata from **.ims** file hdf_to_csv = extract_ims_data.LinkData(currentFile, outputDirName) hdf_to_csv.link_data_fun() @@ -101,13 +109,14 @@ #: ``link_ims_ids`` extract/links track ID to object ID link_ims_ids.link_trackid_objectid(currentFile, outputDirName) - #: ``merge_ids_to_features`` combines linked IDs to features + #: ``merge_ids_to_features`` combines linked IDs to features create_final_output = merge_ids_to_features.CreateCsv( - currentFile, outputDirName, metadataDirectory) - + currentFile, outputDirName, metadataDirectory, + ) + create_final_output.create_csv_fun() - if ims_exists == False: - logger.error('Metadata directory of image collection lacks .ims files') + if ims_exists is False: + logger.error("Metadata directory of image collection lacks .ims files") except FileNotFoundError as error: - logger.error(error) \ No newline at end of file + logger.error(error) diff --git a/formats/polus-imaris-parser-plugin/src/merge_ids_to_features.py b/formats/polus-imaris-parser-plugin/src/merge_ids_to_features.py 
index f2bc27626..36b99e1db 100644 --- a/formats/polus-imaris-parser-plugin/src/merge_ids_to_features.py +++ b/formats/polus-imaris-parser-plugin/src/merge_ids_to_features.py @@ -1,22 +1,21 @@ -import csv -import h5py import logging import logging.config -import numpy as np -import os +import re from os import path + +import h5py +import numpy as np import pandas as pd -import pathlib -import re -import xlsxwriter + class CreateCsv: """Class combines csv linking IDs to csv containing feature info. - Merge linked track and object IDs to corresponding feature data to + Merge linked track and object IDs to corresponding feature data to produce a csv output file that can be visualized in FlowJo. """ - def __init__(self, ims_filename, dir_name, meta_dir_name, logger=None): + + def __init__(self, ims_filename, dir_name, meta_dir_name, logger=None) -> None: """Open .ims file for reading; h5py.File acts like a dictionary. Args: @@ -24,21 +23,21 @@ def __init__(self, ims_filename, dir_name, meta_dir_name, logger=None): dir_name (:obj:`str`): Output csv collection meta_dir_name (:obj:`str`): Output metadata directory """ - #: Set up the logger logging.basicConfig( - format='%(asctime)s-%(name)s-%(levelname)s-%(message)s', - datefmt='%b-%d-%y %H:%M:%S') + format="%(asctime)s-%(name)s-%(levelname)s-%(message)s", + datefmt="%b-%d-%y %H:%M:%S", + ) self.logger = logging.getLogger(__name__) self.logger.setLevel(logging.INFO) self.ims_filename = ims_filename self.dir_name = dir_name self.meta_dir_name = meta_dir_name - self.f = h5py.File(self.ims_filename, 'r') + self.f = h5py.File(self.ims_filename, "r") def round_to_six(self, num): - """Round values to six significant figures + """Round values to six significant figures. 
Args: num (:obj:`int`): Num to be rounded to 6 significant figures @@ -46,30 +45,28 @@ def round_to_six(self, num): Returns: A number rounded to six significant figures """ - if num != 0: - if np.isnan(num) != True: + if np.isnan(num) is not True: num = np.round(num, -int(np.floor(np.log10(abs(num)))) + 5) elif num == 0: pass return num def get_df_from_csv(self, dirname, chan, chan_name, csv_substring): - - """Read intermediate csv files containing feature data - (``extract_ims_data.py`` output) or ID data (``link_ims_ids.py`` - output) for each channel, and store in dataframes. ``chan`` + """Read intermediate csv files containing feature data + (``extract_ims_data.py`` output) or ID data (``link_ims_ids.py`` + output) for each channel, and store in dataframes. ``chan`` represents attribute names within the ``Scene8/Content`` keys of the .ims files with names ``MegaSurfaces0`` or ``Points0``. Each attribute contains data belonging to particular channels. - The argument chan_name differs from chan because while chan - might have a more general name such as ``Points0``, chan_name is - extracted from the Imaris file's metadata, converted from byte to - string, and stored as **chan_name**. csv_substring is the + The argument chan_name differs from chan because while chan + might have a more general name such as ``Points0``, chan_name is + extracted from the Imaris file's metadata, converted from byte to + string, and stored as **chan_name**. csv_substring is the substring of the csv file that gets read in; can be `trackdf_`/ - `objectdf_` for outputs of the first module - (``extract_ims_data.py``), or empty string for outputs of the - second module, ``link_ims_ids.py`` (links `ID_Object` and + `objectdf_` for outputs of the first module + (``extract_ims_data.py``), or empty string for outputs of the + second module, ``link_ims_ids.py`` (links `ID_Object` and `TrackID`). 
Args: @@ -81,81 +78,85 @@ def get_df_from_csv(self, dirname, chan, chan_name, csv_substring): Returns: Pandas dataframe created from intermediate csv files. """ - - keepsame = {'ID_Time'} + keepsame = {"ID_Time"} #: Read csv temp_string = csv_substring + chan + ".csv" - temp_path = dirname/temp_string + temp_path = dirname / temp_string df = pd.read_csv(temp_path) #: Remove "Unnamed" columns: - df = df.loc[:, ~df.columns.str.contains('^Unnamed')] + df = df.loc[:, ~df.columns.str.contains("^Unnamed")] #: suffix chan name to all headers except 'ID_Object, 'ID_Time' if len(df.columns) > 0: - #: Suffix col names except some columns (keep same=ID_Time) - df.columns = ['{}{}'.format( - c, '' if c in keepsame else chan_name) for c in df.columns] - df.columns = df.columns.str.replace('__', '_') + df.columns = [ + "{}{}".format(c, "" if c in keepsame else chan_name) for c in df.columns + ] + df.columns = df.columns.str.replace("__", "_") #: Remove intermediate csv file self.logger.debug("Remove line file_to_remove.unlink() to debug.") self.logger.debug( - "CSV files can indicate where in the process an issue begins.") + "CSV files can indicate where in the process an issue begins.", + ) file_to_remove = temp_path file_to_remove.unlink() return df - + def get_overall(self, overall_df, chan_name): - """Extract overall data from object data + """Extract overall data from object data. - This reads the output from the ``get_df_from_csv()`` function + This reads the output from the ``get_df_from_csv()`` function and extracts overall data from the df containing non-track data. - Note that in the second module, ``extract_ims_data.py``, + Note that in the second module, ``extract_ims_data.py``, data tagged `Overall` was assigned an `ID_Object` of -1 Args: - overall_df: Df obtained `ID_Object` of object df < 0. + overall_df: Df obtained `ID_Object` of object df < 0. 
chan_name (:obj:`str`): Channel name entered in Imaris Returns: DF containing overall data; formatted like Imaris version """ - - overall_df.dropna(axis=1, how='all', inplace=True) + overall_df.dropna(axis=1, how="all", inplace=True) #: All ID_Objects == -1 belong to Overall. Replace with np.NaN - overall_df['ID_Object' + chan_name] = \ - overall_df['ID_Object'+chan_name].replace( - -1.0, np.NaN, inplace=True) - + overall_df["ID_Object" + chan_name] = overall_df[ + "ID_Object" + chan_name + ].replace(-1.0, np.NaN, inplace=True) + #: Replace time = -1.0 with np.NaN - overall_df['ID_Time'].replace(-1.0, np.NaN, inplace=True) + overall_df["ID_Time"].replace(-1.0, np.NaN, inplace=True) overall_df.reset_index() - + #: Rearrange df to match exact format exported by Imaris file overall_df = pd.melt( - overall_df, id_vars=['ID_Time', 'ID_Object' + chan_name], - var_name='Variable', value_name='Value') + overall_df, + id_vars=["ID_Time", "ID_Object" + chan_name], + var_name="Variable", + value_name="Value", + ) overall_df = overall_df[ - ['Variable','Value','ID_Time','ID_Object'+chan_name]] + ["Variable", "Value", "ID_Time", "ID_Object" + chan_name] + ] overall_df.rename( - {'ID_Time': 'Time', 'ID_Object' + chan_name: 'ID'}, - axis='columns', inplace=True) - overall_df.dropna(subset=['Value'], inplace=True) - overall_df['Variable'] = overall_df['Variable'].str.replace('_', ' ') - overall_df=overall_df.dropna(axis=1,how='all') - return overall_df - - def create_overall_xlsx(self,imaris_filename,meta_dirname,all_overall_dict): + {"ID_Time": "Time", "ID_Object" + chan_name: "ID"}, + axis="columns", + inplace=True, + ) + overall_df.dropna(subset=["Value"], inplace=True) + overall_df["Variable"] = overall_df["Variable"].str.replace("_", " ") + return overall_df.dropna(axis=1, how="all") + + def create_overall_xlsx(self, imaris_filename, meta_dirname, all_overall_dict): """Create overall xlsx. Each sheet represents one channel. 
- This function merges all Overall DFs together and write each - channel to an xlsx notebook that uses sheets to represent + This function merges all Overall DFs together and write each + channel to an xlsx notebook that uses sheets to represent individual channels @@ -164,8 +165,6 @@ def create_overall_xlsx(self,imaris_filename,meta_dirname,all_overall_dict): meta_dirname (:obj:`str`): Output metadata directory all_overall_dict: Dict key=Imaris channel, value=overall df """ - - #: Get basename from imaris filename, to prepend to Overall.xlsx imaris_basename = imaris_filename.stem @@ -173,55 +172,60 @@ def create_overall_xlsx(self,imaris_filename,meta_dirname,all_overall_dict): imaris_basename = imaris_basename[:-4] #: Create a Pandas Excel writer using XlsxWriter as the engine - temp_string = imaris_basename + "_" + 'Overall.xlsx' - temp_path = meta_dirname/temp_string - writer = pd.ExcelWriter(temp_path, engine='xlsxwriter') + temp_string = imaris_basename + "_" + "Overall.xlsx" + temp_path = meta_dirname / temp_string + writer = pd.ExcelWriter(temp_path, engine="xlsxwriter") count = 1 for chan_name, overall_df_list in all_overall_dict.items(): for i in range(0, len(overall_df_list)): str_i = "_" if i >= 1: str_i = "_" + str(i) + "_" - str_channel_name = re.sub('[^A-Za-z0-9]+', '_', chan_name) + str_channel_name = re.sub("[^A-Za-z0-9]+", "_", chan_name) #: Convert the dataframe to an XlsxWriter Excel object str_channel_name_shortened = "" if len(str_channel_name) > 25: str_channel_name_shortened = str_channel_name[:25] - + else: str_channel_name_shortened = str_channel_name - + #: Round Overall "Values" column to 6 significant digits self.logger.debug("Converting data to 6 significant figures...") - overall_df_list[i]['Value'] = overall_df_list[i]\ - ['Value'].apply(self.round_to_six) - + overall_df_list[i]["Value"] = overall_df_list[i]["Value"].apply( + self.round_to_six, + ) + overall_df_list[i].to_excel( - writer, - sheet_name=str_channel_name_shortened + 
str_i + str(count), - index=False, startrow=2, startcol=0) - + writer, + sheet_name=str_channel_name_shortened + str_i + str(count), + index=False, + startrow=2, + startcol=0, + ) + #: Get the xlsxwriter workbook and worksheet objects worksheet = writer.sheets[ - str_channel_name_shortened + str_i + str(count)] - + str_channel_name_shortened + str_i + str(count) + ] + #: Add original, unmodified channel name to first row worksheet.write(0, 0, chan_name) - + #: Set the column width and format. - worksheet.set_column(0, 0, 50) #: 1st, last col, width - + worksheet.set_column(0, 0, 50) #: 1st, last col, width + #: Close the Pandas Excel writer and output the Excel file. count = count + 1 writer.save() def create_final_output(self, imaris_filename, non_overall_dfs, dirname): - """Stores non-overall data in dataframes + """Stores non-overall data in dataframes. - Store remaining non-overall data with `TrackID` (if applicable), - `ID_Object` (if applicable), and feature data in a Pandas + Store remaining non-overall data with `TrackID` (if applicable), + `ID_Object` (if applicable), and feature data in a Pandas dataframe. 
Args: @@ -234,9 +238,8 @@ def create_final_output(self, imaris_filename, non_overall_dfs, dirname): #: Remove .ims extension imaris_basename = imaris_basename[:-4] for chan_name, non_ov in non_overall_dfs.items(): - - #: Replace special characters from channel name (key) with _ - chan_mod = re.sub('[^0-9a-zA-Z]+', '_', chan_name) + #: Replace special characters from channel name (key) with _ + chan_mod = re.sub("[^0-9a-zA-Z]+", "_", chan_name) for i in range(0, len(non_ov)): str_i = "" @@ -249,10 +252,9 @@ def create_final_output(self, imaris_filename, non_overall_dfs, dirname): #: Remove _ from the front of file (due to some plugins) for col in non_ov[i].columns: - if col[:1] == "_": col_mod = col[1:] - non_ov[i].rename(columns={col:col_mod}, inplace=True) + non_ov[i].rename(columns={col: col_mod}, inplace=True) #: Sort header names alphabetically header_names = non_ov[i].columns @@ -260,224 +262,231 @@ def create_final_output(self, imaris_filename, non_overall_dfs, dirname): non_ov[i] = non_ov[i][header_names] for c in non_ov[i].columns: - #: Round all but ID, TrackID, Time to 6 sigfigs - if c != "TrackID_"+chan_mod and c != "ID_Object_"+chan_mod: - if c!="ID_Time" and "TrackID" not in c: - non_ov[i][c]=non_ov[i][c].apply(self.round_to_six) + if c != "TrackID_" + chan_mod and c != "ID_Object_" + chan_mod: + if c != "ID_Time" and "TrackID" not in c: + non_ov[i][c] = non_ov[i][c].apply(self.round_to_six) non_ov[i].columns = non_ov[i].columns.str.replace("___", "_") non_ov[i].columns = non_ov[i].columns.str.replace("__", "_") - non_ov[i].columns = non_ov[i].columns.str.replace( - "ID_Time", "Time") - non_ov[i].columns = non_ov[i].columns.str.replace( - "ID_Object", "ID") + non_ov[i].columns = non_ov[i].columns.str.replace("ID_Time", "Time") + non_ov[i].columns = non_ov[i].columns.str.replace("ID_Object", "ID") #: Display np.NaN values as as 'NaN' so FlowJo can view temp_string = imaris_basename + "_" + chan_name + str_i + ".csv" - temp_path = dirname/temp_string - 
non_ov[i].to_csv(temp_path, index=False, na_rep='NaN') + temp_path = dirname / temp_string + non_ov[i].to_csv(temp_path, index=False, na_rep="NaN") def create_csv_fun(self): """Main function; combines intermediate files to produce output. - - This function combines all intermediate files - (``extract_ims_data.py`` and ``link_ims_ids.py`` outputs) - to produce csv files that link IDs to features for each channel - and an xlsx file containing overall summary statistics. - It takes in as inputs the csv files created from - ``link_ims_ids.py`` and ``extract_ims_data.py``. It outputs an - ``Overall.xlsx`` file containing summary data for each channel. - The remaining feature data is exported within individual csv - files for each channel. For example: ``Red.csv``, ``Green.csv``, + + This function combines all intermediate files + (``extract_ims_data.py`` and ``link_ims_ids.py`` outputs) + to produce csv files that link IDs to features for each channel + and an xlsx file containing overall summary statistics. + It takes in as inputs the csv files created from + ``link_ims_ids.py`` and ``extract_ims_data.py``. It outputs an + ``Overall.xlsx`` file containing summary data for each channel. + The remaining feature data is exported within individual csv + files for each channel. 
For example: ``Red.csv``, ``Green.csv``, and ``ColocSurfaces.csv`` """ - #: Open the file for reading; h5py.File acts like a dictionary - self.logger.debug( - "Opening .ims file {}...".format(str(self.ims_filename))) - self.f = h5py.File(self.ims_filename, 'r') + self.logger.debug(f"Opening .ims file {self.ims_filename!s}...") + self.f = h5py.File(self.ims_filename, "r") #: Determine # of groups (channel_names) in 'Scene8/Content' logging.debug("Counting channel names in Scene8/Content...") - channel_names = list(self.f['Scene8']['Content'].keys()) + channel_names = list(self.f["Scene8"]["Content"].keys()) # Ignore irrelevant channel types channel_names = [ - chan for chan in channel_names if chan.startswith( - "Points") or chan.startswith("MegaSurfaces")] - + chan + for chan in channel_names + if chan.startswith("Points") or chan.startswith("MegaSurfaces") + ] + #: Combine objectdf, trackdf, track_id_object_df csv into 1 df all_overall_dfs = {} non_overall_dfs = {} - for i in range(0,len(channel_names)): - + for i in range(0, len(channel_names)): #: Loop through each attribute in Scene8/Content/ self.logger.debug( "\n\nITERATION {}/{} OF FILE {}".format( - i+1, len(channel_names), self.ims_filename)) + i + 1, len(channel_names), self.ims_filename, + ), + ) current_channel = channel_names[i] - self.logger.debug("Reading {}...".format(current_channel)) + self.logger.debug(f"Reading {current_channel}...") #: Read 'Name' attribute of each channel to get channel name - chan_name=self.f['Scene8']['Content'][current_channel].attrs['Name'] - chan_name = chan_name.tostring(order='C') - + chan_name = self.f["Scene8"]["Content"][current_channel].attrs["Name"] + chan_name = chan_name.tostring(order="C") + #: Convert channel name from class byte to string chan_name = str(chan_name, "utf-8") excel_channel = chan_name - - #: Remove special characters from channel name using regex - regex = re.compile('[^a-zA-Z0-9]+') + + #: Remove special characters from channel name using regex 
+ regex = re.compile("[^a-zA-Z0-9]+") #: Replaces special characters with _ - chan_name = regex.sub('_', chan_name) + chan_name = regex.sub("_", chan_name) chan_name = "_" + chan_name - + #: Skip empty channels if chan_name == "__": pass - + #: Read the required input files else: temp_string1 = "trackdf_" + current_channel + ".csv" path1 = self.dir_name / temp_string1 temp_string2 = "objectdf_" + current_channel + ".csv" path2 = self.dir_name / temp_string2 - if path.exists(path1)==True and path.exists(path2)==True: - + if path.exists(path1) is True and path.exists(path2) is True: #: Load Track Data track_df = self.get_df_from_csv( - self.dir_name, current_channel, chan_name, "trackdf_") - + self.dir_name, current_channel, chan_name, "trackdf_", + ) + #: Load Object Data object_df = self.get_df_from_csv( - self.dir_name, current_channel, chan_name, "objectdf_") - + self.dir_name, current_channel, chan_name, "objectdf_", + ) + #: Load Track ID: Object ID data track_id_object_df = self.get_df_from_csv( - self.dir_name, current_channel, chan_name, "") - + self.dir_name, current_channel, chan_name, "", + ) + has_track = True has_object = True has_track_id_object = True - #: Determine if track_df or object_df is empty. - if track_df.empty == True: + #: Determine if track_df or object_df is empty. + if track_df.empty is True: #: If so, set has_object or has_track to False. 
has_track = False - - if object_df.empty == True: + + if object_df.empty is True: has_object = False - if track_id_object_df.empty == True: + if track_id_object_df.empty is True: track_id_object_df = pd.DataFrame( - {'TrackID' + chan_name:np.NaN, 'ID_Object' + \ - chan_name:np.NaN}, index=[0]) - has_track_id_object == True + { + "TrackID" + chan_name: np.NaN, + "ID_Object" + chan_name: np.NaN, + }, + index=[0], + ) + has_track_id_object is True #: Isolate "Overall" data - if (has_track_id_object == True and has_object == True) or \ - (has_track_id_object == True and has_object == False): - + if (has_track_id_object is True and has_object is True) or ( + has_track_id_object is True and has_object is False + ): #: Add 1 to all time chans (sets t=0 to t=1) - object_df['ID_Time'] = object_df['ID_Time'] + 1 - + object_df["ID_Time"] = object_df["ID_Time"] + 1 + #: Where Object ID < 0, save as "Overall" - overall_df = object_df.loc[object_df[ - 'ID_Object' + chan_name] < 0].copy() - + overall_df = object_df.loc[ + object_df["ID_Object" + chan_name] < 0 + ].copy() + #: Where Object ID > -1, save as "Object" object_df = object_df.loc[ - object_df['ID_Object' + chan_name] >= 0] - + object_df["ID_Object" + chan_name] >= 0 + ] + #: Flag empty dfs after moving object to overall - if object_df.empty == True: + if object_df.empty is True: has_object = False - + overall_df = self.get_overall(overall_df, chan_name) #: Make dict key=.ims channel, val=overall df if excel_channel in all_overall_dfs: all_overall_dfs[excel_channel].append(overall_df) - + else: all_overall_dfs[excel_channel] = [] all_overall_dfs[excel_channel].append(overall_df) - + #: Merge dict of IDs and tracks/objects together - if has_object == True: - + if has_object is True: #: Wherever Object ID >= 1, save as object data - object_df=object_df[object_df['ID_Object'+chan_name]>=0] - object_df.dropna(axis=1, how='all', inplace=True) - + object_df = object_df[object_df["ID_Object" + chan_name] >= 0] + 
object_df.dropna(axis=1, how="all", inplace=True) + #: Combine ID dictionary, Track, and/or Object data - if has_object == True and has_track == False: + if has_object is True and has_track is False: track_id_object_df = pd.merge( - track_id_object_df, object_df, - how='outer', on='ID_Object' + chan_name) + track_id_object_df, + object_df, + how="outer", + on="ID_Object" + chan_name, + ) + + track_id_object_df.dropna(axis=0, how="all", inplace=True) - track_id_object_df.dropna( - axis=0, how='all', inplace=True) + track_id_object_df.dropna(axis=1, how="all", inplace=True) - track_id_object_df.dropna( - axis=1, how='all', inplace=True) - #: Resolve overwrite for files sharing chan name if excel_channel in non_overall_dfs: - non_overall_dfs[excel_channel].append( - track_id_object_df) - + non_overall_dfs[excel_channel].append(track_id_object_df) + else: non_overall_dfs[excel_channel] = [] - non_overall_dfs[excel_channel].append( - track_id_object_df) + non_overall_dfs[excel_channel].append(track_id_object_df) - elif has_object == False and has_track == True: + elif has_object is False and has_track is True: track_id_object_df = pd.merge( - track_id_object_df, track_df, how='outer', - on='TrackID' + chan_name) - + track_id_object_df, + track_df, + how="outer", + on="TrackID" + chan_name, + ) + if excel_channel in non_overall_dfs: - non_overall_dfs[excel_channel].append( - track_id_object_df) - + non_overall_dfs[excel_channel].append(track_id_object_df) + else: non_overall_dfs[excel_channel] = [] - non_overall_dfs[excel_channel].append( - track_id_object_df) + non_overall_dfs[excel_channel].append(track_id_object_df) #: Fix issue overwrite for files sharing chan name - elif has_object == True and has_track == True: - + elif has_object is True and has_track is True: #: First merge ID dictionary to objects merged_object = pd.merge( - object_df, track_id_object_df, how='outer', - on='ID_Object' + chan_name) - + object_df, + track_id_object_df, + how="outer", + 
on="ID_Object" + chan_name, + ) + #: Second merge above df to tracks features_merged = pd.merge( - merged_object, track_df, how='outer', - on='TrackID' + chan_name) - + merged_object, + track_df, + how="outer", + on="TrackID" + chan_name, + ) + if excel_channel in non_overall_dfs: - non_overall_dfs[excel_channel].append( - features_merged) - + non_overall_dfs[excel_channel].append(features_merged) + else: non_overall_dfs[excel_channel] = [] - non_overall_dfs[excel_channel].append( - features_merged) - + non_overall_dfs[excel_channel].append(features_merged) + if all_overall_dfs: - #: Export overall data as xlsx file self.create_overall_xlsx( - self.ims_filename, self.meta_dir_name, all_overall_dfs) + self.ims_filename, self.meta_dir_name, all_overall_dfs, + ) #: Create final output self.logger.info("Creating final output (stage 3/3)...") - self.create_final_output( - self.ims_filename, non_overall_dfs, self.dir_name) + self.create_final_output(self.ims_filename, non_overall_dfs, self.dir_name) - self.logger.info("{} complete!".format(str(self.ims_filename))) \ No newline at end of file + self.logger.info(f"{self.ims_filename!s} complete!") diff --git a/formats/polus-multichannel-tiff-plugin/Dockerfile b/formats/polus-multichannel-tiff-plugin/Dockerfile index 049a4df5a..c884b8bcb 100644 --- a/formats/polus-multichannel-tiff-plugin/Dockerfile +++ b/formats/polus-multichannel-tiff-plugin/Dockerfile @@ -1,7 +1,7 @@ FROM polusai/bfio:2.1.9 COPY VERSION / - + ARG EXEC_DIR="/opt/executables" ARG DATA_DIR="/data" @@ -14,4 +14,4 @@ WORKDIR ${EXEC_DIR} RUN pip3 install -r ${EXEC_DIR}/requirements.txt --no-cache-dir -ENTRYPOINT ["python3", "/opt/executables/main.py"] \ No newline at end of file +ENTRYPOINT ["python3", "/opt/executables/main.py"] diff --git a/formats/polus-multichannel-tiff-plugin/README.md b/formats/polus-multichannel-tiff-plugin/README.md index 8d5325f32..35841024f 100644 --- a/formats/polus-multichannel-tiff-plugin/README.md +++ 
b/formats/polus-multichannel-tiff-plugin/README.md @@ -1,6 +1,6 @@ # Multichannel Tiff -The multi-channel tiff plugin uses a +The multi-channel tiff plugin uses a [filename pattern](https://github.com/LabShare/polus-plugins/tree/master/utils/polus-filepattern-util) to assign images to a multi-channel ome tiff. Only channels indicated in the `channelOrder` are included in the multi-channel tiff, and channels are placed @@ -44,4 +44,4 @@ docker run -v /path/to/data/:/data/ \ --inpDir /data/input \ --channelOrder 3,2,1 \ --outDir /data/output -``` \ No newline at end of file +``` diff --git a/formats/polus-multichannel-tiff-plugin/VERSION b/formats/polus-multichannel-tiff-plugin/VERSION index 373f8c6f0..717903969 100644 --- a/formats/polus-multichannel-tiff-plugin/VERSION +++ b/formats/polus-multichannel-tiff-plugin/VERSION @@ -1 +1 @@ -0.2.3 \ No newline at end of file +0.2.3 diff --git a/formats/polus-multichannel-tiff-plugin/build-docker.sh b/formats/polus-multichannel-tiff-plugin/build-docker.sh index 24772bbfe..35a51035b 100755 --- a/formats/polus-multichannel-tiff-plugin/build-docker.sh +++ b/formats/polus-multichannel-tiff-plugin/build-docker.sh @@ -1,4 +1,4 @@ #!/bin/bash version=$( - 4.0.0 - axle.polus.wipp - tiledtiff-converter-plugin - 1.1.0 - - - - 1.8 - 1.8 - - - - - - commons-io - commons-io - 2.9.0 - - - ome - formats-gpl - 6.2.0 - compile - - - commons-cli - commons-cli - 1.4 - - - org.javatuples - javatuples - 1.2 - - - - - - - ome.external - http://artifacts.openmicroscopy.org/artifactory/ome.external - - - ome.releases - http://artifacts.openmicroscopy.org/artifactory/ome.releases - - - ome.snapshots - http://artifacts.openmicroscopy.org/artifactory/ome.snapshots - - - - - - - - maven-assembly-plugin - - - - axle.polus.wipp.plugins.TiledOmeTiffConverterMain - - - - jar-with-dependencies - - - - - - - \ No newline at end of file + + 4.0.0 + axle.polus.wipp + tiledtiff-converter-plugin + 1.1.0 + + + + 1.8 + 1.8 + + + + + + commons-io + commons-io + 
2.9.0 + + + ome + formats-gpl + 6.2.0 + compile + + + commons-cli + commons-cli + 1.4 + + + org.javatuples + javatuples + 1.2 + + + + + + + ome.external + http://artifacts.openmicroscopy.org/artifactory/ome.external + + + ome.releases + http://artifacts.openmicroscopy.org/artifactory/ome.releases + + + ome.snapshots + http://artifacts.openmicroscopy.org/artifactory/ome.snapshots + + + + + + + + maven-assembly-plugin + + + + axle.polus.wipp.plugins.TiledOmeTiffConverterMain + + + + jar-with-dependencies + + + + + + + diff --git a/formats/polus-tiledtiff-converter-plugin/src/main/java/axle/polus/data/utils/converters/TiledOmeTiffConverter.java b/formats/polus-tiledtiff-converter-plugin/src/main/java/axle/polus/data/utils/converters/TiledOmeTiffConverter.java index 08b278231..715399e24 100644 --- a/formats/polus-tiledtiff-converter-plugin/src/main/java/axle/polus/data/utils/converters/TiledOmeTiffConverter.java +++ b/formats/polus-tiledtiff-converter-plugin/src/main/java/axle/polus/data/utils/converters/TiledOmeTiffConverter.java @@ -22,7 +22,7 @@ /** * Based off of the example from * https://docs.openmicroscopy.org/bio-formats/5.9.1/developers/tiling.html - * + * * One optimization this method uses is reading/writing multiple tiles * simultaneously (up to 16 at a time). This can be set using the xMulti and * yMulti variables. 
@@ -35,7 +35,7 @@ public class TiledOmeTiffConverter implements Runnable { */ private static final Logger LOG = Logger.getLogger(TiledOmeTiffConverter.class.getName()); - + private ImageReader reader; private String inputFile; private String outputFile; @@ -49,7 +49,7 @@ public class TiledOmeTiffConverter implements Runnable { /** * Class constructor - * + * * @param reader ImageReader to convert to .ome.tif * @param outputFile Complete path to export file * @param Z The z-slice to export @@ -63,10 +63,10 @@ public TiledOmeTiffConverter(ImageReader reader, String outputFile, int Z, int C this.C = C; this.T = T; } - + /** * Initialize the OME Tiff writer - * + * * @param omexml Base metadata to import * @return * @throws FormatException @@ -81,7 +81,7 @@ public OMETiffWriter init_writer(IMetadata omexml) { // set up the writer and associate it with the output file OMETiffWriter writer = new OMETiffWriter(); writer.setMetadataRetrieve(omexml); - + // set output file properties try { this.tileSizeX = writer.setTileSizeX(tileSizeX); @@ -92,28 +92,28 @@ public OMETiffWriter init_writer(IMetadata omexml) { // TODO Auto-generated catch block e.printStackTrace(); } - + omexml = (IMetadata) writer.getMetadataRetrieve(); omexml.setPixelsSizeZ(new PositiveInteger(1), 0); omexml.setPixelsSizeC(new PositiveInteger(1), 0); omexml.setPixelsSizeT(new PositiveInteger(1), 0); - + return writer; } /** * Read input file and write output file in tiles - * + * * The input files is read in tiles of size (tileSizeX, tileSizeY), with * only one z-slice at a time. Each channel and time-point (if present) are * saved to separate files. - * + * * It is important to save images in tiles, since Bioformats indexes pixels * using signed integers (32 bits). This means that loading a full image * plane larger than 2GB will throw an indexing error. Saving in tiles also * has the benefit of being memory efficient, so it can be run on small * nodes. 
- * + * * @throws FormatException * @throws DependencyException * @throws ServiceException @@ -136,9 +136,9 @@ public void readWriteTiles() throws FormatException, DependencyException, Servic int bpp = FormatTools.getBytesPerPixel(reader.getPixelType()); int tilePlaneSize = xMulti * yMulti * tileSizeX * tileSizeY * reader.getRGBChannelCount() * bpp; byte[] buf = new byte[tilePlaneSize]; - + OMETiffWriter writer = this.init_writer(omexml); - + int width = reader.getSizeX(); int height = reader.getSizeY(); @@ -147,15 +147,15 @@ public void readWriteTiles() throws FormatException, DependencyException, Servic int nYTiles = height / (yMulti * tileSizeY); if (nXTiles * tileSizeX * xMulti != width) nXTiles++; if (nYTiles * tileSizeY * yMulti != height) nYTiles++; - + int index = reader.getIndex(this.Z, this.C, this.T); for (int y=0; y1) { outFile = outFile.replace(".ome.tif", "_c" + String.format("%0" + width + "d", c) + ".ome.tif"); @@ -197,7 +197,7 @@ public boolean accept(File dir, String name) { if (reader.getSizeZ()>1) { outFile = outFile.replace(".ome.tif", "_z" + String.format("%0" + width + "d", z) + ".ome.tif"); } - + TiledOmeTiffConverter tiledReadWriter = new TiledOmeTiffConverter(reader, outFile, z, c, t); pool.execute(tiledReadWriter); } @@ -209,13 +209,13 @@ public boolean accept(File dir, String name) { int exitVal = 0; String err = ""; - + if (exitVal != 0){ throw new RuntimeException(err); } - + pool.shutdown(); - LOG.info("The end of tile tiff conversion!!"); + LOG.info("The end of tile tiff conversion!!"); } } @@ -233,5 +233,5 @@ public String format(LogRecord record) { sb.append(record.getLevel()).append(":"); sb.append(record.getMessage()).append('\n'); return sb.toString(); - } -} \ No newline at end of file + } +} diff --git a/segmentation/mesmer-inference-tool/src/polus/images/segmentation/mesmer_inference/__main__.py b/segmentation/mesmer-inference-tool/src/polus/images/segmentation/mesmer_inference/__main__.py index 2e241d0ca..ecccbdc1f 100644 
--- a/segmentation/mesmer-inference-tool/src/polus/images/segmentation/mesmer_inference/__main__.py +++ b/segmentation/mesmer-inference-tool/src/polus/images/segmentation/mesmer_inference/__main__.py @@ -2,12 +2,14 @@ import json import logging import pathlib -from typing import Any, Optional +from typing import Any +from typing import Optional import filepattern as fp import typer - -from polus.images.segmentation.mesmer_inference.padded import Extension, Model, run +from polus.images.segmentation.mesmer_inference.padded import Extension +from polus.images.segmentation.mesmer_inference.padded import Model +from polus.images.segmentation.mesmer_inference.padded import run # Initialize the logger logging.basicConfig( @@ -29,16 +31,16 @@ def main( help="Input testing image collection to be processed by this plugin.", ), tile_size: Optional[str] = typer.Option( - "256", "--tileSize", help="Input image tile size. Default 256x256." + "256", "--tileSize", help="Input image tile size. Default 256x256.", ), model_path: Optional[pathlib.Path] = typer.Option( - None, "--modelPath", help="Path to weights file." + None, "--modelPath", help="Path to weights file.", ), file_pattern_test: str = typer.Option( - ..., "--filePatternTest", help="Filename pattern to filter data." + ..., "--filePatternTest", help="Filename pattern to filter data.", ), file_pattern_whole_cell: Optional[str] = typer.Option( - None, "--filePatternWholeCell", help="Filename pattern to filter nuclear data." 
+ None, "--filePatternWholeCell", help="Filename pattern to filter nuclear data.", ), file_extension: Extension = typer.Option( Extension.Default, @@ -48,7 +50,7 @@ def main( model: Model = typer.Option(Model.Default, "--model", help="Model name."), out_dir: pathlib.Path = typer.Option(..., "--outDir", help="Output collection"), preview: Optional[bool] = typer.Option( - False, "--preview", help="Output a JSON preview of files" + False, "--preview", help="Output a JSON preview of files", ), ) -> None: """Mesmer Plugin image segmentation using PanopticNet model.""" diff --git a/segmentation/mesmer-inference-tool/src/polus/images/segmentation/mesmer_inference/padded.py b/segmentation/mesmer-inference-tool/src/polus/images/segmentation/mesmer_inference/padded.py index 854722866..62de02b18 100644 --- a/segmentation/mesmer-inference-tool/src/polus/images/segmentation/mesmer_inference/padded.py +++ b/segmentation/mesmer-inference-tool/src/polus/images/segmentation/mesmer_inference/padded.py @@ -4,15 +4,18 @@ import math import os import pathlib +from collections.abc import Sequence from timeit import default_timer -from typing import List, Sequence, Tuple import cv2 import filepattern import numpy as np import tensorflow as tf -from bfio import BioReader, BioWriter -from deepcell.applications import CytoplasmSegmentation, Mesmer, NuclearSegmentation +from bfio import BioReader +from bfio import BioWriter +from deepcell.applications import CytoplasmSegmentation +from deepcell.applications import Mesmer +from deepcell.applications import NuclearSegmentation from deepcell.model_zoo.panopticnet import PanopticNet from deepcell.utils.data_utils import reshape_matrix from deepcell_toolbox.deep_watershed import deep_watershed @@ -48,20 +51,22 @@ class Model(str, enum.Enum): def padding( - image: np.ndarray, shape_1: int, shape_2: int, second: bool, size: int -) -> Tuple[np.ndarray, Sequence[int]]: + image: np.ndarray, shape_1: int, shape_2: int, second: bool, size: int, +) -> 
tuple[np.ndarray, Sequence[int]]: """Image padding. UNET expects height and width of the image to be 256 x 256 This function adds the required reflective padding to make the image dimensions a multiple of 256 x 256. This will enable us to extract tiles of size 256 x 256 which can be processed by the network'. + Args: image: Intensity images. shape_1: Y image dimension. shape_2:: X image dimension. second: If True, height and width of padding is determined from image dimension otherwise calculated from input arguments shape_1 and shape_2. size: Desired size of padded image. + Returns: final_image: padded image. pad_dimensions: Number of pixels added to (top, bottom, left, right) of padded image. @@ -105,7 +110,7 @@ def padding( # Add relective Padding final_image = cv2.copyMakeBorder( - image, top, bottom, left, right, cv2.BORDER_REFLECT + image, top, bottom, left, right, cv2.BORDER_REFLECT, ) # return padded image and pad dimensions @@ -127,6 +132,7 @@ def get_data( shape_2:: X image dimension. second: if True, height and width of padding is determined from image dimension otherwise calculated from input arguments shape_1 and shape_2. size: Desired size of padded image. + Returns: final_image: padded image. pad_dimensions: Number of pixels added to (top, bottom, left, right) of padded image. 
@@ -147,7 +153,7 @@ def get_data( tile = np.squeeze( br[y_min:y_max, x_min:x_max, z : z + 1, 0, 0] # noqa ) - if tile.shape[0] < shape_1 or tile.shape[1] < shape_2: # noqa + if tile.shape[0] < shape_1 or tile.shape[1] < shape_2: shape_1 = max(tile.shape[0], shape_1) shape_2 = max(tile.shape[1], shape_2) second = False @@ -167,20 +173,20 @@ def get_data( br_whole[ y_min:y_max, x_min:x_max, - z : z + 1, # noqa + z : z + 1, 0, - 0, # noqa - ] + 0, + ], ) padded_img_cyto, _ = padding( - tile_whole, shape_1, shape_2, second, size + tile_whole, shape_1, shape_2, second, size, ) image = np.stack( - (padded_img, padded_img_cyto), axis=-1 + (padded_img, padded_img_cyto), axis=-1, ) else: im1 = np.zeros( - (padded_img.shape[0], padded_img.shape[1]) + (padded_img.shape[0], padded_img.shape[1]), ) image = np.stack((padded_img, im1), axis=-1) elif f"{model}" == "mesmerWholeCell": @@ -193,16 +199,16 @@ def get_data( br_whole[ y_min:y_max, x_min:x_max, - z : z + 1, # noqa + z : z + 1, + 0, 0, - 0, # noqa ] # noqa ) padded_img_nuclear, _ = padding( - tile_whole, shape_1, shape_2, second, size + tile_whole, shape_1, shape_2, second, size, ) image = np.stack( - (padded_img_nuclear, padded_img), axis=-1 + (padded_img_nuclear, padded_img), axis=-1, ) else: image = np.expand_dims(padded_img, axis=-1) @@ -212,7 +218,7 @@ def get_data( def save_data( inp_dir: pathlib.Path, - y_pred: List[np.ndarray], + y_pred: list[np.ndarray], size: int, file_pattern: str, model: Model, @@ -261,11 +267,11 @@ def save_data( shape_1, shape_2 = tile.shape[0], tile.shape[1] padded_img, pad_dimensions = padding( - tile, shape_1, shape_2, second, size + tile, shape_1, shape_2, second, size, ) out_img = np.zeros( - (padded_img.shape[0], padded_img.shape[1]) + (padded_img.shape[0], padded_img.shape[1]), ) if f"{model}" == "BYOM": @@ -273,8 +279,8 @@ def save_data( for j in range(int(padded_img.shape[1] / size)): new_img = np.squeeze(y_pred[ind]) out_img[ - i * size : (i + 1) * size, # noqa - j * size : (j + 1) 
* size, # noqa + i * size : (i + 1) * size, + j * size : (j + 1) * size, ] = new_img ind += 1 else: @@ -283,8 +289,8 @@ def save_data( top_pad, bottom_pad, left_pad, right_pad = pad_dimensions output = out_img[ - top_pad : out_img.shape[0] - bottom_pad, # noqa - left_pad : out_img.shape[1] - right_pad, # noqa + top_pad : out_img.shape[0] - bottom_pad, + left_pad : out_img.shape[1] - right_pad, ] output = output.astype(np.uint16) @@ -300,8 +306,8 @@ def save_data( ) final = output[ - y_overlap : y_max - y_min + y_overlap, # noqa - x_overlap : x_max - x_min + x_overlap, # noqa + y_overlap : y_max - y_min + y_overlap, + x_overlap : x_max - x_min + x_overlap, ] output_image_5channel = np.zeros( (final.shape[0], final.shape[1], 1, 1, 1), @@ -310,7 +316,7 @@ def save_data( output_image_5channel[:, :, 0, 0, 0] = final bw[ - y_min:y_max, x_min:x_max, 0:1, 0, 0 + y_min:y_max, x_min:x_max, 0:1, 0, 0, ] = output_image_5channel @@ -366,7 +372,7 @@ def predict_( watershed_time = default_timer() - start logger.info( - f"Watershed segmentation of shape {outputs[0].shape} in {watershed_time} seconds." 
+ f"Watershed segmentation of shape {outputs[0].shape} in {watershed_time} seconds.", ) y_pred = [] @@ -432,7 +438,7 @@ def run( output = app.predict(X_test) save_data( - inp_dir, output, size, file_pattern_1, model, file_extension, out_path + inp_dir, output, size, file_pattern_1, model, file_extension, out_path, ) logger.info("Segmentation complete.") elif f"{model}" == "BYOM": diff --git a/segmentation/mesmer-training-tool/run-plugin.sh b/segmentation/mesmer-training-tool/run-plugin.sh index 693b3a2b5..3051fef5b 100644 --- a/segmentation/mesmer-training-tool/run-plugin.sh +++ b/segmentation/mesmer-training-tool/run-plugin.sh @@ -31,4 +31,4 @@ docker run --mount type=bind,source=${datapath},target=/data/ \ --tilesize ${tilesize} \ --iterations ${iterations} \ --batchSize ${batchSize} \ - --outDir ${outDir} \ No newline at end of file + --outDir ${outDir} diff --git a/segmentation/mesmer-training-tool/src/polus/images/segmentation/mesmer_training/__main__.py b/segmentation/mesmer-training-tool/src/polus/images/segmentation/mesmer_training/__main__.py index 87f9da9f1..57953440c 100644 --- a/segmentation/mesmer-training-tool/src/polus/images/segmentation/mesmer_training/__main__.py +++ b/segmentation/mesmer-training-tool/src/polus/images/segmentation/mesmer_training/__main__.py @@ -1,13 +1,13 @@ """Mesmer Training.""" import json -import os import logging +import os import pathlib -from typing import Any, Optional +from typing import Any +from typing import Optional import typer - -from polus.images.segmentation.mesmer_training import train as train +from polus.images.segmentation.mesmer_training import train # Initialize the logger logging.basicConfig( @@ -44,23 +44,23 @@ def main( help="Input testing label collection to be processed by this plugin.", ), model_backbone: train.BACKBONES = typer.Option( - train.BACKBONES.DEFAULT, "--modelBackbone", help="DeepCell model backbones." 
+ train.BACKBONES.DEFAULT, "--modelBackbone", help="DeepCell model backbones.", ), file_pattern: Optional[str] = typer.Option( - ".+", "--filePattern", help="Pattern to parse file names." + ".+", "--filePattern", help="Pattern to parse file names.", ), tile_size: Optional[int] = typer.Option( - 256, "--tileSize", help="Input image tile size. Default 256x256." + 256, "--tileSize", help="Input image tile size. Default 256x256.", ), iterations: Optional[int] = typer.Option( - 10, "--iterations", help="Number of training iterations. Default is 10." + 10, "--iterations", help="Number of training iterations. Default is 10.", ), batch_size: Optional[int] = typer.Option( - 1, "--batchSize", help="Batch Size. Default is 1.." + 1, "--batchSize", help="Batch Size. Default is 1.", ), out_dir: pathlib.Path = typer.Option(..., "--outDir", help="Output collection"), preview: Optional[bool] = typer.Option( - False, "--preview", help="Output a JSON preview of files" + False, "--preview", help="Output a JSON preview of files", ), ) -> None: """Mesmer training.""" diff --git a/segmentation/mesmer-training-tool/src/polus/images/segmentation/mesmer_training/train.py b/segmentation/mesmer-training-tool/src/polus/images/segmentation/mesmer_training/train.py index 5af93a15a..ad3bc614c 100644 --- a/segmentation/mesmer-training-tool/src/polus/images/segmentation/mesmer_training/train.py +++ b/segmentation/mesmer-training-tool/src/polus/images/segmentation/mesmer_training/train.py @@ -8,10 +8,13 @@ import filepattern import numpy as np from bfio import BioReader -from deepcell import image_generators, losses +from deepcell import image_generators +from deepcell import losses from deepcell.model_zoo.panopticnet import PanopticNet from deepcell.utils.data_utils import reshape_matrix -from deepcell.utils.train_utils import count_gpus, get_callbacks, rate_scheduler +from deepcell.utils.train_utils import count_gpus +from deepcell.utils.train_utils import get_callbacks +from 
deepcell.utils.train_utils import rate_scheduler from tensorflow.keras.losses import MSE from tensorflow.keras.optimizers import Adam @@ -88,7 +91,7 @@ def __init__( iterations: int, batch_size: int, out_dir: pathlib.Path, - ): + ) -> None: """Define Instance attributes.""" self.xtrain_path = xtrain_path self.ytrain_path = ytrain_path @@ -143,7 +146,7 @@ def semantic_loss(self, n_classes: int): def _semantic_loss(y_true, y_pred): if n_classes > 1: return 0.01 * losses.weighted_categorical_crossentropy( - y_true, y_pred, n_classes=n_classes + y_true, y_pred, n_classes=n_classes, ) return MSE(y_true, y_pred) @@ -176,7 +179,6 @@ def run(self) -> None: include_top=True, ) - # norm_method = 'whole_image' # data normalization lr = 1e-5 optimizer = Adam(lr=lr, clipnorm=0.001) lr_sched = rate_scheduler(lr=lr, decay=0.99) diff --git a/segmentation/polus-aics-classic-seg-plugin/Dockerfile b/segmentation/polus-aics-classic-seg-plugin/Dockerfile index 552539720..9c15190a9 100644 --- a/segmentation/polus-aics-classic-seg-plugin/Dockerfile +++ b/segmentation/polus-aics-classic-seg-plugin/Dockerfile @@ -5,7 +5,7 @@ RUN apt-get update && apt-get install build-essential -y \ && apt-get -y install git COPY VERSION / - + ARG EXEC_DIR="/opt/executables" ARG DATA_DIR="/data" @@ -18,4 +18,4 @@ WORKDIR ${EXEC_DIR} RUN pip3 install -r ${EXEC_DIR}/requirements.txt --no-cache-dir -ENTRYPOINT ["python3", "/opt/executables/main.py"] \ No newline at end of file +ENTRYPOINT ["python3", "/opt/executables/main.py"] diff --git a/segmentation/polus-aics-classic-seg-plugin/README.md b/segmentation/polus-aics-classic-seg-plugin/README.md index 02cc3d9cf..fd634cd45 100644 --- a/segmentation/polus-aics-classic-seg-plugin/README.md +++ b/segmentation/polus-aics-classic-seg-plugin/README.md @@ -6,12 +6,12 @@ Contact [Gauhar Bains](mailto:gauhar.bains@labshare.org) or for more information For more information on WIPP, visit the [official WIPP page](https://isg.nist.gov/deepzoomweb/software/wipp). 
-## Using the plugin -The plugin takes two inputs: +## Using the plugin +The plugin takes two inputs: 1. Image collection to be segmented. -2. Configuration file: The configuration files contains the following information i) Name of the workflow to be implemented ii) Values of the hyper parameters needed to execute the workflow. The config file can be generated using the interactive notebooks for each workflow. - -`Interactive notebooks for classic workflows`: The notebooks serve as a starting point to use the this plugin. The notebooks enable the user to tune the workflow hyper parameters by testing the algorithm on multiple images. Following this the user can save the settings in a config file that can be provided as an input to this plugin. Contact the Polus team to get access to the notebooks. A sample configuraton file is shown below: +2. Configuration file: The configuration file contains the following information i) Name of the workflow to be implemented ii) Values of the hyper parameters needed to execute the workflow. The config file can be generated using the interactive notebooks for each workflow. + +`Interactive notebooks for classic workflows`: The notebooks serve as a starting point to use this plugin. The notebooks enable the user to tune the workflow hyper parameters by testing the algorithm on multiple images. Following this the user can save the settings in a config file that can be provided as an input to this plugin. Contact the Polus team to get access to the notebooks. 
A sample configuraton file is shown below: ``` { "workflow_name": "Playground4_Curvi", @@ -49,4 +49,3 @@ This plugin takes two input argument and one output argument: | `--configFile` | Configuration file for the workflow | Input | collection | | `--inpDir` | Input image collection to be processed by this plugin | Input | collection | | `--outDir` | Output collection | Output | collection | - diff --git a/segmentation/polus-aics-classic-seg-plugin/VERSION b/segmentation/polus-aics-classic-seg-plugin/VERSION index a34eaa5d0..20f49513e 100644 --- a/segmentation/polus-aics-classic-seg-plugin/VERSION +++ b/segmentation/polus-aics-classic-seg-plugin/VERSION @@ -1 +1 @@ -0.1.11 \ No newline at end of file +0.1.11 diff --git a/segmentation/polus-aics-classic-seg-plugin/build-docker.sh b/segmentation/polus-aics-classic-seg-plugin/build-docker.sh index 8017530f5..40fb0c566 100644 --- a/segmentation/polus-aics-classic-seg-plugin/build-docker.sh +++ b/segmentation/polus-aics-classic-seg-plugin/build-docker.sh @@ -1,4 +1,4 @@ #!/bin/bash version=$(0, min_size=minArea, connectivity=1, in_place=False) - seg = seg >0 - out_img=seg.astype(np.uint8) - out_img[out_img>0]=255 + minArea = config_data["minArea"] + seg = remove_small_objects( + bw > 0, min_size=minArea, connectivity=1, in_place=False, + ) + seg = seg > 0 + out_img = seg.astype(np.uint8) + out_img[out_img > 0] = 255 # create output image - out_img = out_img.transpose(1,2,0) - out_img = out_img.reshape((out_img.shape[0], out_img.shape[1], out_img.shape[2], 1, 1)) + out_img = out_img.transpose(1, 2, 0) + out_img = out_img.reshape( + (out_img.shape[0], out_img.shape[1], out_img.shape[2], 1, 1), + ) # write image using BFIO - bw = BioWriter(os.path.join(outDir,f), metadata=br.read_metadata()) + bw = BioWriter(os.path.join(outDir, f), metadata=br.read_metadata()) bw.num_x(out_img.shape[1]) bw.num_y(out_img.shape[0]) bw.num_z(out_img.shape[2]) bw.num_c(out_img.shape[3]) bw.num_t(out_img.shape[4]) - bw.pixel_type(dtype='uint8') 
+ bw.pixel_type(dtype="uint8") bw.write_image(out_img) bw.close_image() - diff --git a/segmentation/polus-aics-classic-seg-plugin/src/Workflows/Playground_dots.py b/segmentation/polus-aics-classic-seg-plugin/src/Workflows/Playground_dots.py index 37927ec2c..7a5f2ba82 100644 --- a/segmentation/polus-aics-classic-seg-plugin/src/Workflows/Playground_dots.py +++ b/segmentation/polus-aics-classic-seg-plugin/src/Workflows/Playground_dots.py @@ -1,77 +1,100 @@ -import numpy as np -import cv2 +import logging import os -import logging, sys -from bfio import BioReader, BioWriter -from pathlib import Path -from aicsimageio import AICSImage -from aicssegmentation.core.seg_dot import dot_3d, dot_3d_wrapper -from aicssegmentation.core.pre_processing_utils import intensity_normalization, image_smoothing_gaussian_slice_by_slice, image_smoothing_gaussian_3d -from skimage.morphology import remove_small_objects, watershed, dilation, erosion, ball + +import numpy as np +from aicssegmentation.core.pre_processing_utils import image_smoothing_gaussian_3d +from aicssegmentation.core.pre_processing_utils import ( + image_smoothing_gaussian_slice_by_slice, +) +from aicssegmentation.core.pre_processing_utils import intensity_normalization +from aicssegmentation.core.seg_dot import dot_3d_wrapper +from bfio import BioReader +from bfio import BioWriter +from scipy.ndimage import distance_transform_edt from skimage.feature import peak_local_max from skimage.measure import label -from scipy.ndimage import distance_transform_edt +from skimage.morphology import ball +from skimage.morphology import dilation +from skimage.morphology import remove_small_objects +from skimage.morphology import watershed -def segment_images(inpDir, outDir, config_data): - """ Workflow for dot like shapes such as - Centrin-2, Desmoplakin, PMP34. +def segment_images(inpDir, outDir, config_data): + """Workflow for dot like shapes such as + Centrin-2, Desmoplakin, PMP34. 
Args: inpDir : path to the input directory outDir : path to the output directory config_data : path to the configuration file """ - - logging.basicConfig(format='%(asctime)s - %(name)-8s - %(levelname)-8s - %(message)s', - datefmt='%d-%b-%y %H:%M:%S') + logging.basicConfig( + format="%(asctime)s - %(name)-8s - %(levelname)-8s - %(message)s", + datefmt="%d-%b-%y %H:%M:%S", + ) logger = logging.getLogger("main") logger.setLevel(logging.INFO) inpDir_files = os.listdir(inpDir) - for i,f in enumerate(inpDir_files): - logger.info('Segmenting image : {}'.format(f)) - + for i, f in enumerate(inpDir_files): + logger.info(f"Segmenting image : {f}") + # Load an image - br = BioReader(os.path.join(inpDir,f)) + br = BioReader(os.path.join(inpDir, f)) image = br.read_image() - structure_channel = 0 - struct_img0 = image[:,:,:,structure_channel,0] - struct_img0 = struct_img0.transpose(2,0,1).astype(np.float32) + structure_channel = 0 + struct_img0 = image[:, :, :, structure_channel, 0] + struct_img0 = struct_img0.transpose(2, 0, 1).astype(np.float32) # main algorithm - intensity_scaling_param = config_data['intensity_scaling_param'] - struct_img = intensity_normalization(struct_img0, scaling_param=intensity_scaling_param) - - gaussian_smoothing_sigma = config_data['gaussian_smoothing_sigma'] + intensity_scaling_param = config_data["intensity_scaling_param"] + struct_img = intensity_normalization( + struct_img0, scaling_param=intensity_scaling_param, + ) + + gaussian_smoothing_sigma = config_data["gaussian_smoothing_sigma"] if config_data["gaussian_smoothing"] == "gaussian_slice_by_slice": - structure_img_smooth = image_smoothing_gaussian_slice_by_slice(struct_img, sigma=gaussian_smoothing_sigma) + structure_img_smooth = image_smoothing_gaussian_slice_by_slice( + struct_img, sigma=gaussian_smoothing_sigma, + ) else: - structure_img_smooth = image_smoothing_gaussian_3d(struct_img, sigma=gaussian_smoothing_sigma) - s3_param = config_data['s3_param'] + structure_img_smooth = 
image_smoothing_gaussian_3d( + struct_img, sigma=gaussian_smoothing_sigma, + ) + s3_param = config_data["s3_param"] bw = dot_3d_wrapper(structure_img_smooth, s3_param) - minArea = config_data['minArea'] - Mask = remove_small_objects(bw>0, min_size=minArea, connectivity=1, in_place=False) - Seed = dilation(peak_local_max(struct_img,labels=label(Mask), min_distance=2, indices=False), selem=ball(1)) - Watershed_Map = -1*distance_transform_edt(bw) + minArea = config_data["minArea"] + Mask = remove_small_objects( + bw > 0, min_size=minArea, connectivity=1, in_place=False, + ) + Seed = dilation( + peak_local_max( + struct_img, labels=label(Mask), min_distance=2, indices=False, + ), + selem=ball(1), + ) + Watershed_Map = -1 * distance_transform_edt(bw) seg = watershed(Watershed_Map, label(Seed), mask=Mask, watershed_line=True) - seg = remove_small_objects(seg>0, min_size=minArea, connectivity=1, in_place=False) - seg = seg >0 - out_img=seg.astype(np.uint8) - out_img[out_img>0]=255 + seg = remove_small_objects( + seg > 0, min_size=minArea, connectivity=1, in_place=False, + ) + seg = seg > 0 + out_img = seg.astype(np.uint8) + out_img[out_img > 0] = 255 # create output image - out_img = out_img.transpose(1,2,0) - out_img = out_img.reshape((out_img.shape[0], out_img.shape[1], out_img.shape[2], 1, 1)) + out_img = out_img.transpose(1, 2, 0) + out_img = out_img.reshape( + (out_img.shape[0], out_img.shape[1], out_img.shape[2], 1, 1), + ) # write image using BFIO - bw = BioWriter(os.path.join(outDir,f), metadata=br.read_metadata()) + bw = BioWriter(os.path.join(outDir, f), metadata=br.read_metadata()) bw.num_x(out_img.shape[1]) bw.num_y(out_img.shape[0]) bw.num_z(out_img.shape[2]) bw.num_c(out_img.shape[3]) bw.num_t(out_img.shape[4]) - bw.pixel_type(dtype='uint8') + bw.pixel_type(dtype="uint8") bw.write_image(out_img) bw.close_image() - diff --git a/segmentation/polus-aics-classic-seg-plugin/src/Workflows/Playground_filament3d.py 
b/segmentation/polus-aics-classic-seg-plugin/src/Workflows/Playground_filament3d.py index 55f1449e2..b1464b98f 100644 --- a/segmentation/polus-aics-classic-seg-plugin/src/Workflows/Playground_filament3d.py +++ b/segmentation/polus-aics-classic-seg-plugin/src/Workflows/Playground_filament3d.py @@ -1,17 +1,18 @@ -import numpy as np +import logging import os -import cv2 -import logging, sys -from bfio import BioReader, BioWriter -from pathlib import Path -from aicsimageio import AICSImage + +import numpy as np +from aicssegmentation.core.pre_processing_utils import edge_preserving_smoothing_3d +from aicssegmentation.core.pre_processing_utils import image_smoothing_gaussian_3d +from aicssegmentation.core.pre_processing_utils import intensity_normalization from aicssegmentation.core.vessel import filament_3d_wrapper -from aicssegmentation.core.pre_processing_utils import intensity_normalization, image_smoothing_gaussian_3d, edge_preserving_smoothing_3d -from skimage.morphology import remove_small_objects +from bfio import BioReader +from bfio import BioWriter +from skimage.morphology import remove_small_objects -def segment_images(inpDir, outDir, config_data): - """ Workflow for data with filamentous structures +def segment_images(inpDir, outDir, config_data): + """Workflow for data with filamentous structures such as ZO1, Beta Actin, Titin, Troponin 1. 
Args: @@ -19,53 +20,61 @@ def segment_images(inpDir, outDir, config_data): outDir : path to the output directory config_data : path to the configuration file """ - - logging.basicConfig(format='%(asctime)s - %(name)-8s - %(levelname)-8s - %(message)s', - datefmt='%d-%b-%y %H:%M:%S') + logging.basicConfig( + format="%(asctime)s - %(name)-8s - %(levelname)-8s - %(message)s", + datefmt="%d-%b-%y %H:%M:%S", + ) logger = logging.getLogger("main") logger.setLevel(logging.INFO) inpDir_files = os.listdir(inpDir) - for i,f in enumerate(inpDir_files): - logger.info('Segmenting image : {}'.format(f)) - + for i, f in enumerate(inpDir_files): + logger.info(f"Segmenting image : {f}") + # Load image - br = BioReader(os.path.join(inpDir,f)) + br = BioReader(os.path.join(inpDir, f)) image = br.read_image() - structure_channel = 0 - struct_img0 = image[:,:,:,structure_channel,0] - struct_img0 = struct_img0.transpose(2,0,1).astype(np.float32) + structure_channel = 0 + struct_img0 = image[:, :, :, structure_channel, 0] + struct_img0 = struct_img0.transpose(2, 0, 1).astype(np.float32) # main algorithm - intensity_scaling_param = config_data['intensity_scaling_param'] - struct_img = intensity_normalization(struct_img0, scaling_param=intensity_scaling_param) - gaussian_smoothing_sigma = config_data['gaussian_smoothing_sigma'] + intensity_scaling_param = config_data["intensity_scaling_param"] + struct_img = intensity_normalization( + struct_img0, scaling_param=intensity_scaling_param, + ) + gaussian_smoothing_sigma = config_data["gaussian_smoothing_sigma"] - if config_data['preprocessing_function'] == 'image_smoothing_gaussian_3d': - structure_img_smooth = image_smoothing_gaussian_3d(struct_img, sigma=gaussian_smoothing_sigma) - elif config_data['preprocessing_function'] == 'edge_preserving_smoothing_3d': - structure_img_smooth = edge_preserving_smoothing_3d(struct_img) + if config_data["preprocessing_function"] == "image_smoothing_gaussian_3d": + structure_img_smooth = 
image_smoothing_gaussian_3d( + struct_img, sigma=gaussian_smoothing_sigma, + ) + elif config_data["preprocessing_function"] == "edge_preserving_smoothing_3d": + structure_img_smooth = edge_preserving_smoothing_3d(struct_img) - f3_param = config_data['f3_param'] + f3_param = config_data["f3_param"] bw = filament_3d_wrapper(structure_img_smooth, f3_param) - minArea = config_data['minArea'] - seg = remove_small_objects(bw>0, min_size=minArea, connectivity=1, in_place=False) - seg = seg >0 - out_img=seg.astype(np.uint8) - out_img[out_img>0]=255 + minArea = config_data["minArea"] + seg = remove_small_objects( + bw > 0, min_size=minArea, connectivity=1, in_place=False, + ) + seg = seg > 0 + out_img = seg.astype(np.uint8) + out_img[out_img > 0] = 255 # create output image - out_img = out_img.transpose(1,2,0) - out_img = out_img.reshape((out_img.shape[0], out_img.shape[1], out_img.shape[2], 1, 1)) + out_img = out_img.transpose(1, 2, 0) + out_img = out_img.reshape( + (out_img.shape[0], out_img.shape[1], out_img.shape[2], 1, 1), + ) # write image using BFIO - bw = BioWriter(os.path.join(outDir,f), metadata=br.read_metadata()) + bw = BioWriter(os.path.join(outDir, f), metadata=br.read_metadata()) bw.num_x(out_img.shape[1]) bw.num_y(out_img.shape[0]) bw.num_z(out_img.shape[2]) bw.num_c(out_img.shape[3]) bw.num_t(out_img.shape[4]) - bw.pixel_type(dtype='uint8') + bw.pixel_type(dtype="uint8") bw.write_image(out_img) bw.close_image() - diff --git a/segmentation/polus-aics-classic-seg-plugin/src/Workflows/Playground_gja1.py b/segmentation/polus-aics-classic-seg-plugin/src/Workflows/Playground_gja1.py index ccfec0228..be2313c9a 100644 --- a/segmentation/polus-aics-classic-seg-plugin/src/Workflows/Playground_gja1.py +++ b/segmentation/polus-aics-classic-seg-plugin/src/Workflows/Playground_gja1.py @@ -1,67 +1,76 @@ -import numpy as np +import logging import os -import cv2 -import logging, sys -from bfio import BioReader, BioWriter -from pathlib import Path -from aicsimageio import 
AICSImage -from aicssegmentation.core.seg_dot import dot_3d, dot_3d_wrapper + +import numpy as np +from aicssegmentation.core.pre_processing_utils import ( + image_smoothing_gaussian_slice_by_slice, +) +from aicssegmentation.core.pre_processing_utils import intensity_normalization from aicssegmentation.core.seg_dot import dot_3d_wrapper -from aicssegmentation.core.pre_processing_utils import intensity_normalization, image_smoothing_gaussian_slice_by_slice +from bfio import BioReader +from bfio import BioWriter from skimage.morphology import remove_small_objects -def segment_images(inpDir, outDir, config_data): - """ Workflow for data with similar morphology - as connexin-43 + +def segment_images(inpDir, outDir, config_data): + """Workflow for data with similar morphology + as connexin-43. Args: inpDir : path to the input directory outDir : path to the output directory config_data : path to the configuration file """ - - logging.basicConfig(format='%(asctime)s - %(name)-8s - %(levelname)-8s - %(message)s', - datefmt='%d-%b-%y %H:%M:%S') + logging.basicConfig( + format="%(asctime)s - %(name)-8s - %(levelname)-8s - %(message)s", + datefmt="%d-%b-%y %H:%M:%S", + ) logger = logging.getLogger("main") logger.setLevel(logging.INFO) - + inpDir_files = os.listdir(inpDir) - for i,f in enumerate(inpDir_files): - logger.info('Segmenting image : {}'.format(f)) - + for i, f in enumerate(inpDir_files): + logger.info(f"Segmenting image : {f}") + # Load image - br = BioReader(os.path.join(inpDir,f)) + br = BioReader(os.path.join(inpDir, f)) image = br.read_image() - structure_channel = 0 - struct_img0 = image[:,:,:,structure_channel,0] - struct_img0 = struct_img0.transpose(2,0,1).astype(np.float32) + structure_channel = 0 + struct_img0 = image[:, :, :, structure_channel, 0] + struct_img0 = struct_img0.transpose(2, 0, 1).astype(np.float32) # main algorithm - intensity_scaling_param = config_data['intensity_scaling_param'] - struct_img = intensity_normalization(struct_img0, 
scaling_param=intensity_scaling_param) - gaussian_smoothing_sigma = config_data['gaussian_smoothing_sigma'] - structure_img_smooth = image_smoothing_gaussian_slice_by_slice(struct_img, sigma=gaussian_smoothing_sigma) - s3_param = config_data['s3_param'] + intensity_scaling_param = config_data["intensity_scaling_param"] + struct_img = intensity_normalization( + struct_img0, scaling_param=intensity_scaling_param, + ) + gaussian_smoothing_sigma = config_data["gaussian_smoothing_sigma"] + structure_img_smooth = image_smoothing_gaussian_slice_by_slice( + struct_img, sigma=gaussian_smoothing_sigma, + ) + s3_param = config_data["s3_param"] bw = dot_3d_wrapper(structure_img_smooth, s3_param) - minArea = config_data['minArea'] - seg = remove_small_objects(bw>0, min_size=minArea, connectivity=1, in_place=False) - seg = seg >0 - out_img=seg.astype(np.uint8) - out_img[out_img>0]=255 + minArea = config_data["minArea"] + seg = remove_small_objects( + bw > 0, min_size=minArea, connectivity=1, in_place=False, + ) + seg = seg > 0 + out_img = seg.astype(np.uint8) + out_img[out_img > 0] = 255 # create output image - out_img = out_img.transpose(1,2,0) - out_img = out_img.reshape((out_img.shape[0], out_img.shape[1], out_img.shape[2], 1, 1)) + out_img = out_img.transpose(1, 2, 0) + out_img = out_img.reshape( + (out_img.shape[0], out_img.shape[1], out_img.shape[2], 1, 1), + ) # write image using BFIO - bw = BioWriter(os.path.join(outDir,f), metadata=br.read_metadata()) + bw = BioWriter(os.path.join(outDir, f), metadata=br.read_metadata()) bw.num_x(out_img.shape[1]) bw.num_y(out_img.shape[0]) bw.num_z(out_img.shape[2]) bw.num_c(out_img.shape[3]) bw.num_t(out_img.shape[4]) - bw.pixel_type(dtype='uint8') + bw.pixel_type(dtype="uint8") bw.write_image(out_img) bw.close_image() - - diff --git a/segmentation/polus-aics-classic-seg-plugin/src/Workflows/Playground_lamp1.py b/segmentation/polus-aics-classic-seg-plugin/src/Workflows/Playground_lamp1.py index 6f6237b0c..e2dc5d7ae 100644 --- 
a/segmentation/polus-aics-classic-seg-plugin/src/Workflows/Playground_lamp1.py +++ b/segmentation/polus-aics-classic-seg-plugin/src/Workflows/Playground_lamp1.py @@ -1,77 +1,88 @@ +import logging import os -import cv2 + import numpy as np -import logging, sys -from bfio import BioReader, BioWriter -from pathlib import Path -from aicsimageio import AICSImage -from aicssegmentation.core.vessel import filament_2d_wrapper +from aicssegmentation.core.pre_processing_utils import ( + image_smoothing_gaussian_slice_by_slice, +) +from aicssegmentation.core.pre_processing_utils import intensity_normalization from aicssegmentation.core.seg_dot import dot_2d_slice_by_slice_wrapper from aicssegmentation.core.utils import hole_filling -from aicssegmentation.core.pre_processing_utils import intensity_normalization, image_smoothing_gaussian_slice_by_slice -from skimage.morphology import remove_small_objects, watershed, dilation, erosion, ball +from aicssegmentation.core.vessel import filament_2d_wrapper +from bfio import BioReader +from bfio import BioWriter +from skimage.morphology import remove_small_objects + -def segment_images(inpDir, outDir, config_data): - """ Workflow for data with similar morphology - as LAMP-1 +def segment_images(inpDir, outDir, config_data): + """Workflow for data with similar morphology + as LAMP-1. 
Args: inpDir : path to the input directory outDir : path to the output directory config_data : path to the configuration file """ - - logging.basicConfig(format='%(asctime)s - %(name)-8s - %(levelname)-8s - %(message)s', - datefmt='%d-%b-%y %H:%M:%S') + logging.basicConfig( + format="%(asctime)s - %(name)-8s - %(levelname)-8s - %(message)s", + datefmt="%d-%b-%y %H:%M:%S", + ) logger = logging.getLogger("main") logger.setLevel(logging.INFO) inpDir_files = os.listdir(inpDir) - for i,f in enumerate(inpDir_files): - logger.info('Segmenting image : {}'.format(f)) - + for i, f in enumerate(inpDir_files): + logger.info(f"Segmenting image : {f}") + # Load image - br = BioReader(os.path.join(inpDir,f)) + br = BioReader(os.path.join(inpDir, f)) image = br.read_image() - structure_channel = 0 - struct_img0 = image[:,:,:,structure_channel,0] - struct_img0 = struct_img0.transpose(2,0,1).astype(np.float32) + structure_channel = 0 + struct_img0 = image[:, :, :, structure_channel, 0] + struct_img0 = struct_img0.transpose(2, 0, 1).astype(np.float32) # main algorithm - intensity_scaling_param = config_data['intensity_scaling_param'] - struct_img = intensity_normalization(struct_img0, scaling_param=intensity_scaling_param) - gaussian_smoothing_sigma = config_data['gaussian_smoothing_sigma'] - structure_img_smooth = image_smoothing_gaussian_slice_by_slice(struct_img, sigma=gaussian_smoothing_sigma) - s2_param = config_data['s2_param'] + intensity_scaling_param = config_data["intensity_scaling_param"] + struct_img = intensity_normalization( + struct_img0, scaling_param=intensity_scaling_param, + ) + gaussian_smoothing_sigma = config_data["gaussian_smoothing_sigma"] + structure_img_smooth = image_smoothing_gaussian_slice_by_slice( + struct_img, sigma=gaussian_smoothing_sigma, + ) + s2_param = config_data["s2_param"] bw_spot = dot_2d_slice_by_slice_wrapper(structure_img_smooth, s2_param) - f2_param = config_data['f2_param'] + f2_param = config_data["f2_param"] bw_filament = 
filament_2d_wrapper(structure_img_smooth, f2_param) bw = np.logical_or(bw_spot, bw_filament) - fill_2d = config_data['fill_2d'] - if fill_2d == 'True': + fill_2d = config_data["fill_2d"] + if fill_2d == "True": fill_2d = True - elif fill_2d =='False': + elif fill_2d == "False": fill_2d = False - fill_max_size = config_data['fill_max_size'] - minArea = config_data['minArea'] + fill_max_size = config_data["fill_max_size"] + minArea = config_data["minArea"] bw_fill = hole_filling(bw, 0, fill_max_size, False) - seg = remove_small_objects(bw_fill>0, min_size=minArea, connectivity=1, in_place=False) - seg = seg >0 - out_img=seg.astype(np.uint8) - out_img[out_img>0]=255 + seg = remove_small_objects( + bw_fill > 0, min_size=minArea, connectivity=1, in_place=False, + ) + seg = seg > 0 + out_img = seg.astype(np.uint8) + out_img[out_img > 0] = 255 - # create output image - out_img = out_img.transpose(1,2,0) - out_img = out_img.reshape((out_img.shape[0], out_img.shape[1], out_img.shape[2], 1, 1)) + # create output image + out_img = out_img.transpose(1, 2, 0) + out_img = out_img.reshape( + (out_img.shape[0], out_img.shape[1], out_img.shape[2], 1, 1), + ) # write image using BFIO - bw = BioWriter(os.path.join(outDir,f), metadata=br.read_metadata()) + bw = BioWriter(os.path.join(outDir, f), metadata=br.read_metadata()) bw.num_x(out_img.shape[1]) bw.num_y(out_img.shape[0]) bw.num_z(out_img.shape[2]) bw.num_c(out_img.shape[3]) bw.num_t(out_img.shape[4]) - bw.pixel_type(dtype='uint8') + bw.pixel_type(dtype="uint8") bw.write_image(out_img) bw.close_image() - diff --git a/segmentation/polus-aics-classic-seg-plugin/src/Workflows/Playground_npm1.py b/segmentation/polus-aics-classic-seg-plugin/src/Workflows/Playground_npm1.py index 04cde60c9..553598a63 100644 --- a/segmentation/polus-aics-classic-seg-plugin/src/Workflows/Playground_npm1.py +++ b/segmentation/polus-aics-classic-seg-plugin/src/Workflows/Playground_npm1.py @@ -1,75 +1,89 @@ +import logging import os -import cv2 + import 
numpy as np -import logging, sys -from bfio import BioReader, BioWriter -from pathlib import Path -from aicsimageio import AICSImage -from scipy.ndimage.morphology import binary_fill_holes -from aicssegmentation.core.seg_dot import dot_2d_slice_by_slice_wrapper -from aicssegmentation.core.pre_processing_utils import intensity_normalization, image_smoothing_gaussian_3d -from skimage.morphology import remove_small_objects, binary_closing, ball, disk, erosion, dilation # function for post-processing (size filter) + +# function for post-processing (size filter) from aicssegmentation.core.MO_threshold import MO +from aicssegmentation.core.pre_processing_utils import image_smoothing_gaussian_3d +from aicssegmentation.core.pre_processing_utils import intensity_normalization +from aicssegmentation.core.seg_dot import dot_2d_slice_by_slice_wrapper +from bfio import BioReader +from bfio import BioWriter +from skimage.morphology import remove_small_objects -def segment_images(inpDir, outDir, config_data): - """ Workflow for data with similar morphology - as Nucleophosmin +def segment_images(inpDir, outDir, config_data): + """Workflow for data with similar morphology + as Nucleophosmin. 
Args: inpDir : path to the input directory outDir : path to the output directory config_data : path to the configuration file """ - - logging.basicConfig(format='%(asctime)s - %(name)-8s - %(levelname)-8s - %(message)s', - datefmt='%d-%b-%y %H:%M:%S') + logging.basicConfig( + format="%(asctime)s - %(name)-8s - %(levelname)-8s - %(message)s", + datefmt="%d-%b-%y %H:%M:%S", + ) logger = logging.getLogger("main") logger.setLevel(logging.INFO) inpDir_files = os.listdir(inpDir) - for i,f in enumerate(inpDir_files): - logger.info('Segmenting image : {}'.format(f)) - + for i, f in enumerate(inpDir_files): + logger.info(f"Segmenting image : {f}") + # Load image - br = BioReader(os.path.join(inpDir,f)) + br = BioReader(os.path.join(inpDir, f)) image = br.read_image() - structure_channel = 0 - struct_img0 = image[:,:,:,structure_channel,0] - struct_img0 = struct_img0.transpose(2,0,1).astype(np.float32) + structure_channel = 0 + struct_img0 = image[:, :, :, structure_channel, 0] + struct_img0 = struct_img0.transpose(2, 0, 1).astype(np.float32) # main algorithm - intensity_scaling_param = config_data['intensity_scaling_param'] - struct_img = intensity_normalization(struct_img0, scaling_param=intensity_scaling_param) - gaussian_smoothing_sigma = config_data['gaussian_smoothing_sigma'] - structure_img_smooth = image_smoothing_gaussian_3d(struct_img, sigma=gaussian_smoothing_sigma) - global_thresh_method = config_data['global_thresh_method'] - object_minArea = config_data['object_minArea'] - bw, object_for_debug = MO(structure_img_smooth, global_thresh_method=global_thresh_method, object_minArea=object_minArea, return_object=True) - s2_param_bright = config_data['s2_param_bright'] - s2_param_dark = config_data['s2_param_dark'] + intensity_scaling_param = config_data["intensity_scaling_param"] + struct_img = intensity_normalization( + struct_img0, scaling_param=intensity_scaling_param, + ) + gaussian_smoothing_sigma = config_data["gaussian_smoothing_sigma"] + structure_img_smooth 
= image_smoothing_gaussian_3d( + struct_img, sigma=gaussian_smoothing_sigma, + ) + global_thresh_method = config_data["global_thresh_method"] + object_minArea = config_data["object_minArea"] + bw, object_for_debug = MO( + structure_img_smooth, + global_thresh_method=global_thresh_method, + object_minArea=object_minArea, + return_object=True, + ) + s2_param_bright = config_data["s2_param_bright"] + s2_param_dark = config_data["s2_param_dark"] bw_extra = dot_2d_slice_by_slice_wrapper(structure_img_smooth, s2_param_bright) - bw_dark = dot_2d_slice_by_slice_wrapper(1-structure_img_smooth, s2_param_dark) + bw_dark = dot_2d_slice_by_slice_wrapper(1 - structure_img_smooth, s2_param_dark) bw_merge = np.logical_or(bw, bw_extra) - bw_merge[bw_dark>0]=0 - minArea = config_data['minArea'] - seg = remove_small_objects(bw_merge>0, min_size=minArea, connectivity=1, in_place=False) + bw_merge[bw_dark > 0] = 0 + minArea = config_data["minArea"] + seg = remove_small_objects( + bw_merge > 0, min_size=minArea, connectivity=1, in_place=False, + ) seg = seg > 0 - out_img=seg.astype(np.uint8) - out_img[out_img>0]=255 + out_img = seg.astype(np.uint8) + out_img[out_img > 0] = 255 # create output image - out_img = out_img.transpose(1,2,0) - out_img = out_img.reshape((out_img.shape[0], out_img.shape[1], out_img.shape[2], 1, 1)) + out_img = out_img.transpose(1, 2, 0) + out_img = out_img.reshape( + (out_img.shape[0], out_img.shape[1], out_img.shape[2], 1, 1), + ) # write image using BFIO - bw = BioWriter(os.path.join(outDir,f), metadata=br.read_metadata()) + bw = BioWriter(os.path.join(outDir, f), metadata=br.read_metadata()) bw.num_x(out_img.shape[1]) bw.num_y(out_img.shape[0]) bw.num_z(out_img.shape[2]) bw.num_c(out_img.shape[3]) bw.num_t(out_img.shape[4]) - bw.pixel_type(dtype='uint8') + bw.pixel_type(dtype="uint8") bw.write_image(out_img) bw.close_image() - diff --git a/segmentation/polus-aics-classic-seg-plugin/src/Workflows/Playground_shell.py 
b/segmentation/polus-aics-classic-seg-plugin/src/Workflows/Playground_shell.py index e37cc0b1f..439174488 100644 --- a/segmentation/polus-aics-classic-seg-plugin/src/Workflows/Playground_shell.py +++ b/segmentation/polus-aics-classic-seg-plugin/src/Workflows/Playground_shell.py @@ -1,74 +1,85 @@ +import logging import os -import cv2 + import numpy as np -import logging, sys -from bfio import BioReader, BioWriter -from pathlib import Path -from aicsimageio import AICSImage +from aicssegmentation.core.pre_processing_utils import image_smoothing_gaussian_3d +from aicssegmentation.core.pre_processing_utils import intensity_normalization +from aicssegmentation.core.utils import get_3dseed_from_mid_frame +from aicssegmentation.core.utils import get_middle_frame +from aicssegmentation.core.utils import hole_filling from aicssegmentation.core.vessel import filament_2d_wrapper -from aicssegmentation.core.pre_processing_utils import intensity_normalization, image_smoothing_gaussian_3d -from aicssegmentation.core.utils import get_middle_frame, hole_filling, get_3dseed_from_mid_frame -from skimage.morphology import remove_small_objects, watershed, dilation, ball +from bfio import BioReader +from bfio import BioWriter +from skimage.morphology import ball +from skimage.morphology import dilation +from skimage.morphology import watershed -def segment_images(inpDir, outDir, config_data): - """ Workflow for data with shell like shapes - such as lamin B1 (interphase-specific) +def segment_images(inpDir, outDir, config_data): + """Workflow for data with shell like shapes + such as lamin B1 (interphase-specific). 
Args: inpDir : path to the input directory outDir : path to the output directory config_data : path to the configuration file """ - - logging.basicConfig(format='%(asctime)s - %(name)-8s - %(levelname)-8s - %(message)s', - datefmt='%d-%b-%y %H:%M:%S') + logging.basicConfig( + format="%(asctime)s - %(name)-8s - %(levelname)-8s - %(message)s", + datefmt="%d-%b-%y %H:%M:%S", + ) logger = logging.getLogger("main") logger.setLevel(logging.INFO) inpDir_files = os.listdir(inpDir) - for i,f in enumerate(inpDir_files): - logger.info('Segmenting image : {}'.format(f)) - + for i, f in enumerate(inpDir_files): + logger.info(f"Segmenting image : {f}") + # Load image - br = BioReader(os.path.join(inpDir,f)) + br = BioReader(os.path.join(inpDir, f)) image = br.read_image() - structure_channel = 0 - struct_img0 = image[:,:,:,structure_channel,0] - struct_img0 = struct_img0.transpose(2,0,1).astype(np.float32) + structure_channel = 0 + struct_img0 = image[:, :, :, structure_channel, 0] + struct_img0 = struct_img0.transpose(2, 0, 1).astype(np.float32) # main algorithm - intensity_scaling_param = config_data['intensity_scaling_param'] - struct_img = intensity_normalization(struct_img0, scaling_param=intensity_scaling_param) - gaussian_smoothing_sigma = config_data['gaussian_smoothing_sigma'] - structure_img_smooth = image_smoothing_gaussian_3d(struct_img, sigma=gaussian_smoothing_sigma) - middle_frame_method = config_data['middle_frame_method'] + intensity_scaling_param = config_data["intensity_scaling_param"] + struct_img = intensity_normalization( + struct_img0, scaling_param=intensity_scaling_param, + ) + gaussian_smoothing_sigma = config_data["gaussian_smoothing_sigma"] + structure_img_smooth = image_smoothing_gaussian_3d( + struct_img, sigma=gaussian_smoothing_sigma, + ) + middle_frame_method = config_data["middle_frame_method"] mid_z = get_middle_frame(structure_img_smooth, method=middle_frame_method) - f2_param = config_data['f2_param'] - bw_mid_z = 
filament_2d_wrapper(structure_img_smooth[mid_z,:,:], f2_param) - hole_max = config_data['hole_max'] - hole_min = config_data['hole_min'] + f2_param = config_data["f2_param"] + bw_mid_z = filament_2d_wrapper(structure_img_smooth[mid_z, :, :], f2_param) + hole_max = config_data["hole_max"] + hole_min = config_data["hole_min"] bw_fill_mid_z = hole_filling(bw_mid_z, hole_min, hole_max) - seed = get_3dseed_from_mid_frame(np.logical_xor(bw_fill_mid_z, bw_mid_z), struct_img.shape, mid_z, hole_min) - bw_filled = watershed(struct_img, seed.astype(int), watershed_line=True)>0 + seed = get_3dseed_from_mid_frame( + np.logical_xor(bw_fill_mid_z, bw_mid_z), struct_img.shape, mid_z, hole_min, + ) + bw_filled = watershed(struct_img, seed.astype(int), watershed_line=True) > 0 seg = np.logical_xor(bw_filled, dilation(bw_filled, selem=ball(1))) seg = seg > 0 - out_img=seg.astype(np.uint8) - out_img[out_img>0]=255 + out_img = seg.astype(np.uint8) + out_img[out_img > 0] = 255 # create output image - out_img = out_img.transpose(1,2,0) - out_img = out_img.reshape((out_img.shape[0], out_img.shape[1], out_img.shape[2], 1, 1)) + out_img = out_img.transpose(1, 2, 0) + out_img = out_img.reshape( + (out_img.shape[0], out_img.shape[1], out_img.shape[2], 1, 1), + ) # write image using BFIO - bw = BioWriter(os.path.join(outDir,f), metadata=br.read_metadata()) + bw = BioWriter(os.path.join(outDir, f), metadata=br.read_metadata()) bw.num_x(out_img.shape[1]) bw.num_y(out_img.shape[0]) bw.num_z(out_img.shape[2]) bw.num_c(out_img.shape[3]) bw.num_t(out_img.shape[4]) - bw.pixel_type(dtype='uint8') + bw.pixel_type(dtype="uint8") bw.write_image(out_img) bw.close_image() - - diff --git a/segmentation/polus-aics-classic-seg-plugin/src/Workflows/Playground_spotty.py b/segmentation/polus-aics-classic-seg-plugin/src/Workflows/Playground_spotty.py index cc45d828e..edfda702a 100644 --- a/segmentation/polus-aics-classic-seg-plugin/src/Workflows/Playground_spotty.py +++ 
b/segmentation/polus-aics-classic-seg-plugin/src/Workflows/Playground_spotty.py @@ -1,17 +1,17 @@ +import logging import os -import cv2 + import numpy as np -import logging, sys -from bfio import BioReader, BioWriter -from pathlib import Path -from aicsimageio import AICSImage +from aicssegmentation.core.pre_processing_utils import image_smoothing_gaussian_3d +from aicssegmentation.core.pre_processing_utils import intensity_normalization from aicssegmentation.core.seg_dot import dot_2d_slice_by_slice_wrapper -from aicssegmentation.core.pre_processing_utils import intensity_normalization, image_smoothing_gaussian_3d -from skimage.morphology import remove_small_objects, binary_closing, ball , dilation +from bfio import BioReader +from bfio import BioWriter +from skimage.morphology import remove_small_objects -def segment_images(inpDir, outDir, config_data): - """ Workflow for data with a spotty appearance +def segment_images(inpDir, outDir, config_data): + """Workflow for data with a spotty appearance in each 2d frame such as fibrillarin and beta catenin. 
Args: @@ -19,48 +19,56 @@ def segment_images(inpDir, outDir, config_data): outDir : path to the output directory config_data : path to the configuration file """ - - logging.basicConfig(format='%(asctime)s - %(name)-8s - %(levelname)-8s - %(message)s', - datefmt='%d-%b-%y %H:%M:%S') + logging.basicConfig( + format="%(asctime)s - %(name)-8s - %(levelname)-8s - %(message)s", + datefmt="%d-%b-%y %H:%M:%S", + ) logger = logging.getLogger("main") logger.setLevel(logging.INFO) inpDir_files = os.listdir(inpDir) - for i,f in enumerate(inpDir_files): - logger.info('Segmenting image : {}'.format(f)) - + for i, f in enumerate(inpDir_files): + logger.info(f"Segmenting image : {f}") + # Load image - br = BioReader(os.path.join(inpDir,f)) + br = BioReader(os.path.join(inpDir, f)) image = br.read_image() - structure_channel = 0 - struct_img0 = image[:,:,:,structure_channel,0] - struct_img0 = struct_img0.transpose(2,0,1).astype(np.float32) + structure_channel = 0 + struct_img0 = image[:, :, :, structure_channel, 0] + struct_img0 = struct_img0.transpose(2, 0, 1).astype(np.float32) # main algorithm - intensity_scaling_param = config_data['intensity_scaling_param'] - struct_img = intensity_normalization(struct_img0, scaling_param=intensity_scaling_param) - gaussian_smoothing_sigma = config_data['gaussian_smoothing_sigma'] - structure_img_smooth = image_smoothing_gaussian_3d(struct_img, sigma=gaussian_smoothing_sigma) - s2_param = config_data['s2_param'] + intensity_scaling_param = config_data["intensity_scaling_param"] + struct_img = intensity_normalization( + struct_img0, scaling_param=intensity_scaling_param, + ) + gaussian_smoothing_sigma = config_data["gaussian_smoothing_sigma"] + structure_img_smooth = image_smoothing_gaussian_3d( + struct_img, sigma=gaussian_smoothing_sigma, + ) + s2_param = config_data["s2_param"] bw = dot_2d_slice_by_slice_wrapper(structure_img_smooth, s2_param) - minArea = config_data['minArea'] - seg = remove_small_objects(bw>0, min_size=minArea, 
connectivity=1, in_place=False) + minArea = config_data["minArea"] + seg = remove_small_objects( + bw > 0, min_size=minArea, connectivity=1, in_place=False, + ) seg = seg > 0 - out_img=seg.astype(np.uint8) - out_img[out_img>0]=255 + out_img = seg.astype(np.uint8) + out_img[out_img > 0] = 255 # create output image - out_img = out_img.transpose(1,2,0) - out_img = out_img.reshape((out_img.shape[0], out_img.shape[1], out_img.shape[2], 1, 1)) + out_img = out_img.transpose(1, 2, 0) + out_img = out_img.reshape( + (out_img.shape[0], out_img.shape[1], out_img.shape[2], 1, 1), + ) # write image using BFIO - bw = BioWriter(os.path.join(outDir,f), metadata=br.read_metadata()) + bw = BioWriter(os.path.join(outDir, f), metadata=br.read_metadata()) bw.num_x(out_img.shape[1]) bw.num_y(out_img.shape[0]) bw.num_z(out_img.shape[2]) bw.num_c(out_img.shape[3]) bw.num_t(out_img.shape[4]) - bw.pixel_type(dtype='uint8') + bw.pixel_type(dtype="uint8") bw.write_image(out_img) bw.close_image() - diff --git a/segmentation/polus-aics-classic-seg-plugin/src/Workflows/Playground_st6gal1.py b/segmentation/polus-aics-classic-seg-plugin/src/Workflows/Playground_st6gal1.py index d61b17c81..8847103dd 100644 --- a/segmentation/polus-aics-classic-seg-plugin/src/Workflows/Playground_st6gal1.py +++ b/segmentation/polus-aics-classic-seg-plugin/src/Workflows/Playground_st6gal1.py @@ -1,19 +1,19 @@ +import logging import os -import cv2 + import numpy as np -import logging, sys -from bfio import BioReader, BioWriter -from pathlib import Path -from aicsimageio import AICSImage +from aicssegmentation.core.MO_threshold import MO +from aicssegmentation.core.pre_processing_utils import image_smoothing_gaussian_3d +from aicssegmentation.core.pre_processing_utils import intensity_normalization from aicssegmentation.core.seg_dot import dot_3d_wrapper -from aicssegmentation.core.pre_processing_utils import intensity_normalization, image_smoothing_gaussian_3d -from skimage.morphology import remove_small_objects, 
binary_closing, ball , dilation from aicssegmentation.core.utils import topology_preserving_thinning -from aicssegmentation.core.MO_threshold import MO +from bfio import BioReader +from bfio import BioWriter +from skimage.morphology import remove_small_objects -def segment_images(inpDir, outDir, config_data): - """ Workflow for data with similar morphology +def segment_images(inpDir, outDir, config_data): + """Workflow for data with similar morphology as sialyltransferase 1. Args: @@ -21,55 +21,68 @@ def segment_images(inpDir, outDir, config_data): outDir : path to the output directory config_data : path to the configuration file """ - - logging.basicConfig(format='%(asctime)s - %(name)-8s - %(levelname)-8s - %(message)s', - datefmt='%d-%b-%y %H:%M:%S') + logging.basicConfig( + format="%(asctime)s - %(name)-8s - %(levelname)-8s - %(message)s", + datefmt="%d-%b-%y %H:%M:%S", + ) logger = logging.getLogger("main") logger.setLevel(logging.INFO) inpDir_files = os.listdir(inpDir) - for i,f in enumerate(inpDir_files): - logger.info('Segmenting image : {}'.format(f)) - + for i, f in enumerate(inpDir_files): + logger.info(f"Segmenting image : {f}") + # Load image - br = BioReader(os.path.join(inpDir,f)) + br = BioReader(os.path.join(inpDir, f)) image = br.read_image() - structure_channel = 0 - struct_img0 = image[:,:,:,structure_channel,0] - struct_img0 = struct_img0.transpose(2,0,1).astype(np.float32) + structure_channel = 0 + struct_img0 = image[:, :, :, structure_channel, 0] + struct_img0 = struct_img0.transpose(2, 0, 1).astype(np.float32) # main algorithm - intensity_scaling_param = config_data['intensity_scaling_param'] - struct_img = intensity_normalization(struct_img0, scaling_param=intensity_scaling_param) - gaussian_smoothing_sigma = config_data['gaussian_smoothing_sigma'] - structure_img_smooth = image_smoothing_gaussian_3d(struct_img, sigma=gaussian_smoothing_sigma) - global_thresh_method = config_data['global_thresh_method'] - object_minArea = 
config_data['object_minArea'] - bw, object_for_debug = MO(structure_img_smooth, global_thresh_method=global_thresh_method, object_minArea=object_minArea, return_object=True) - thin_dist_preserve = config_data['thin_dist_preserve'] - thin_dist = config_data['thin_dist'] - bw_thin = topology_preserving_thinning(bw>0, thin_dist_preserve, thin_dist) - s3_param = config_data['s3_param'] + intensity_scaling_param = config_data["intensity_scaling_param"] + struct_img = intensity_normalization( + struct_img0, scaling_param=intensity_scaling_param, + ) + gaussian_smoothing_sigma = config_data["gaussian_smoothing_sigma"] + structure_img_smooth = image_smoothing_gaussian_3d( + struct_img, sigma=gaussian_smoothing_sigma, + ) + global_thresh_method = config_data["global_thresh_method"] + object_minArea = config_data["object_minArea"] + bw, object_for_debug = MO( + structure_img_smooth, + global_thresh_method=global_thresh_method, + object_minArea=object_minArea, + return_object=True, + ) + thin_dist_preserve = config_data["thin_dist_preserve"] + thin_dist = config_data["thin_dist"] + bw_thin = topology_preserving_thinning(bw > 0, thin_dist_preserve, thin_dist) + s3_param = config_data["s3_param"] bw_extra = dot_3d_wrapper(structure_img_smooth, s3_param) - bw_combine = np.logical_or(bw_extra>0, bw_thin) - minArea = config_data['minArea'] - seg = remove_small_objects(bw_combine>0, min_size=minArea, connectivity=1, in_place=False) + bw_combine = np.logical_or(bw_extra > 0, bw_thin) + minArea = config_data["minArea"] + seg = remove_small_objects( + bw_combine > 0, min_size=minArea, connectivity=1, in_place=False, + ) seg = seg > 0 - out_img=seg.astype(np.uint8) - out_img[out_img>0]=255 - + out_img = seg.astype(np.uint8) + out_img[out_img > 0] = 255 + # create output image - out_img = out_img.transpose(1,2,0) - out_img = out_img.reshape((out_img.shape[0], out_img.shape[1], out_img.shape[2], 1, 1)) + out_img = out_img.transpose(1, 2, 0) + out_img = out_img.reshape( + 
(out_img.shape[0], out_img.shape[1], out_img.shape[2], 1, 1), + ) # write image using BFIO - bw = BioWriter(os.path.join(outDir,f), metadata=br.read_metadata()) + bw = BioWriter(os.path.join(outDir, f), metadata=br.read_metadata()) bw.num_x(out_img.shape[1]) bw.num_y(out_img.shape[0]) bw.num_z(out_img.shape[2]) bw.num_c(out_img.shape[3]) bw.num_t(out_img.shape[4]) - bw.pixel_type(dtype='uint8') + bw.pixel_type(dtype="uint8") bw.write_image(out_img) bw.close_image() - diff --git a/segmentation/polus-aics-classic-seg-plugin/src/log4j.properties b/segmentation/polus-aics-classic-seg-plugin/src/log4j.properties index b6682b3d8..bdcc5504b 100644 --- a/segmentation/polus-aics-classic-seg-plugin/src/log4j.properties +++ b/segmentation/polus-aics-classic-seg-plugin/src/log4j.properties @@ -9,4 +9,4 @@ log4j.appender.A1=org.apache.log4j.ConsoleAppender # A1 uses PatternLayout. log4j.appender.A1.layout=org.apache.log4j.PatternLayout -log4j.appender.A1.layout.ConversionPattern=%-4r [%t] %-5p %c %x - %m%n \ No newline at end of file +log4j.appender.A1.layout.ConversionPattern=%-4r [%t] %-5p %c %x - %m%n diff --git a/segmentation/polus-aics-classic-seg-plugin/src/main.py b/segmentation/polus-aics-classic-seg-plugin/src/main.py index 60d00157a..2b62fe396 100644 --- a/segmentation/polus-aics-classic-seg-plugin/src/main.py +++ b/segmentation/polus-aics-classic-seg-plugin/src/main.py @@ -1,111 +1,132 @@ -import os +import argparse import json +import logging +import os +import sys +import traceback from pathlib import Path -import javabridge as jutil + import bioformats -import traceback +import javabridge as jutil from Workflows import Playground_CurvyLinear from Workflows import Playground_dots +from Workflows import Playground_filament3d from Workflows import Playground_gja1 from Workflows import Playground_lamp1 from Workflows import Playground_npm1 +from Workflows import Playground_shell from Workflows import Playground_spotty -from Workflows import Playground_filament3d from 
Workflows import Playground_st6gal1 -from Workflows import Playground_shell -import argparse, logging, subprocess, time, multiprocessing, sys - -if __name__=="__main__": +if __name__ == "__main__": # Initialize the logger - logging.basicConfig(format='%(asctime)s - %(name)-8s - %(levelname)-8s - %(message)s', - datefmt='%d-%b-%y %H:%M:%S') + logging.basicConfig( + format="%(asctime)s - %(name)-8s - %(levelname)-8s - %(message)s", + datefmt="%d-%b-%y %H:%M:%S", + ) logger = logging.getLogger("main") logger.setLevel(logging.INFO) - ''' Argument parsing ''' - + """ Argument parsing """ + logger.info("Parsing arguments...") - parser = argparse.ArgumentParser(prog='main', description='The plugin integrates the allen cell structure segmenter into WIPP') - + parser = argparse.ArgumentParser( + prog="main", + description="The plugin integrates the allen cell structure segmenter into WIPP", + ) + # Input arguments - parser.add_argument('--configFile', dest='configFile', type=str, - help='Configuration file for the workflow', required=True) - parser.add_argument('--inpDir', dest='inpDir', type=str, - help='Input image collection to be processed by this plugin', required=True) + parser.add_argument( + "--configFile", + dest="configFile", + type=str, + help="Configuration file for the workflow", + required=True, + ) + parser.add_argument( + "--inpDir", + dest="inpDir", + type=str, + help="Input image collection to be processed by this plugin", + required=True, + ) # Output arguments - parser.add_argument('--outDir', dest='outDir', type=str, - help='Output collection', required=True) - + parser.add_argument( + "--outDir", dest="outDir", type=str, help="Output collection", required=True, + ) + # Parse the arguments args = parser.parse_args() configFile = args.configFile - logger.info('configFile = {}'.format(configFile)) + logger.info(f"configFile = {configFile}") inpDir = args.inpDir - if (Path.is_dir(Path(args.inpDir).joinpath('images'))): + if 
Path.is_dir(Path(args.inpDir).joinpath("images")): # switch to images folder if present - fpath = str(Path(args.inpDir).joinpath('images').absolute()) - logger.info('inpDir = {}'.format(inpDir)) + fpath = str(Path(args.inpDir).joinpath("images").absolute()) + logger.info(f"inpDir = {inpDir}") outDir = args.outDir - logger.info('outDir = {}'.format(outDir)) - + logger.info(f"outDir = {outDir}") + # load config file - config_file_path = os.path.join(configFile[:-7],'metadata_files') - metafiles=os.listdir(config_file_path) - with open(os.path.join(config_file_path,metafiles[0])) as json_file: + config_file_path = os.path.join(configFile[:-7], "metadata_files") + metafiles = os.listdir(config_file_path) + with open(os.path.join(config_file_path, metafiles[0])) as json_file: config_data = json.load(json_file) # initialize javabridge - logger.info('Initializing the javabridge...') + logger.info("Initializing the javabridge...") log_config = Path(__file__).parent.joinpath("log4j.properties") - jutil.start_vm(args=["-Dlog4j.configuration=file:{}".format(str(log_config.absolute()))],class_path=bioformats.JARS) - + jutil.start_vm( + args=[f"-Dlog4j.configuration=file:{log_config.absolute()!s}"], + class_path=bioformats.JARS, + ) + try: # execute the desired workflow - if config_data['workflow_name'] == 'Playground4_Curvi': - logger.info('executing {}'.format(config_data['workflow_name'] )) + if config_data["workflow_name"] == "Playground4_Curvi": + logger.info("executing {}".format(config_data["workflow_name"])) Playground_CurvyLinear.segment_images(inpDir, outDir, config_data) - elif config_data['workflow_name'] == 'Playground_dots': - logger.info('executing {}'.format(config_data['workflow_name'] )) + elif config_data["workflow_name"] == "Playground_dots": + logger.info("executing {}".format(config_data["workflow_name"])) Playground_dots.segment_images(inpDir, outDir, config_data) - elif config_data['workflow_name'] == 'Playground_gja1': - logger.info('executing 
{}'.format(config_data['workflow_name'] )) + elif config_data["workflow_name"] == "Playground_gja1": + logger.info("executing {}".format(config_data["workflow_name"])) Playground_gja1.segment_images(inpDir, outDir, config_data) - elif config_data['workflow_name'] == 'Playground_lamp1': - logger.info('executing {}'.format(config_data['workflow_name'] )) + elif config_data["workflow_name"] == "Playground_lamp1": + logger.info("executing {}".format(config_data["workflow_name"])) Playground_lamp1.segment_images(inpDir, outDir, config_data) - elif config_data['workflow_name'] == 'Playground_npm1': - logger.info('executing {}'.format(config_data['workflow_name'] )) - Playground_npm1.segment_images(inpDir, outDir, config_data) + elif config_data["workflow_name"] == "Playground_npm1": + logger.info("executing {}".format(config_data["workflow_name"])) + Playground_npm1.segment_images(inpDir, outDir, config_data) - elif config_data['workflow_name'] == 'Playground_spotty': - logger.info('executing {}'.format(config_data['workflow_name'] )) - Playground_spotty.segment_images(inpDir, outDir, config_data) + elif config_data["workflow_name"] == "Playground_spotty": + logger.info("executing {}".format(config_data["workflow_name"])) + Playground_spotty.segment_images(inpDir, outDir, config_data) - elif config_data['workflow_name'] == 'Playground_filament3d': - logger.info('executing {}'.format(config_data['workflow_name'] )) - Playground_filament3d.segment_images(inpDir, outDir, config_data) + elif config_data["workflow_name"] == "Playground_filament3d": + logger.info("executing {}".format(config_data["workflow_name"])) + Playground_filament3d.segment_images(inpDir, outDir, config_data) - elif config_data['workflow_name'] == 'Playground_st6gal1': - logger.info('executing {}'.format(config_data['workflow_name'] )) - Playground_st6gal1.segment_images(inpDir, outDir, config_data) + elif config_data["workflow_name"] == "Playground_st6gal1": + logger.info("executing 
{}".format(config_data["workflow_name"])) + Playground_st6gal1.segment_images(inpDir, outDir, config_data) - elif config_data['workflow_name'] == 'Playground_shell': - logger.info('executing {}'.format(config_data['workflow_name'] )) - Playground_shell.segment_images(inpDir, outDir, config_data) + elif config_data["workflow_name"] == "Playground_shell": + logger.info("executing {}".format(config_data["workflow_name"])) + Playground_shell.segment_images(inpDir, outDir, config_data) except Exception: traceback.print_exc() finally: # Close the javabridge regardless of successful completion - logger.info('Closing the javabridge') + logger.info("Closing the javabridge") jutil.kill_vm() - + # Exit the program - sys.exit() \ No newline at end of file + sys.exit() diff --git a/segmentation/polus-smp-training-plugin/VERSION b/segmentation/polus-smp-training-plugin/VERSION index cacdf3bd9..69626fb92 100644 --- a/segmentation/polus-smp-training-plugin/VERSION +++ b/segmentation/polus-smp-training-plugin/VERSION @@ -1 +1 @@ -0.5.11 \ No newline at end of file +0.5.11 diff --git a/segmentation/polus-smp-training-plugin/generate_plugin_manifest.py b/segmentation/polus-smp-training-plugin/generate_plugin_manifest.py index 6ad831176..5abef4406 100644 --- a/segmentation/polus-smp-training-plugin/generate_plugin_manifest.py +++ b/segmentation/polus-smp-training-plugin/generate_plugin_manifest.py @@ -3,207 +3,193 @@ from src import utils INPUTS = { - 'inferenceMode': { - 'description': '\'active\' or \'inactive\' for whether to run in inference mode.', - 'type': 'enum', - 'required': True, - 'options': {'values': ['active', 'inactive']}, + "inferenceMode": { + "description": "'active' or 'inactive' for whether to run in inference mode.", + "type": "enum", + "required": True, + "options": {"values": ["active", "inactive"]}, }, - 'imagesInferenceDir': { - 'description': 'Collection containing images on which to run inference.', - 'type': 'collection', + "imagesInferenceDir": { + 
"description": "Collection containing images on which to run inference.", + "type": "collection", }, - 'inferencePattern': { - 'description': 'Filename pattern for images on which to run inference.', - 'type': 'string', + "inferencePattern": { + "description": "Filename pattern for images on which to run inference.", + "type": "string", }, - - 'pretrainedModel': { - 'description': " ".join([ - 'Path to a model that was previously trained with this plugin.', - 'If starting fresh, you must instead provide:', - '\'modelName\',', - '\'encoderBase\',', - '\'encoderVariant\',', - '\'encoderWeights\',', - 'and \'optimizerName\'.', - 'See the README for available options.' - ]), - 'type': 'genericData', + "pretrainedModel": { + "description": "Path to a model that was previously trained with this plugin. If starting fresh, you must instead provide: 'modelName', 'encoderBase', 'encoderVariant', 'encoderWeights', and 'optimizerName'. See the README for available options.", + "type": "genericData", }, - 'modelName': { - 'description': 'Model architecture to use. Required if starting fresh.', - 'type': 'enum', - 'options': {'values': utils.MODEL_NAMES}, + "modelName": { + "description": "Model architecture to use. 
Required if starting fresh.", + "type": "enum", + "options": {"values": utils.MODEL_NAMES}, }, - 'encoderBase': { - 'description': 'The name of the base encoder to use.', - 'type': 'enum', - 'options': {'values': utils.BASE_ENCODERS}, + "encoderBase": { + "description": "The name of the base encoder to use.", + "type": "enum", + "options": {"values": utils.BASE_ENCODERS}, }, - 'encoderVariant': { - 'description': 'The name of the specific variant to use.', - 'type': 'enum', - 'options': {'values': utils.ENCODER_VARIANTS}, + "encoderVariant": { + "description": "The name of the specific variant to use.", + "type": "enum", + "options": {"values": utils.ENCODER_VARIANTS}, }, - 'encoderWeights': { - 'description': 'The name of the pretrained weights to use.', - 'type': 'enum', - 'options': {'values': list(sorted(utils.ENCODER_WEIGHTS))}, + "encoderWeights": { + "description": "The name of the pretrained weights to use.", + "type": "enum", + "options": {"values": sorted(utils.ENCODER_WEIGHTS)}, }, - 'optimizerName': { - 'description': ( - 'Name of optimization algorithm to use for training the model. ' - 'Required if starting fresh.' + "optimizerName": { + "description": ( + "Name of optimization algorithm to use for training the model. " + "Required if starting fresh." ), - 'type': 'enum', - 'options': {'values': utils.OPTIMIZER_NAMES}, + "type": "enum", + "options": {"values": utils.OPTIMIZER_NAMES}, }, - - 'batchSize': { - 'description': ( - 'Size of each batch for training. ' - 'If left unspecified, we use the maximum possible based on memory constraints.' + "batchSize": { + "description": ( + "Size of each batch for training. " + "If left unspecified, we use the maximum possible based on memory constraints." 
), - 'type': 'integer', + "type": "integer", }, - - 'imagesTrainDir': { - 'description': 'Collection containing images to use for training.', - 'type': 'collection', + "imagesTrainDir": { + "description": "Collection containing images to use for training.", + "type": "collection", }, - 'labelsTrainDir': { - 'description': 'Collection containing labels, i.e. the ground-truth, for the training images.', - 'type': 'collection', + "labelsTrainDir": { + "description": "Collection containing labels, i.e. the ground-truth, for the training images.", + "type": "collection", }, - 'trainPattern': { - 'description': 'Filename pattern for training images and labels.', - 'type': 'string', + "trainPattern": { + "description": "Filename pattern for training images and labels.", + "type": "string", }, - - 'imagesValidDir': { - 'description': 'Collection containing images to use for validation.', - 'type': 'collection', + "imagesValidDir": { + "description": "Collection containing images to use for validation.", + "type": "collection", }, - 'labelsValidDir': { - 'description': 'Collection containing labels, i.e. the ground-truth, for the validation images.', - 'type': 'collection', + "labelsValidDir": { + "description": "Collection containing labels, i.e. 
the ground-truth, for the validation images.", + "type": "collection", }, - 'validPattern': { - 'description': 'Filename pattern for validation images and labels.', - 'type': 'string', + "validPattern": { + "description": "Filename pattern for validation images and labels.", + "type": "string", }, - - 'device': { - 'description': 'Which device to use for the model', - 'type': 'string', + "device": { + "description": "Which device to use for the model", + "type": "string", }, - 'checkpointFrequency': { - 'description': 'How often to save model checkpoints', - 'type': 'integer', + "checkpointFrequency": { + "description": "How often to save model checkpoints", + "type": "integer", }, - - 'lossName': { - 'description': 'Name of loss function to use.', - 'type': 'enum', - 'options': {'values': utils.LOSS_NAMES}, + "lossName": { + "description": "Name of loss function to use.", + "type": "enum", + "options": {"values": utils.LOSS_NAMES}, }, - 'maxEpochs': { - 'description': 'Maximum number of epochs for which to continue training the model.', - 'type': 'integer', + "maxEpochs": { + "description": "Maximum number of epochs for which to continue training the model.", + "type": "integer", }, - 'patience': { - 'description': 'Maximum number of epochs to wait for model to improve.', - 'type': 'integer', + "patience": { + "description": "Maximum number of epochs to wait for model to improve.", + "type": "integer", }, - 'minDelta': { - 'description': 'Minimum improvement in loss to reset patience.', - 'type': 'number', + "minDelta": { + "description": "Minimum improvement in loss to reset patience.", + "type": "number", }, } -OUTPUTS = [{ - 'name': 'outputDir', - 'type': 'genericData', - 'description': 'In training mode, this contains the trained model and checkpoints. ' - 'In inference mode, this contains the output labels.' -}] +OUTPUTS = [ + { + "name": "outputDir", + "type": "genericData", + "description": "In training mode, this contains the trained model and checkpoints. 
" + "In inference mode, this contains the output labels.", + }, +] DEFAULTS = { - 'inferenceMode': 'inactive', - 'inferencePattern': '.*', - 'modelName': 'Unet', - 'encoderBase': 'ResNet', - 'encoderVariant': 'resnet34', - 'encoderWeights': 'imagenet', - 'optimizerName': 'Adam', - 'trainPattern': '.*', - 'validPattern': '.*', - 'device': 'gpu', - 'lossName': 'JaccardLoss', - 'maxEpochs': 100, - 'patience': 10, - 'minDelta': 1e-4, + "inferenceMode": "inactive", + "inferencePattern": ".*", + "modelName": "Unet", + "encoderBase": "ResNet", + "encoderVariant": "resnet34", + "encoderWeights": "imagenet", + "optimizerName": "Adam", + "trainPattern": ".*", + "validPattern": ".*", + "device": "gpu", + "lossName": "JaccardLoss", + "maxEpochs": 100, + "patience": 10, + "minDelta": 1e-4, } INFERENCE_ARGS = { - 'imagesInferenceDir', - 'inferencePattern', + "imagesInferenceDir", + "inferencePattern", } COMMON_ARGS = { - 'inferenceMode', - 'device', - 'pretrainedModel', - 'modelName', - 'encoderBase', - 'encoderVariant', - 'encoderWeights', + "inferenceMode", + "device", + "pretrainedModel", + "modelName", + "encoderBase", + "encoderVariant", + "encoderWeights", } def bump_version(debug: bool) -> str: - with open('VERSION', 'r') as infile: + with open("VERSION") as infile: version = infile.read() if debug: - if 'debug' in version: - [version, debug] = version.split('debug') - version = f'{version}debug{str(1 + int(debug))}' + if "debug" in version: + [version, debug] = version.split("debug") + version = f"{version}debug{1 + int(debug)!s}" else: - version = f'{version}debug1' + version = f"{version}debug1" else: - numbering = version.split('.') - minor = int(numbering[-1].split('debug')[0]) + numbering = version.split(".") + minor = int(numbering[-1].split("debug")[0]) minor += 1 numbering[-1] = str(minor) - version = '.'.join(numbering) + version = ".".join(numbering) - with open('VERSION', 'w') as outfile: + with open("VERSION", "w") as outfile: outfile.write(version) return 
version def create_ui(): - ui = list() + ui = [] for key, values in INPUTS.items(): field = { - 'key': f'inputs.{key}', - 'title': key, - 'description': values['description'], + "key": f"inputs.{key}", + "title": key, + "description": values["description"], } if key in DEFAULTS: - field['default'] = DEFAULTS[key] + field["default"] = DEFAULTS[key] - if key not in COMMON_ARGS: - if key != 'inferenceMode': - if key in INFERENCE_ARGS: - field['condition'] = 'model.inputs.inferenceMode==active' - else: - field['condition'] = 'model.inputs.inferenceMode==inactive' + if key not in COMMON_ARGS and key != "inferenceMode": + if key in INFERENCE_ARGS: + field["condition"] = "model.inputs.inferenceMode==active" + else: + field["condition"] = "model.inputs.inferenceMode==inactive" ui.append(field) @@ -211,40 +197,52 @@ def create_ui(): def variants_conditionals(): - validator = list() + validator = [] for base, variant in utils.ENCODERS.items(): - validator.append({ - 'condition': [{ - 'input': 'encoderBase', - 'value': base, - 'eval': '==', - }], - 'then': [{ - 'action': 'show', - 'input': 'encoderVariant', - 'values': list(variant.keys()), - }] - }) + validator.append( + { + "condition": [ + { + "input": "encoderBase", + "value": base, + "eval": "==", + }, + ], + "then": [ + { + "action": "show", + "input": "encoderVariant", + "values": list(variant.keys()), + }, + ], + }, + ) return validator def weights_conditionals(): - validator = list() + validator = [] for base, variants in utils.ENCODERS.items(): for variant, weights in variants.items(): - validator.append({ - 'condition': [{ - 'input': 'encoderVariant', - 'value': variant, - 'eval': '==', - }], - 'then': [{ - 'action': 'show', - 'input': 'encoderWeights', - 'values': [*weights, 'random'], - }], - }) + validator.append( + { + "condition": [ + { + "input": "encoderVariant", + "value": variant, + "eval": "==", + }, + ], + "then": [ + { + "action": "show", + "input": "encoderWeights", + "values": [*weights, "random"], + 
}, + ], + }, + ) return validator @@ -253,34 +251,33 @@ def generate_manifest(debug: bool): version = bump_version(debug) for key, value in INPUTS.items(): - if key != 'inferenceMode': - value['required'] = False - if 'options' not in value.keys(): + if key != "inferenceMode": + value["required"] = False + if "options" not in value.keys(): # noinspection PyTypeChecker - value['options'] = None + value["options"] = None manifest = { - 'name': 'Demo SMP Training/Inference', - 'version': f'{version}', - 'title': 'Segmentation Models Training and Inference', - 'description': 'Segmentation models training and inference plugin.', - 'author': 'Gauhar Bains (gauhar.bains@labshare.org), Najib Ishaq (najib.ishaq@axleinfo.com), Madhuri Vihani (madhuri.vihani@nih.gov), Benjamin Houghton (benjamin.houghton@axleinfo.com)', - 'institution': 'National Center for Advancing Translational Sciences, National Institutes of Health', - 'repository': 'https://github.com/PolusAI/polus-plugins/tree/dev/segmentation', - 'website': 'https://ncats.nih.gov/preclinical/core/informatics', - 'citation': '', - 'containerId': f'polusai/smp-training-plugin:{version}', - 'inputs': [{'name': key, **value} for key, value in INPUTS.items()], - 'outputs': OUTPUTS, - 'ui': create_ui(), - 'validators': variants_conditionals() + weights_conditionals(), + "name": "Demo SMP Training/Inference", + "version": f"{version}", + "title": "Segmentation Models Training and Inference", + "description": "Segmentation models training and inference plugin.", + "author": "Gauhar Bains (gauhar.bains@labshare.org), Najib Ishaq (najib.ishaq@axleinfo.com), Madhuri Vihani (madhuri.vihani@nih.gov), Benjamin Houghton (benjamin.houghton@axleinfo.com)", + "institution": "National Center for Advancing Translational Sciences, National Institutes of Health", + "repository": "https://github.com/PolusAI/polus-plugins/tree/dev/segmentation", + "website": "https://ncats.nih.gov/preclinical/core/informatics", + "citation": "", + 
"containerId": f"polusai/smp-training-plugin:{version}", + "inputs": [{"name": key, **value} for key, value in INPUTS.items()], + "outputs": OUTPUTS, + "ui": create_ui(), + "validators": variants_conditionals() + weights_conditionals(), } - with open('plugin.json', 'w') as outfile: + with open("plugin.json", "w") as outfile: json.dump(manifest, outfile, indent=4) - return -if __name__ == '__main__': +if __name__ == "__main__": generate_manifest(debug=False) diff --git a/segmentation/polus-smp-training-plugin/misc/preprocess_tissuenet.py b/segmentation/polus-smp-training-plugin/misc/preprocess_tissuenet.py index e1f7b8abc..26b8720e6 100644 --- a/segmentation/polus-smp-training-plugin/misc/preprocess_tissuenet.py +++ b/segmentation/polus-smp-training-plugin/misc/preprocess_tissuenet.py @@ -7,11 +7,13 @@ from skimage import morphology from skimage import segmentation - NPZ_DIR = "/home/vihanimm/SegmentationModelToolkit/Data/" OUTPUT_DIR = "/home/vihanimm/SegmentationModelToolkit/Data/tif_data/" if not os.path.isdir(OUTPUT_DIR): - raise ValueError("Output Directory ({}) does not exist; please create manually".format(OUTPUT_DIR)) + msg = f"Output Directory ({OUTPUT_DIR}) does not exist; please create manually" + raise ValueError( + msg, + ) def convert_to_boundarymaps(groundtruth_boundary_array): @@ -20,27 +22,31 @@ def convert_to_boundarymaps(groundtruth_boundary_array): boundaries = segmentation.find_boundaries(anno) dilated_boundaries = morphology.binary_dilation(boundaries) - label_binary = np.zeros((anno.shape + (3,))) + label_binary = np.zeros((*anno.shape, 3)) label_binary[(anno == 0) & (boundaries == 0), 0] = 1 label_binary[(anno != 0) & (boundaries == 0), 1] = 1 label_binary[boundaries == 1, 2] = 1 - - dilated_label_binary = np.zeros((anno.shape + (3,))) + + dilated_label_binary = np.zeros((*anno.shape, 3)) dilated_label_binary[(anno == 0) & (dilated_boundaries == 0), 0] = 1 dilated_label_binary[(anno != 0) & (dilated_boundaries == 0), 1] = 1 
dilated_label_binary[dilated_boundaries == 1, 2] = 1 # erode away the center - gt_cntrbnry_2pxlerro_arr = np.zeros(anno.shape) - gt_cntrbnry_2pxlerro_arr[label_binary[:, :, 1] == 1] = 1 # center values are set to 1 - # gt_cntrbnry_2pxlerro_arr = morphology.binary_erosion(gt_cntrbnry_2pxlerro_arr) + gt_cntrbnry_2pxlerro_arr = np.zeros(anno.shape) + gt_cntrbnry_2pxlerro_arr[ + label_binary[:, :, 1] == 1 + ] = 1 # center values are set to 1 print("NUMBER OF 1s in EROSION: ", np.sum(gt_cntrbnry_2pxlerro_arr)) - # three pixel values in this output groundtruth_multiclass_array = np.zeros(anno.shape) - groundtruth_multiclass_array[dilated_label_binary[:, :, 2] == 1] = 1 # border values are set to 1 - groundtruth_multiclass_array[dilated_label_binary[:, :, 1] == 1] = 2 # center values are set to 2 + groundtruth_multiclass_array[ + dilated_label_binary[:, :, 2] == 1 + ] = 1 # border values are set to 1 + groundtruth_multiclass_array[ + dilated_label_binary[:, :, 1] == 1 + ] = 2 # center values are set to 2 count_twos = np.count_nonzero((groundtruth_multiclass_array == 2).all) if count_twos == 0: groundtruth_multiclass_array[groundtruth_multiclass_array == 0] = 2 @@ -54,14 +60,19 @@ def convert_to_boundarymaps(groundtruth_boundary_array): gt_cntrbnry_3pxlerro_arr[groundtruth_multiclass_array == 2] = 1 print("NUMBER OF 1s in Border Dilation: ", np.sum(gt_cntrbnry_3pxlerro_arr)) - return groundtruth_multiclass_array, groundtruth_borderbinary_array, gt_cntrbnry_3pxlerro_arr, gt_cntrbnry_2pxlerro_arr + return ( + groundtruth_multiclass_array, + groundtruth_borderbinary_array, + gt_cntrbnry_3pxlerro_arr, + gt_cntrbnry_2pxlerro_arr, + ) def save_file(npz_location, classofdata, typeofdata): # mapping out all the outputs diction = np.load(os.path.join(NPZ_DIR, npz_location)) - X, y = diction['X'], diction['y'] - tissue_list, platform_list = diction['tissue_list'], diction['platform_list'] + X, y = diction["X"], diction["y"] + tissue_list, platform_list = diction["tissue_list"], 
diction["platform_list"] class_directory = os.path.join(OUTPUT_DIR, classofdata) if not os.path.isdir(class_directory): @@ -85,21 +96,28 @@ def save_file(npz_location, classofdata, typeofdata): groundtruth_directory = os.path.join(output_directory, "groundtruth") if not os.path.isdir(groundtruth_directory): os.mkdir(groundtruth_directory) - groundtruth_multiclass_directory = os.path.join(output_directory, "groundtruth_multiclass") + groundtruth_multiclass_directory = os.path.join( + output_directory, "groundtruth_multiclass", + ) if not os.path.isdir(groundtruth_multiclass_directory): os.mkdir(groundtruth_multiclass_directory) - groundtruth_borderbinary_directory = os.path.join(output_directory, "groundtruth_borderbinary") + groundtruth_borderbinary_directory = os.path.join( + output_directory, "groundtruth_borderbinary", + ) if not os.path.isdir(groundtruth_borderbinary_directory): os.mkdir(groundtruth_borderbinary_directory) - groundtruth_centerbinary3_directory = os.path.join(output_directory, "groundtruth_centerbinary_2pixelsmaller") + groundtruth_centerbinary3_directory = os.path.join( + output_directory, "groundtruth_centerbinary_2pixelsmaller", + ) if not os.path.isdir(groundtruth_centerbinary3_directory): os.mkdir(groundtruth_centerbinary3_directory) - groundtruth_centerbinary2_directory = os.path.join(output_directory, "groundtruth_centerbinary_1pixelsmaller") + groundtruth_centerbinary2_directory = os.path.join( + output_directory, "groundtruth_centerbinary_1pixelsmaller", + ) if not os.path.isdir(groundtruth_centerbinary2_directory): os.mkdir(groundtruth_centerbinary2_directory) for ex in range(num_examples): - # separate out the nuclear data from the cell's data if classofdata == "nuclear": image_array = X[ex, :, :, 0].squeeze() @@ -107,30 +125,44 @@ def save_file(npz_location, classofdata, typeofdata): else: image_array = X[ex, :, :, 1].squeeze() groundtruth_array = y[ex, :, :, 0].squeeze() - # Other ground truth + # Other ground truth 
groundtruth_multiclass_array = copy.deepcopy(groundtruth_array) - groundtruth_multiclass_array, groundtruth_borderbinary_array, \ - gt_cntrbnry_3pxlerro_arr, gt_cntrbnry_2pxlerro_arr = convert_to_boundarymaps(groundtruth_multiclass_array) + ( + groundtruth_multiclass_array, + groundtruth_borderbinary_array, + gt_cntrbnry_3pxlerro_arr, + gt_cntrbnry_2pxlerro_arr, + ) = convert_to_boundarymaps(groundtruth_multiclass_array) # all output (images and groundtruths will have the same name, just saved in different directories) - file_outputname = "{0}_{1}_{2}.tif".format(classofdata, typeofdata, ex) + file_outputname = f"{classofdata}_{typeofdata}_{ex}.tif" # the images range from 0 to 1, therefore multiply by 255 if need to view the image in matplotlib image_file = os.path.join(image_directory, file_outputname) groundtruth_file = os.path.join(groundtruth_directory, file_outputname) - groundtruth_multiclass_file = os.path.join(groundtruth_multiclass_directory, file_outputname) - groundtruth_borderbinary_file = os.path.join(groundtruth_borderbinary_directory, file_outputname) - groundtruth_centerbinary3_file = os.path.join(groundtruth_centerbinary3_directory, file_outputname) - groundtruth_centerbinary2_file = os.path.join(groundtruth_centerbinary2_directory, file_outputname) + groundtruth_multiclass_file = os.path.join( + groundtruth_multiclass_directory, file_outputname, + ) + groundtruth_borderbinary_file = os.path.join( + groundtruth_borderbinary_directory, file_outputname, + ) + groundtruth_centerbinary3_file = os.path.join( + groundtruth_centerbinary3_directory, file_outputname, + ) + groundtruth_centerbinary2_file = os.path.join( + groundtruth_centerbinary2_directory, file_outputname, + ) # Save the images Image.fromarray(image_array).save(image_file) Image.fromarray(groundtruth_array).save(groundtruth_file) Image.fromarray(groundtruth_multiclass_array).save(groundtruth_multiclass_file) - 
Image.fromarray(groundtruth_borderbinary_array).save(groundtruth_borderbinary_file) + Image.fromarray(groundtruth_borderbinary_array).save( + groundtruth_borderbinary_file, + ) Image.fromarray(gt_cntrbnry_3pxlerro_arr).save(groundtruth_centerbinary3_file) Image.fromarray(gt_cntrbnry_2pxlerro_arr).save(groundtruth_centerbinary2_file) - + if ex == 0: # checking the first image qualitatively fig, axes = plt.subplots(3, 2, figsize=(16, 24)) axes[0, 0].imshow(image_array) @@ -140,18 +172,34 @@ def save_file(npz_location, classofdata, typeofdata): axes[2, 0].imshow(groundtruth_borderbinary_array) axes[2, 1].imshow(gt_cntrbnry_3pxlerro_arr) axes[0, 0].set_title("Image") - axes[0, 1].set_title(f"Ground Truth: {np.min(groundtruth_array)}-{np.max(groundtruth_array)}") - axes[1, 1].set_title(f"Ground Truth Binary Centers 1 pixel errosion: {np.min(gt_cntrbnry_2pxlerro_arr)}-{np.max(gt_cntrbnry_2pxlerro_arr)}") - axes[1, 0].set_title(f"Ground Truth Multiclass: {np.min(groundtruth_multiclass_array)}-{np.max(groundtruth_multiclass_array)}") - axes[2, 0].set_title(f"Ground Truth Binary Borders: {np.min(groundtruth_borderbinary_array)}-{np.max(groundtruth_borderbinary_array)}") - axes[2, 1].set_title(f"Ground Truth Binary Centers 2 pixel errosion: {np.min(gt_cntrbnry_3pxlerro_arr)}-{np.max(gt_cntrbnry_3pxlerro_arr)}") - - fig.suptitle("Example Plot for {}'s Data ({})".format(classofdata, typeofdata)) - plot_name = "{}_{}.jpg".format(classofdata, typeofdata) + axes[0, 1].set_title( + f"Ground Truth: {np.min(groundtruth_array)}-{np.max(groundtruth_array)}", + ) + axes[1, 1].set_title( + f"Ground Truth Binary Centers 1 pixel errosion: {np.min(gt_cntrbnry_2pxlerro_arr)}-{np.max(gt_cntrbnry_2pxlerro_arr)}", + ) + axes[1, 0].set_title( + f"Ground Truth Multiclass: {np.min(groundtruth_multiclass_array)}-{np.max(groundtruth_multiclass_array)}", + ) + axes[2, 0].set_title( + f"Ground Truth Binary Borders: {np.min(groundtruth_borderbinary_array)}-{np.max(groundtruth_borderbinary_array)}", + 
) + axes[2, 1].set_title( + f"Ground Truth Binary Centers 2 pixel errosion: {np.min(gt_cntrbnry_3pxlerro_arr)}-{np.max(gt_cntrbnry_3pxlerro_arr)}", + ) + + fig.suptitle( + f"Example Plot for {classofdata}'s Data ({typeofdata})", + ) + plot_name = f"{classofdata}_{typeofdata}.jpg" plt.savefig(os.path.join(OUTPUT_DIR, plot_name)) - print("Saved {}'s {} data ({}/{})".format(classofdata, typeofdata, ex, num_examples - 1)) - print("Saved all of {}'s {} data".format(classofdata, typeofdata)) + print( + "Saved {}'s {} data ({}/{})".format( + classofdata, typeofdata, ex, num_examples - 1, + ), + ) + print(f"Saved all of {classofdata}'s {typeofdata} data") print(" ") diff --git a/segmentation/polus-smp-training-plugin/misc/train_smp_DICEloss.py b/segmentation/polus-smp-training-plugin/misc/train_smp_DICEloss.py index 100fadf98..3b42ae734 100644 --- a/segmentation/polus-smp-training-plugin/misc/train_smp_DICEloss.py +++ b/segmentation/polus-smp-training-plugin/misc/train_smp_DICEloss.py @@ -1,105 +1,98 @@ # rudimentary libraries for basic commands -import json, os, sys import copy +import json +import os + +# update to this for multiprocessing +import albumentations as albu +import matplotlib.pyplot as plt +import numpy as np # most important library import segmentation_models_pytorch as smp - # pytorch functions import torch +from PIL import Image +from sklearn.metrics import fbeta_score +from sklearn.metrics import jaccard_score +from torch import nn from torch.utils.data import DataLoader from torch.utils.data import Dataset -import torch.nn as nn -import torch.nn.functional as F - from torchsummary import summary - -# update to this for multiprocessing -from torch.utils.data import IterableDataset -import albumentations as albu - -import numpy as np -from PIL import Image -import matplotlib.pyplot as plt - -from sklearn.metrics import fbeta_score, jaccard_score - from tqdm import tqdm -from tqdm import trange CUDA_LAUNCH_BLOCKING = 1 + # 
https://pytorch.org/tutorials/beginner/data_loading_tutorial.html # for pytorch, you need to create an abstract Dataset Class class DatasetforPytorch(Dataset): - - def __init__(self, - images_dir, - masks_dir, - preprocessing=None, - augmentations=None): - - self.images_fps = [os.path.join(images_dir, image) for image in os.listdir(images_dir)] - self.masks_fps = [os.path.join(masks_dir, mask) for mask in os.listdir(masks_dir)] - self.preprocessing = preprocessing # this is a function that is getting intialized - self.augmentations = augmentations # this is a function that is getting initialized + def __init__(self, images_dir, masks_dir, preprocessing=None, augmentations=None) -> None: + self.images_fps = [ + os.path.join(images_dir, image) for image in os.listdir(images_dir) + ] + self.masks_fps = [ + os.path.join(masks_dir, mask) for mask in os.listdir(masks_dir) + ] + self.preprocessing = ( + preprocessing # this is a function that is getting initialized + ) + self.augmentations = ( + augmentations # this is a function that is getting initialized + ) def __getitem__(self, i): - - image = np.array(Image.open(self.images_fps[i])) - mask = np.array(Image.open(self.masks_fps[i])) - # mask[mask > 0] = 1 + image = np.array(Image.open(self.images_fps[i])) + mask = np.array(Image.open(self.masks_fps[i])) if self.augmentations: sample = self.augmentations(image=image, mask=mask) - image, mask = sample['image'], sample['mask'] - - image = np.reshape(image, (1, image.shape[0], image.shape[1])).astype("float32") - assert np.isnan(image).any() == False - assert np.isinf(image).any() == False - - - mask = np.reshape(mask, (1, mask.shape[0], mask.shape[1])).astype("float32") - assert np.isnan(mask).any() == False - assert np.isinf(mask).any() == False + image, mask = sample["image"], sample["mask"] + image = np.reshape(image, (1, image.shape[0], image.shape[1])).astype("float32") + assert np.isnan(image).any() is False + assert np.isinf(image).any() is False + mask = 
np.reshape(mask, (1, mask.shape[0], mask.shape[1])).astype("float32") + assert np.isnan(mask).any() is False + assert np.isinf(mask).any() is False return image, mask - def __len__(self): - return(len(self.images_fps)) + def __len__(self) -> int: + return len(self.images_fps) + class DiceLoss(nn.Module): - def __init__(self, weight=None, size_average=True): - super(DiceLoss, self).__init__() + def __init__(self, weight=None, size_average=True) -> None: + super().__init__() def forward(self, inputs, targets, smooth=1): # Make sure inputs are probits - #flatten label and prediction tensors + # flatten label and prediction tensors inputs = inputs.view(-1) targets = targets.view(-1) - - intersection = (inputs * targets).sum() - dice = (2.*intersection + smooth)/(inputs.sum() + targets.sum() + smooth) - + + intersection = (inputs * targets).sum() + dice = (2.0 * intersection + smooth) / (inputs.sum() + targets.sum() + smooth) + return 1 - dice + def get_training_augmentation(): train_transform = [ - albu.HorizontalFlip(p=0.5), - - albu.ShiftScaleRotate(scale_limit=0.5, rotate_limit=0, shift_limit=0.1, p=1, border_mode=0), - - albu.PadIfNeeded(min_height=256, min_width=256, always_apply=True, border_mode=0), + albu.ShiftScaleRotate( + scale_limit=0.5, rotate_limit=0, shift_limit=0.1, p=1, border_mode=0, + ), + albu.PadIfNeeded( + min_height=256, min_width=256, always_apply=True, border_mode=0, + ), albu.RandomCrop(height=256, width=256, always_apply=True), - albu.GaussNoise(p=0.2), albu.Perspective(p=0.5), - albu.OneOf( [ albu.RandomBrightnessContrast(p=1), @@ -107,7 +100,6 @@ def get_training_augmentation(): ], p=0.9, ), - albu.OneOf( [ albu.Sharpen(p=1), @@ -116,27 +108,27 @@ def get_training_augmentation(): ], p=0.9, ), - ] return albu.Compose(train_transform) + def get_validation_augmentation(): - """Add paddings to make image shape divisible by 32""" - test_transform = [ - albu.PadIfNeeded(256, 256) - ] + """Add paddings to make image shape divisible by 32.""" + 
test_transform = [albu.PadIfNeeded(256, 256)] return albu.Compose(test_transform) -def plot_fromnohup(file_name): +def plot_fromnohup(file_name): train_log_losses = [] - train_log_iou = [] - train_log_f = [] + train_log_iou = [] + train_log_f = [] valid_log_losses = [] - valid_log_iou = [] - valid_log_f = [] - nohupfile = "/home/vihanimm/SegmentationModelToolkit/workdir/pytorch_binary/nohup.out" - with open(nohupfile, 'r') as nohup: + valid_log_iou = [] + valid_log_f = [] + nohupfile = ( + "/home/vihanimm/SegmentationModelToolkit/workdir/pytorch_binary/nohup.out" + ) + with open(nohupfile) as nohup: for line in nohup: line = line.rstrip() if line.startswith("TRAIN EPOCH"): @@ -144,8 +136,8 @@ def plot_fromnohup(file_name): line = line[1] loss, f, iou = line.split(",") loss = float((loss.lstrip().split(" "))[1]) - f = float((f.lstrip().split(" "))[2]) - iou = float((iou.lstrip().split(" "))[2]) + f = float((f.lstrip().split(" "))[2]) + iou = float((iou.lstrip().split(" "))[2]) train_log_losses.append(loss) train_log_iou.append(iou) train_log_f.append(f) @@ -154,8 +146,8 @@ def plot_fromnohup(file_name): line = line[1] loss, f, iou = line.split(",") loss = float((loss.lstrip().split(" "))[1]) - f = float((f.lstrip().split(" "))[2]) - iou = float((iou.lstrip().split(" "))[2]) + f = float((f.lstrip().split(" "))[2]) + iou = float((iou.lstrip().split(" "))[2]) valid_log_losses.append(loss) valid_log_iou.append(iou) valid_log_f.append(f) @@ -163,187 +155,194 @@ def plot_fromnohup(file_name): continue fig, axs = plt.subplots(2, 3, figsize=(24, 16), tight_layout=True) - - axs[0,0].plot(train_log_losses) - axs[0,0].set_title("Training Data - Dice Loss") - axs[0,0].set_ylabel("BCE Loss") - axs[0,1].plot(train_log_iou) - axs[0,1].set_title("Training Data - IOU Score") - axs[0,1].set_ylabel("IOU Score") - axs[0,2].plot(train_log_f) - axs[0,2].set_title("Training Data - F Score") - axs[0,2].set_ylabel("F Score") - - axs[1,0].plot(valid_log_losses) - 
axs[1,0].set_title("Validation Data - Dice Loss") - axs[1,0].set_ylabel("BCE Loss") - axs[1,1].plot(valid_log_iou) - axs[1,1].set_title("Validation Data - IOU Score") - axs[1,1].set_ylabel("IOU Score") - axs[1,2].plot(valid_log_f) - axs[1,2].set_title("Validation Data - F Score") - axs[1,2].set_ylabel("F Score") + + axs[0, 0].plot(train_log_losses) + axs[0, 0].set_title("Training Data - Dice Loss") + axs[0, 0].set_ylabel("BCE Loss") + axs[0, 1].plot(train_log_iou) + axs[0, 1].set_title("Training Data - IOU Score") + axs[0, 1].set_ylabel("IOU Score") + axs[0, 2].plot(train_log_f) + axs[0, 2].set_title("Training Data - F Score") + axs[0, 2].set_ylabel("F Score") + + axs[1, 0].plot(valid_log_losses) + axs[1, 0].set_title("Validation Data - Dice Loss") + axs[1, 0].set_ylabel("BCE Loss") + axs[1, 1].plot(valid_log_iou) + axs[1, 1].set_title("Validation Data - IOU Score") + axs[1, 1].set_ylabel("IOU Score") + axs[1, 2].plot(valid_log_f) + axs[1, 2].set_title("Validation Data - F Score") + axs[1, 2].set_ylabel("F Score") fig.suptitle("Loss, Fscore, IOU Scores") for x in axs.flat: - x.set(xlabel='EPOCHS') - + x.set(xlabel="EPOCHS") plt.savefig(file_name) + def plot_histories(train_logs, valid_logs, file_name): - fig, axs = plt.subplots(2, 3, figsize=(24, 16), tight_layout=True) - - axs[0,0].plot(train_logs["losses"]) - axs[0,0].set_title("Training Data - BCE Loss") - axs[0,0].set_ylabel("BCE Loss") - axs[0,1].plot(train_logs["iou_scores"]) - axs[0,1].set_title("Training Data - IOU Score") - axs[0,1].set_ylabel("IOU Score") - axs[0,2].plot(train_logs["f_scores"]) - axs[0,2].set_title("Training Data - F Score") - axs[0,2].set_ylabel("F Score") - - axs[1,0].plot(valid_logs["losses"]) - axs[1,0].set_title("Validation Data - BCE Loss") - axs[1,0].set_ylabel("BCE Loss") - axs[1,1].plot(valid_logs["iou_scores"]) - axs[1,1].set_title("Validation Data - IOU Score") - axs[1,1].set_ylabel("IOU Score") - axs[1,2].plot(valid_logs["f_scores"]) - axs[1,2].set_title("Validation Data - F 
Score") - axs[1,2].set_ylabel("F Score") + + axs[0, 0].plot(train_logs["losses"]) + axs[0, 0].set_title("Training Data - BCE Loss") + axs[0, 0].set_ylabel("BCE Loss") + axs[0, 1].plot(train_logs["iou_scores"]) + axs[0, 1].set_title("Training Data - IOU Score") + axs[0, 1].set_ylabel("IOU Score") + axs[0, 2].plot(train_logs["f_scores"]) + axs[0, 2].set_title("Training Data - F Score") + axs[0, 2].set_ylabel("F Score") + + axs[1, 0].plot(valid_logs["losses"]) + axs[1, 0].set_title("Validation Data - BCE Loss") + axs[1, 0].set_ylabel("BCE Loss") + axs[1, 1].plot(valid_logs["iou_scores"]) + axs[1, 1].set_title("Validation Data - IOU Score") + axs[1, 1].set_ylabel("IOU Score") + axs[1, 2].plot(valid_logs["f_scores"]) + axs[1, 2].set_title("Validation Data - F Score") + axs[1, 2].set_ylabel("F Score") fig.suptitle("Loss, Fscore, IOU Scores") for x in axs.flat: - x.set(xlabel='EPOCHS') - + x.set(xlabel="EPOCHS") plt.savefig(file_name) -def train(data_directory : str, - groundtruth_basedirname : str, - device : str, - model_file : str, - patience : int): - - +def train( + data_directory: str, + groundtruth_basedirname: str, + device: str, + model_file: str, + patience: int, +): # get the training images and masks train_directory = os.path.join(data_directory, "train") - x_train_dir = os.path.join(train_directory, "image") - y_train_dir = os.path.join(train_directory, groundtruth_basedirname) + x_train_dir = os.path.join(train_directory, "image") + y_train_dir = os.path.join(train_directory, groundtruth_basedirname) validation_directory = os.path.join(data_directory, "validation") - x_valid_dir = os.path.join(validation_directory, "image") - y_valid_dir = os.path.join(validation_directory, groundtruth_basedirname) + x_valid_dir = os.path.join(validation_directory, "image") + y_valid_dir = os.path.join(validation_directory, groundtruth_basedirname) # model building prerequistes - UNET_basicmodel = smp.Unet(in_channels=1, - encoder_weights = None) + UNET_basicmodel = 
smp.Unet(in_channels=1, encoder_weights=None) model = UNET_basicmodel.to(device) model.train() - print(summary(model, input_size=(1,512,512))) - - train_dataset = DatasetforPytorch(images_dir=x_train_dir, masks_dir=y_train_dir, - augmentations=get_training_augmentation()) - valid_dataset = DatasetforPytorch(images_dir=x_valid_dir, masks_dir=y_valid_dir, - augmentations=get_validation_augmentation()) - - train_loader = DataLoader(train_dataset, batch_size=8, shuffle=False, num_workers=12) + print(summary(model, input_size=(1, 512, 512))) + + train_dataset = DatasetforPytorch( + images_dir=x_train_dir, + masks_dir=y_train_dir, + augmentations=get_training_augmentation(), + ) + valid_dataset = DatasetforPytorch( + images_dir=x_valid_dir, + masks_dir=y_valid_dir, + augmentations=get_validation_augmentation(), + ) + + train_loader = DataLoader( + train_dataset, batch_size=8, shuffle=False, num_workers=12, + ) valid_loader = DataLoader(valid_dataset, batch_size=1, shuffle=False, num_workers=4) - loss = DiceLoss() + loss = DiceLoss() fscore_fxn = smp.utils.metrics.Fscore(threshold=0.5) - iou_fxn = smp.utils.metrics.IoU(threshold=0.5) - sig = nn.Sigmoid() + iou_fxn = smp.utils.metrics.IoU(threshold=0.5) + sig = nn.Sigmoid() - optimizer = torch.optim.Adam(UNET_basicmodel.parameters(), lr=0.0001) # only calculate for the parameters specified - # tqdm_train_loader = tqdm(train_loader) - # tqdm_valid_loader = tqdm(valid_loader) + optimizer = torch.optim.Adam( + UNET_basicmodel.parameters(), lr=0.0001, + ) # only calculate for the parameters specified train_logs_list = {"losses": [], "f_scores": [], "iou_scores": []} valid_logs_list = {"losses": [], "f_scores": [], "iou_scores": []} # relevant for the while loop max_score = 0 - epoch = 0 + epoch = 0 early_stopping_counter = 0 stop_the_training = False - while stop_the_training == False: - - epoch_loss = 0 - epoch_iou = 0 + while stop_the_training is False: + epoch_loss = 0 + epoch_iou = 0 epoch_fscore = 0 - for (data, target) 
in train_loader: + for data, target in train_loader: data, target = data.to(device), target.to(device) - optimizer.zero_grad() # clear all data from optimizer.step() + optimizer.zero_grad() # clear all data from optimizer.step() output = model(data) probability = sig(output) - assert torch.isnan(output).any() == False - assert torch.isinf(output).any() == False - losses = loss.forward(probability, target) # take inputs, and pass thru till we get to the - # numbers we want to optimize, which is the loss function (losses.item()) + assert not torch.isnan(output).any() + assert not torch.isinf(output).any() + losses = loss.forward( + probability, target, + ) # take inputs, and pass thru till we get to the + # numbers we want to optimize, which is the loss function (losses.item()) fscore = fscore_fxn.forward(probability, target) - iou = iou_fxn.forward(probability, target) - # tqdm_train_loader.set_description(f"LOSS: {losses.item()}, F SCORE {fscore.item()}, IOU SCORE {iou.item()}") - epoch_loss += losses.item()/len(train_loader) - epoch_fscore += fscore.item()/len(train_loader) - epoch_iou += iou.item()/len(train_loader) - losses.backward() # applying back propagation, cacluating the gradients/derivatives. - optimizer.step() # this updates weights. - + iou = iou_fxn.forward(probability, target) + epoch_loss += losses.item() / len(train_loader) + epoch_fscore += fscore.item() / len(train_loader) + epoch_iou += iou.item() / len(train_loader) + losses.backward() # applying back propagation, calculating the gradients/derivatives. + optimizer.step() # this updates weights. 
train_logs_list["losses"].append(epoch_loss) train_logs_list["f_scores"].append(epoch_fscore) train_logs_list["iou_scores"].append(epoch_iou) - print(f"TRAIN EPOCH {epoch}: Loss {epoch_loss}, F Score {epoch_fscore}, Iou Score {epoch_iou}") + print( + f"TRAIN EPOCH {epoch}: Loss {epoch_loss}, F Score {epoch_fscore}, Iou Score {epoch_iou}", + ) - epoch_loss = 0 - epoch_iou = 0 + epoch_loss = 0 + epoch_iou = 0 epoch_fscore = 0 - for (data, target) in valid_loader: - data, target = data.to(device), target.to(device) + for data, target in valid_loader: + data, target = data.to(device), target.to(device) optimizer.zero_grad() output = model(data) probability = sig(output) - assert torch.isnan(output).any() == False - assert torch.isinf(output).any() == False + assert not torch.isnan(output).any() + assert not torch.isinf(output).any() losses = loss(probability, target) fscore = fscore_fxn.forward(probability, target) - iou = iou_fxn.forward(probability, target) - # tqdm_valid_loader.set_description(f"LOSS: {losses.item()}, F SCORE {fscore.item()}, IOU SCORE {iou.item()}") - epoch_loss += losses.item()/len(valid_loader) - epoch_fscore += fscore.item()/len(valid_loader) - epoch_iou += iou.item()/len(valid_loader) + iou = iou_fxn.forward(probability, target) + epoch_loss += losses.item() / len(valid_loader) + epoch_fscore += fscore.item() / len(valid_loader) + epoch_iou += iou.item() / len(valid_loader) losses.backward() optimizer.step() valid_logs_list["losses"].append(epoch_loss) valid_logs_list["f_scores"].append(epoch_fscore) valid_logs_list["iou_scores"].append(epoch_iou) - print(f"VALID EPOCH {epoch}: Loss {epoch_loss}, F Score {epoch_fscore}, Iou Score {epoch_iou}") + print( + f"VALID EPOCH {epoch}: Loss {epoch_loss}, F Score {epoch_fscore}, Iou Score {epoch_iou}", + ) if epoch == 25: - optimizer.param_groups[0]['lr'] = 1e-5 - print('Decrease decoder learning rate to 1e-5!') + optimizer.param_groups[0]["lr"] = 1e-5 + print("Decrease decoder learning rate to 
1e-5!") if epoch_fscore > max_score: max_score = epoch_fscore torch.save(UNET_basicmodel, model_file) - print("MODEL SAVED with F Score of {}".format(epoch_fscore)) + print(f"MODEL SAVED with F Score of {epoch_fscore}") early_stopping_counter = 0 else: - early_stopping_counter += 1 # then add one to the counter + early_stopping_counter += 1 # then add one to the counter print(f"EARLY STOPPING COUNTER PLUS ONE: {early_stopping_counter}") if early_stopping_counter >= patience: stop_the_training = True - - if (epoch%100) == 0: + + if (epoch % 100) == 0: torch.save(UNET_basicmodel, model_file[:-4] + f"_{epoch}.pth") epoch += 1 @@ -352,87 +351,79 @@ def train(data_directory : str, return train_logs_list, valid_logs_list -def visualize_output(model_pathway, - data_directory, - device, - output_dir, - groundtruth_basedir): - +def visualize_output( + model_pathway, data_directory, device, output_dir, groundtruth_basedir, +): def add_to_axis(image, groundtruth, threshold, axis=None): - - new_img = copy.deepcopy(image) - new_img[new_img < threshold] = 0 + new_img[new_img < threshold] = 0 new_img[new_img >= threshold] = 1 - f1_score = fbeta_score(y_true=groundtruth, - y_pred=new_img, - average=None, - beta=1, zero_division='warn') + f1_score = fbeta_score( + y_true=groundtruth, + y_pred=new_img, + average=None, + beta=1, + zero_division="warn", + ) f1_score = np.around(np.average(f1_score), 4) - j_score = jaccard_score(y_true=groundtruth, - y_pred=new_img, - average=None, zero_division='warn') - j_score = np.around(np.average(j_score), 4) - # print(f1_score, j_score) + j_score = jaccard_score( + y_true=groundtruth, y_pred=new_img, average=None, zero_division="warn", + ) + j_score = np.around(np.average(j_score), 4) - if axis != None: + if axis is not None: axis.imshow(new_img) - axis.set_title(f"Threshold: {threshold} - F1: {f1_score}, JACCARD: {j_score}") + axis.set_title( + f"Threshold: {threshold} - F1: {f1_score}, JACCARD: {j_score}", + ) return f1_score, j_score - + sig 
= nn.Sigmoid() - + best_model = torch.load(model_pathway) model_name = os.path.basename(model_pathway) - model_info = model_name.split("_") - encoder = "resnet34" - encoder_weights = "imagenet" + model_name.split("_") test_directory = os.path.join(data_directory, "test") - x_test_dir = os.path.join(test_directory, "image") - y_test_dir = os.path.join(test_directory, groundtruth_basedir) - num_images = len(os.listdir(x_test_dir)) + x_test_dir = os.path.join(test_directory, "image") + y_test_dir = os.path.join(test_directory, groundtruth_basedir) + len(os.listdir(x_test_dir)) + - # preprocess_input = smp.encoders.get_preprocessing_fn(encoder, pretrained='imagenet') - # Dataset for visualizing test_dataset_vis = DatasetforPytorch(images_dir=x_test_dir, masks_dir=y_test_dir) # create test dataset - test_dataset = preprocessing=DatasetforPytorch(images_dir=x_test_dir, masks_dir=y_test_dir) + test_dataset = DatasetforPytorch( + images_dir=x_test_dir, masks_dir=y_test_dir, + ) nums = [958, 1148, 574, 698, 811, 925] max_f1 = {} - max_j = {} + max_j = {} for i in tqdm(range(len(nums))): - # n = np.random.choice(len(test_dataset_vis)) n = nums[i] - # n = i image_vis = test_dataset_vis[n][0] image, gt_mask = test_dataset[n] - + gt_mask = gt_mask.squeeze() - # print(np.unique(gt_mask)) - + x_tensor = torch.from_numpy(image).to(device).unsqueeze(0) pr_mask = best_model.predict(x_tensor) - pr_mask = sig(pr_mask) # need to make predicitions range from 0 to 1 + pr_mask = sig(pr_mask) # need to make predicitions range from 0 to 1 pr_mask = pr_mask.squeeze().cpu().numpy() - pr_mask_shape = pr_mask.shape - # print("IMAGE, GROUNDTRUTH, and PREDICTED unique values, respectively") - # print(np.unique(image)) - # print(np.unique(gt_mask)) - # print(np.unique(pr_mask)) - fig, ((ax_img, ax_groundtruth, ax_prediction), - (ax_pred1, ax_pred2, ax_pred3), + fig, ( + (ax_img, ax_groundtruth, ax_prediction), + (ax_pred1, ax_pred2, ax_pred3), (ax_pred4, ax_pred5, ax_pred6), - (ax_pred7, ax_pred8, 
ax_pred9))= plt.subplots(4, 3, figsize = (24, 24)) + (ax_pred7, ax_pred8, ax_pred9), + ) = plt.subplots(4, 3, figsize=(24, 24)) ax_img.imshow(image_vis.squeeze()) ax_img.set_title("Image") @@ -441,44 +432,71 @@ def add_to_axis(image, groundtruth, threshold, axis=None): ax_prediction.imshow(pr_mask) ax_prediction.set_title("Prediction Channel 0") - f1_score_1, j_score_1 = add_to_axis(image=pr_mask, groundtruth=gt_mask, threshold=0.1, axis=ax_pred1) - f1_score_2, j_score_2 = add_to_axis(image=pr_mask, groundtruth=gt_mask, threshold=0.2, axis=ax_pred2) - f1_score_3, j_score_3 = add_to_axis(image=pr_mask, groundtruth=gt_mask, threshold=0.3, axis=ax_pred3) - f1_score_4, j_score_4 = add_to_axis(image=pr_mask, groundtruth=gt_mask, threshold=0.4, axis=ax_pred4) - f1_score_5, j_score_5 = add_to_axis(image=pr_mask, groundtruth=gt_mask, threshold=0.5, axis=ax_pred5) - f1_score_6, j_score_6 = add_to_axis(image=pr_mask, groundtruth=gt_mask, threshold=0.6, axis=ax_pred6) - f1_score_7, j_score_7 = add_to_axis(image=pr_mask, groundtruth=gt_mask, threshold=0.7, axis=ax_pred7) - f1_score_8, j_score_8 = add_to_axis(image=pr_mask, groundtruth=gt_mask, threshold=0.8, axis=ax_pred8) - f1_score_9, j_score_9 = add_to_axis(image=pr_mask, groundtruth=gt_mask, threshold=0.9, axis=ax_pred9) - - # f1_score_1, j_score_1 = add_to_axis(image=pr_mask, groundtruth=gt_mask, threshold=0.1) # axis=ax_pred1) - # f1_score_2, j_score_2 = add_to_axis(image=pr_mask, groundtruth=gt_mask, threshold=0.2) # axis=ax_pred1) - # f1_score_3, j_score_3 = add_to_axis(image=pr_mask, groundtruth=gt_mask, threshold=0.3) # axis=ax_pred1) - # f1_score_4, j_score_4 = add_to_axis(image=pr_mask, groundtruth=gt_mask, threshold=0.4) # axis=ax_pred1) - # f1_score_5, j_score_5 = add_to_axis(image=pr_mask, groundtruth=gt_mask, threshold=0.5) # axis=ax_pred1) - # f1_score_6, j_score_6 = add_to_axis(image=pr_mask, groundtruth=gt_mask, threshold=0.6) # axis=ax_pred1) - # f1_score_7, j_score_7 = add_to_axis(image=pr_mask, 
groundtruth=gt_mask, threshold=0.7) # axis=ax_pred1) - # f1_score_8, j_score_8 = add_to_axis(image=pr_mask, groundtruth=gt_mask, threshold=0.8) # axis=ax_pred1) - # f1_score_9, j_score_9 = add_to_axis(image=pr_mask, groundtruth=gt_mask, threshold=0.9) # axis=ax_pred1) - - list_threshold_f1 = [f1_score_1, f1_score_2, f1_score_3, f1_score_4, \ - f1_score_5, f1_score_6, f1_score_7, f1_score_8, f1_score_9] - list_threshold_j = [j_score_1, j_score_2, j_score_3, j_score_4, \ - j_score_5, j_score_6, j_score_7, j_score_8, j_score_9] + f1_score_1, j_score_1 = add_to_axis( + image=pr_mask, groundtruth=gt_mask, threshold=0.1, axis=ax_pred1, + ) + f1_score_2, j_score_2 = add_to_axis( + image=pr_mask, groundtruth=gt_mask, threshold=0.2, axis=ax_pred2, + ) + f1_score_3, j_score_3 = add_to_axis( + image=pr_mask, groundtruth=gt_mask, threshold=0.3, axis=ax_pred3, + ) + f1_score_4, j_score_4 = add_to_axis( + image=pr_mask, groundtruth=gt_mask, threshold=0.4, axis=ax_pred4, + ) + f1_score_5, j_score_5 = add_to_axis( + image=pr_mask, groundtruth=gt_mask, threshold=0.5, axis=ax_pred5, + ) + f1_score_6, j_score_6 = add_to_axis( + image=pr_mask, groundtruth=gt_mask, threshold=0.6, axis=ax_pred6, + ) + f1_score_7, j_score_7 = add_to_axis( + image=pr_mask, groundtruth=gt_mask, threshold=0.7, axis=ax_pred7, + ) + f1_score_8, j_score_8 = add_to_axis( + image=pr_mask, groundtruth=gt_mask, threshold=0.8, axis=ax_pred8, + ) + f1_score_9, j_score_9 = add_to_axis( + image=pr_mask, groundtruth=gt_mask, threshold=0.9, axis=ax_pred9, + ) + + + list_threshold_f1 = [ + f1_score_1, + f1_score_2, + f1_score_3, + f1_score_4, + f1_score_5, + f1_score_6, + f1_score_7, + f1_score_8, + f1_score_9, + ] + list_threshold_j = [ + j_score_1, + j_score_2, + j_score_3, + j_score_4, + j_score_5, + j_score_6, + j_score_7, + j_score_8, + j_score_9, + ] threshold_max_f1 = max(list_threshold_f1) - threshold_max_j = max(list_threshold_j) + threshold_max_j = max(list_threshold_j) max_f1[i] = threshold_max_f1 - max_j[i] = 
threshold_max_j + max_j[i] = threshold_max_j fig.suptitle(f"Testing Image {n}") plot_name = os.path.join(output_dir, f"testingimage_{n}") plt.savefig(plot_name) average_f1 = np.average(list(max_f1.values())) - average_j = np.average(list(max_j.values())) + average_j = np.average(list(max_j.values())) max_f1 = sorted(max_f1.items(), key=lambda kv: kv[1]) - max_j = sorted(max_j.items(), key=lambda kv: kv[1]) + max_j = sorted(max_j.items(), key=lambda kv: kv[1]) print(average_f1, average_j) print("MAX F1 Sorted") @@ -488,52 +506,64 @@ def add_to_axis(image, groundtruth, threshold, axis=None): def main(): - # parameters object_identified = "cell" - device = "cuda:0" + device = "cuda:0" - data_basedirectory = "/home/vihanimm/SegmentationModelToolkit/Data/tif_data/" + data_basedirectory = "/home/vihanimm/SegmentationModelToolkit/Data/tif_data/" groundtruth_basedir = "groundtruth_centerbinary" - file_directory = os.path.dirname(os.path.abspath(__file__)) - output_model_directory = os.path.join(file_directory, "models") + file_directory = os.path.dirname(os.path.abspath(__file__)) + output_model_directory = os.path.join(file_directory, "models") if not os.path.isdir(output_model_directory): os.mkdir(output_model_directory) - assert os.path.isdir(output_model_directory), f"{output_model_directory} is not an existing directory for models" - assert os.path.isdir(data_basedirectory), f"{data_basedirectory} is not an existing directory for data" - assert os.path.isdir(file_directory), f"{file_directory} is not existing directory for {__file__}" - - output_model_file = os.path.join(output_model_directory, "UNET_BinaryDiceLoss.pth") - output_train_json = os.path.join(output_model_directory, "{}_train.json".format(output_model_file[:-4])) - output_valid_json = os.path.join(output_model_directory, "{}_valid.json".format(output_model_file[:-4])) - output_score_graph = os.path.join(output_model_directory, "{}.jpg".format(output_model_file[:-4])) + assert os.path.isdir( + 
output_model_directory, + ), f"{output_model_directory} is not an existing directory for models" + assert os.path.isdir( + data_basedirectory, + ), f"{data_basedirectory} is not an existing directory for data" + assert os.path.isdir( + file_directory, + ), f"{file_directory} is not existing directory for {__file__}" + + output_model_file = os.path.join(output_model_directory, "UNET_BinaryDiceLoss.pth") + output_train_json = os.path.join( + output_model_directory, f"{output_model_file[:-4]}_train.json", + ) + output_valid_json = os.path.join( + output_model_directory, f"{output_model_file[:-4]}_valid.json", + ) + output_score_graph = os.path.join( + output_model_directory, f"{output_model_file[:-4]}.jpg", + ) data_directory = os.path.join(data_basedirectory, object_identified) assert os.path.isdir(data_directory), "f{data_directory} does not exist" - train_history, valid_history = train(data_directory = data_directory, - groundtruth_basedirname = groundtruth_basedir, - device = device, - model_file = output_model_file, - patience = 10) + train_history, valid_history = train( + data_directory=data_directory, + groundtruth_basedirname=groundtruth_basedir, + device=device, + model_file=output_model_file, + patience=10, + ) - json.dump(train_history, open(output_train_json, 'w')) - json.dump(valid_history, open(output_valid_json, 'w')) + json.dump(train_history, open(output_train_json, "w")) + json.dump(valid_history, open(output_valid_json, "w")) - # plot_fromnohup(output_score_graph) - plot_histories(train_logs = train_history, - valid_logs = valid_history, - file_name = output_score_graph) + plot_histories( + train_logs=train_history, valid_logs=valid_history, file_name=output_score_graph, + ) - # output_model_file = "/home/vihanimm/SegmentationModelToolkit/workdir/pytorch_binary/models/UNET_BinaryDiceLoss_1800.pth" - visualize_output(model_pathway = output_model_file, - data_directory = data_directory, - device = device, - output_dir = file_directory, - 
groundtruth_basedir = groundtruth_basedir) - -main() + visualize_output( + model_pathway=output_model_file, + data_directory=data_directory, + device=device, + output_dir=file_directory, + groundtruth_basedir=groundtruth_basedir, + ) +main() diff --git a/segmentation/polus-smp-training-plugin/plugin.json b/segmentation/polus-smp-training-plugin/plugin.json index 29daf7b5b..5d88567bf 100644 --- a/segmentation/polus-smp-training-plugin/plugin.json +++ b/segmentation/polus-smp-training-plugin/plugin.json @@ -1,3034 +1,3034 @@ { - "name": "Demo SMP Training/Inference", - "version": "0.5.11", - "title": "Segmentation Models Training and Inference", - "description": "Segmentation models training and inference plugin.", - "author": "Gauhar Bains (gauhar.bains@labshare.org), Najib Ishaq (najib.ishaq@axleinfo.com), Madhuri Vihani (madhuri.vihani@nih.gov), Benjamin Houghton (benjamin.houghton@axleinfo.com)", - "institution": "National Center for Advancing Translational Sciences, National Institutes of Health", - "repository": "https://github.com/PolusAI/polus-plugins/tree/dev/segmentation", - "website": "https://ncats.nih.gov/preclinical/core/informatics", - "citation": "", - "containerId": "polusai/smp-training-plugin:0.5.11", - "inputs": [ - { - "name": "inferenceMode", - "description": "'active' or 'inactive' for whether to run in inference mode.", - "type": "enum", - "required": true, - "options": { - "values": [ - "active", - "inactive" - ] - } - }, - { - "name": "imagesInferenceDir", - "description": "Collection containing images on which to run inference.", - "type": "collection", - "required": false, - "options": null - }, - { - "name": "inferencePattern", - "description": "Filename pattern for images on which to run inference.", - "type": "string", - "required": false, - "options": null - }, - { - "name": "pretrainedModel", - "description": "Path to a model that was previously trained with this plugin. 
If starting fresh, you must instead provide: 'modelName', 'encoderBase', 'encoderVariant', 'encoderWeights', and 'optimizerName'. See the README for available options.", - "type": "genericData", - "required": false, - "options": null - }, - { - "name": "modelName", - "description": "Model architecture to use. Required if starting fresh.", - "type": "enum", - "options": { - "values": [ - "Unet", - "UnetPlusPlus", - "MAnet", - "Linknet", - "FPN", - "PSPNet", - "PAN", - "DeepLabV3", - "DeepLabV3Plus" - ] - }, - "required": false - }, - { - "name": "encoderBase", - "description": "The name of the base encoder to use.", - "type": "enum", - "options": { - "values": [ - "ResNet", - "ResNeXt", - "ResNeSt", - "Res2Ne(X)t", - "RegNet(x/y)", - "GERNet", - "SE-Net", - "SK-ResNe(X)t", - "DenseNet", - "Inception", - "EfficientNet", - "MobileNet", - "DPN", - "VGG" - ] - }, - "required": false - }, - { - "name": "encoderVariant", - "description": "The name of the specific variant to use.", - "type": "enum", - "options": { - "values": [ - "resnet18", - "resnet34", - "resnet50", - "resnet101", - "resnet152", - "resnext50_32x4d", - "resnext101_32x4d", - "resnext101_32x8d", - "resnext101_32x16d", - "resnext101_32x32d", - "resnext101_32x48d", - "timm-resnest14d", - "timm-resnest26d", - "timm-resnest50d", - "timm-resnest101e", - "timm-resnest200e", - "timm-resnest269e", - "timm-resnest50d_4s2x40d", - "timm-resnest50d_1s4x24d", - "timm-res2net50_26w_4s", - "timm-res2net101_26w_4s", - "timm-res2net50_26w_6s", - "timm-res2net50_26w_8s", - "timm-res2net50_48w_2s", - "timm-res2net50_14w_8s", - "timm-res2next50", - "timm-regnetx_002", - "timm-regnetx_004", - "timm-regnetx_006", - "timm-regnetx_008", - "timm-regnetx_016", - "timm-regnetx_032", - "timm-regnetx_040", - "timm-regnetx_064", - "timm-regnetx_080", - "timm-regnetx_120", - "timm-regnetx_160", - "timm-regnetx_320", - "timm-regnety_002", - "timm-regnety_004", - "timm-regnety_006", - "timm-regnety_008", - "timm-regnety_016", - 
"timm-regnety_032", - "timm-regnety_040", - "timm-regnety_064", - "timm-regnety_080", - "timm-regnety_120", - "timm-regnety_160", - "timm-regnety_320", - "timm-gernet_s", - "timm-gernet_m", - "timm-gernet_l", - "senet154", - "se_resnet50", - "se_resnet101", - "se_resnet152", - "se_resnext50_32x4d", - "se_resnext101_32x4d", - "timm-skresnet18", - "timm-skresnet34", - "timm-skresnext50_32x4d", - "densenet121", - "densenet169", - "densenet201", - "densenet161", - "inceptionresnetv2", - "inceptionv4", - "xception", - "efficientnet-b0", - "efficientnet-b1", - "efficientnet-b2", - "efficientnet-b3", - "efficientnet-b4", - "efficientnet-b5", - "efficientnet-b6", - "efficientnet-b7", - "timm-efficientnet-b0", - "timm-efficientnet-b1", - "timm-efficientnet-b2", - "timm-efficientnet-b3", - "timm-efficientnet-b4", - "timm-efficientnet-b5", - "timm-efficientnet-b6", - "timm-efficientnet-b7", - "timm-efficientnet-b8", - "timm-efficientnet-l2", - "timm-efficientnet-lite0", - "timm-efficientnet-lite1", - "timm-efficientnet-lite2", - "timm-efficientnet-lite3", - "timm-efficientnet-lite4", - "mobilenet_v2", - "timm-mobilenetv3_large_075", - "timm-mobilenetv3_large_100", - "timm-mobilenetv3_large_minimal_100", - "timm-mobilenetv3_small_075", - "timm-mobilenetv3_small_100", - "timm-mobilenetv3_small_minimal_100", - "dpn68", - "dpn68b", - "dpn92", - "dpn98", - "dpn107", - "dpn131", - "vgg11", - "vgg11_bn", - "vgg13", - "vgg13_bn", - "vgg16", - "vgg16_bn", - "vgg19", - "vgg19_bn" - ] - }, - "required": false - }, - { - "name": "encoderWeights", - "description": "The name of the pretrained weights to use.", - "type": "enum", - "options": { - "values": [ - "advprop", - "imagenet", - "imagenet+5k", - "imagenet+background", - "instagram", - "noisy-student", - "random", - "ssl", - "swsl" - ] - }, - "required": false - }, - { - "name": "optimizerName", - "description": "Name of optimization algorithm to use for training the model. 
Required if starting fresh.", - "type": "enum", - "options": { - "values": [ - "Adadelta", - "Adagrad", - "Adam", - "AdamW", - "SparseAdam", - "Adamax", - "ASGD", - "LBFGS", - "RMSprop", - "Rprop", - "SGD" - ] - }, - "required": false - }, - { - "name": "batchSize", - "description": "Size of each batch for training. If left unspecified, we use the maximum possible based on memory constraints.", - "type": "integer", - "required": false, - "options": null - }, - { - "name": "imagesTrainDir", - "description": "Collection containing images to use for training.", - "type": "collection", - "required": false, - "options": null - }, - { - "name": "labelsTrainDir", - "description": "Collection containing labels, i.e. the ground-truth, for the training images.", - "type": "collection", - "required": false, - "options": null - }, - { - "name": "trainPattern", - "description": "Filename pattern for training images and labels.", - "type": "string", - "required": false, - "options": null - }, - { - "name": "imagesValidDir", - "description": "Collection containing images to use for validation.", - "type": "collection", - "required": false, - "options": null - }, - { - "name": "labelsValidDir", - "description": "Collection containing labels, i.e. 
the ground-truth, for the validation images.", - "type": "collection", - "required": false, - "options": null - }, - { - "name": "validPattern", - "description": "Filename pattern for validation images and labels.", - "type": "string", - "required": false, - "options": null - }, - { - "name": "device", - "description": "Which device to use for the model", - "type": "string", - "required": false, - "options": null - }, - { - "name": "checkpointFrequency", - "description": "How often to save model checkpoints", - "type": "integer", - "required": false, - "options": null - }, - { - "name": "lossName", - "description": "Name of loss function to use.", - "type": "enum", - "options": { - "values": [ - "JaccardLoss", - "DiceLoss", - "TverskyLoss", - "FocalLoss", - "LovaszLoss", - "SoftBCEWithLogitsLoss", - "SoftCrossEntropyLoss", - "MCCLoss" - ] - }, - "required": false - }, - { - "name": "maxEpochs", - "description": "Maximum number of epochs for which to continue training the model.", - "type": "integer", - "required": false, - "options": null - }, - { - "name": "patience", - "description": "Maximum number of epochs to wait for model to improve.", - "type": "integer", - "required": false, - "options": null - }, - { - "name": "minDelta", - "description": "Minimum improvement in loss to reset patience.", - "type": "number", - "required": false, - "options": null - } - ], - "outputs": [ - { - "name": "outputDir", - "type": "genericData", - "description": "In training mode, this contains the trained model and checkpoints. In inference mode, this contains the output labels." 
- } - ], - "ui": [ - { - "key": "inputs.inferenceMode", - "title": "inferenceMode", - "description": "'active' or 'inactive' for whether to run in inference mode.", - "default": "inactive" - }, - { - "key": "inputs.imagesInferenceDir", - "title": "imagesInferenceDir", - "description": "Collection containing images on which to run inference.", - "condition": "model.inputs.inferenceMode=='active'" - }, - { - "key": "inputs.inferencePattern", - "title": "inferencePattern", - "description": "Filename pattern for images on which to run inference.", - "default": ".*", - "condition": "model.inputs.inferenceMode=='active'" - }, - { - "key": "inputs.pretrainedModel", - "title": "pretrainedModel", - "description": "Path to a model that was previously trained with this plugin. If starting fresh, you must instead provide: 'modelName', 'encoderBase', 'encoderVariant', 'encoderWeights', and 'optimizerName'. See the README for available options." - }, - { - "key": "inputs.modelName", - "title": "modelName", - "description": "Model architecture to use. Required if starting fresh.", - "default": "Unet" - }, - { - "key": "inputs.encoderBase", - "title": "encoderBase", - "description": "The name of the base encoder to use.", - "default": "ResNet" - }, - { - "key": "inputs.encoderVariant", - "title": "encoderVariant", - "description": "The name of the specific variant to use.", - "default": "resnet34" - }, - { - "key": "inputs.encoderWeights", - "title": "encoderWeights", - "description": "The name of the pretrained weights to use.", - "default": "imagenet" - }, - { - "key": "inputs.optimizerName", - "title": "optimizerName", - "description": "Name of optimization algorithm to use for training the model. Required if starting fresh.", - "default": "Adam", - "condition": "model.inputs.inferenceMode=='inactive'" - }, - { - "key": "inputs.batchSize", - "title": "batchSize", - "description": "Size of each batch for training. 
If left unspecified, we use the maximum possible based on memory constraints.", - "condition": "model.inputs.inferenceMode=='inactive'" - }, - { - "key": "inputs.imagesTrainDir", - "title": "imagesTrainDir", - "description": "Collection containing images to use for training.", - "condition": "model.inputs.inferenceMode=='inactive'" - }, - { - "key": "inputs.labelsTrainDir", - "title": "labelsTrainDir", - "description": "Collection containing labels, i.e. the ground-truth, for the training images.", - "condition": "model.inputs.inferenceMode=='inactive'" - }, - { - "key": "inputs.trainPattern", - "title": "trainPattern", - "description": "Filename pattern for training images and labels.", - "default": ".*", - "condition": "model.inputs.inferenceMode=='inactive'" - }, - { - "key": "inputs.imagesValidDir", - "title": "imagesValidDir", - "description": "Collection containing images to use for validation.", - "condition": "model.inputs.inferenceMode=='inactive'" - }, - { - "key": "inputs.labelsValidDir", - "title": "labelsValidDir", - "description": "Collection containing labels, i.e. 
the ground-truth, for the validation images.", - "condition": "model.inputs.inferenceMode=='inactive'" - }, - { - "key": "inputs.validPattern", - "title": "validPattern", - "description": "Filename pattern for validation images and labels.", - "default": ".*", - "condition": "model.inputs.inferenceMode=='inactive'" - }, - { - "key": "inputs.device", - "title": "device", - "description": "Which device to use for the model", - "default": "gpu" - }, - { - "key": "inputs.checkpointFrequency", - "title": "checkpointFrequency", - "description": "How often to save model checkpoints", - "condition": "model.inputs.inferenceMode=='inactive'" - }, - { - "key": "inputs.lossName", - "title": "lossName", - "description": "Name of loss function to use.", - "default": "JaccardLoss", - "condition": "model.inputs.inferenceMode=='inactive'" - }, - { - "key": "inputs.maxEpochs", - "title": "maxEpochs", - "description": "Maximum number of epochs for which to continue training the model.", - "default": 100, - "condition": "model.inputs.inferenceMode=='inactive'" - }, - { - "key": "inputs.patience", - "title": "patience", - "description": "Maximum number of epochs to wait for model to improve.", - "default": 10, - "condition": "model.inputs.inferenceMode=='inactive'" - }, - { - "key": "inputs.minDelta", - "title": "minDelta", - "description": "Minimum improvement in loss to reset patience.", - "default": 0.0001, - "condition": "model.inputs.inferenceMode=='inactive'" - } - ], - "validators": [ - { - "condition": [ - { - "input": "encoderBase", - "value": "ResNet", - "eval": "==" - } - ], - "then": [ - { - "action": "show", - "input": "encoderVariant", - "values": [ - "resnet18", - "resnet34", - "resnet50", - "resnet101", - "resnet152" - ] - } - ] - }, - { - "condition": [ - { - "input": "encoderBase", - "value": "ResNeXt", - "eval": "==" - } - ], - "then": [ - { - "action": "show", - "input": "encoderVariant", - "values": [ - "resnext50_32x4d", - "resnext101_32x4d", - "resnext101_32x8d", 
- "resnext101_32x16d", - "resnext101_32x32d", - "resnext101_32x48d" - ] - } - ] - }, - { - "condition": [ - { - "input": "encoderBase", - "value": "ResNeSt", - "eval": "==" - } - ], - "then": [ - { - "action": "show", - "input": "encoderVariant", - "values": [ - "timm-resnest14d", - "timm-resnest26d", - "timm-resnest50d", - "timm-resnest101e", - "timm-resnest200e", - "timm-resnest269e", - "timm-resnest50d_4s2x40d", - "timm-resnest50d_1s4x24d" - ] - } - ] - }, - { - "condition": [ - { - "input": "encoderBase", - "value": "Res2Ne(X)t", - "eval": "==" - } - ], - "then": [ - { - "action": "show", - "input": "encoderVariant", - "values": [ - "timm-res2net50_26w_4s", - "timm-res2net101_26w_4s", - "timm-res2net50_26w_6s", - "timm-res2net50_26w_8s", - "timm-res2net50_48w_2s", - "timm-res2net50_14w_8s", - "timm-res2next50" - ] - } - ] - }, - { - "condition": [ - { - "input": "encoderBase", - "value": "RegNet(x/y)", - "eval": "==" - } - ], - "then": [ - { - "action": "show", - "input": "encoderVariant", - "values": [ - "timm-regnetx_002", - "timm-regnetx_004", - "timm-regnetx_006", - "timm-regnetx_008", - "timm-regnetx_016", - "timm-regnetx_032", - "timm-regnetx_040", - "timm-regnetx_064", - "timm-regnetx_080", - "timm-regnetx_120", - "timm-regnetx_160", - "timm-regnetx_320", - "timm-regnety_002", - "timm-regnety_004", - "timm-regnety_006", - "timm-regnety_008", - "timm-regnety_016", - "timm-regnety_032", - "timm-regnety_040", - "timm-regnety_064", - "timm-regnety_080", - "timm-regnety_120", - "timm-regnety_160", - "timm-regnety_320" - ] - } - ] - }, - { - "condition": [ - { - "input": "encoderBase", - "value": "GERNet", - "eval": "==" - } - ], - "then": [ - { - "action": "show", - "input": "encoderVariant", - "values": [ - "timm-gernet_s", - "timm-gernet_m", - "timm-gernet_l" - ] - } - ] - }, - { - "condition": [ - { - "input": "encoderBase", - "value": "SE-Net", - "eval": "==" - } - ], - "then": [ - { - "action": "show", - "input": "encoderVariant", - "values": [ - 
"senet154", - "se_resnet50", - "se_resnet101", - "se_resnet152", - "se_resnext50_32x4d", - "se_resnext101_32x4d" - ] - } - ] - }, - { - "condition": [ - { - "input": "encoderBase", - "value": "SK-ResNe(X)t", - "eval": "==" - } - ], - "then": [ - { - "action": "show", - "input": "encoderVariant", - "values": [ - "timm-skresnet18", - "timm-skresnet34", - "timm-skresnext50_32x4d" - ] - } - ] - }, - { - "condition": [ - { - "input": "encoderBase", - "value": "DenseNet", - "eval": "==" - } - ], - "then": [ - { - "action": "show", - "input": "encoderVariant", - "values": [ - "densenet121", - "densenet169", - "densenet201", - "densenet161" - ] - } - ] - }, - { - "condition": [ - { - "input": "encoderBase", - "value": "Inception", - "eval": "==" - } - ], - "then": [ - { - "action": "show", - "input": "encoderVariant", - "values": [ - "inceptionresnetv2", - "inceptionv4", - "xception" - ] - } - ] - }, - { - "condition": [ - { - "input": "encoderBase", - "value": "EfficientNet", - "eval": "==" - } - ], - "then": [ - { - "action": "show", - "input": "encoderVariant", - "values": [ - "efficientnet-b0", - "efficientnet-b1", - "efficientnet-b2", - "efficientnet-b3", - "efficientnet-b4", - "efficientnet-b5", - "efficientnet-b6", - "efficientnet-b7", - "timm-efficientnet-b0", - "timm-efficientnet-b1", - "timm-efficientnet-b2", - "timm-efficientnet-b3", - "timm-efficientnet-b4", - "timm-efficientnet-b5", - "timm-efficientnet-b6", - "timm-efficientnet-b7", - "timm-efficientnet-b8", - "timm-efficientnet-l2", - "timm-efficientnet-lite0", - "timm-efficientnet-lite1", - "timm-efficientnet-lite2", - "timm-efficientnet-lite3", - "timm-efficientnet-lite4" - ] - } - ] - }, - { - "condition": [ - { - "input": "encoderBase", - "value": "MobileNet", - "eval": "==" - } - ], - "then": [ - { - "action": "show", - "input": "encoderVariant", - "values": [ - "mobilenet_v2", - "timm-mobilenetv3_large_075", - "timm-mobilenetv3_large_100", - "timm-mobilenetv3_large_minimal_100", - 
"timm-mobilenetv3_small_075", - "timm-mobilenetv3_small_100", - "timm-mobilenetv3_small_minimal_100" - ] - } - ] - }, - { - "condition": [ - { - "input": "encoderBase", - "value": "DPN", - "eval": "==" - } - ], - "then": [ - { - "action": "show", - "input": "encoderVariant", - "values": [ - "dpn68", - "dpn68b", - "dpn92", - "dpn98", - "dpn107", - "dpn131" - ] - } - ] - }, - { - "condition": [ - { - "input": "encoderBase", - "value": "VGG", - "eval": "==" - } - ], - "then": [ - { - "action": "show", - "input": "encoderVariant", - "values": [ - "vgg11", - "vgg11_bn", - "vgg13", - "vgg13_bn", - "vgg16", - "vgg16_bn", - "vgg19", - "vgg19_bn" - ] - } - ] - }, - { - "condition": [ - { - "input": "encoderVariant", - "value": "resnet18", - "eval": "==" - } - ], - "then": [ - { - "action": "show", - "input": "encoderWeights", - "values": [ - "imagenet", - "ssl", - "swsl", - "random" - ] - } - ] - }, - { - "condition": [ - { - "input": "encoderVariant", - "value": "resnet34", - "eval": "==" - } - ], - "then": [ - { - "action": "show", - "input": "encoderWeights", - "values": [ - "imagenet", - "random" - ] - } - ] - }, - { - "condition": [ - { - "input": "encoderVariant", - "value": "resnet50", - "eval": "==" - } - ], - "then": [ - { - "action": "show", - "input": "encoderWeights", - "values": [ - "imagenet", - "ssl", - "swsl", - "random" - ] - } - ] - }, - { - "condition": [ - { - "input": "encoderVariant", - "value": "resnet101", - "eval": "==" - } - ], - "then": [ - { - "action": "show", - "input": "encoderWeights", - "values": [ - "imagenet", - "random" - ] - } - ] - }, - { - "condition": [ - { - "input": "encoderVariant", - "value": "resnet152", - "eval": "==" - } - ], - "then": [ - { - "action": "show", - "input": "encoderWeights", - "values": [ - "imagenet", - "random" - ] - } - ] - }, - { - "condition": [ - { - "input": "encoderVariant", - "value": "resnext50_32x4d", - "eval": "==" - } - ], - "then": [ - { - "action": "show", - "input": "encoderWeights", - "values": [ 
- "imagenet", - "ssl", - "swsl", - "random" - ] - } - ] - }, - { - "condition": [ - { - "input": "encoderVariant", - "value": "resnext101_32x4d", - "eval": "==" - } - ], - "then": [ - { - "action": "show", - "input": "encoderWeights", - "values": [ - "ssl", - "swsl", - "random" - ] - } - ] - }, - { - "condition": [ - { - "input": "encoderVariant", - "value": "resnext101_32x8d", - "eval": "==" - } - ], - "then": [ - { - "action": "show", - "input": "encoderWeights", - "values": [ - "imagenet", - "instagram", - "ssl", - "swsl", - "random" - ] - } - ] - }, - { - "condition": [ - { - "input": "encoderVariant", - "value": "resnext101_32x16d", - "eval": "==" - } - ], - "then": [ - { - "action": "show", - "input": "encoderWeights", - "values": [ - "instagram", - "ssl", - "swsl", - "random" - ] - } - ] - }, - { - "condition": [ - { - "input": "encoderVariant", - "value": "resnext101_32x32d", - "eval": "==" - } - ], - "then": [ - { - "action": "show", - "input": "encoderWeights", - "values": [ - "instagram", - "random" - ] - } - ] - }, - { - "condition": [ - { - "input": "encoderVariant", - "value": "resnext101_32x48d", - "eval": "==" - } - ], - "then": [ - { - "action": "show", - "input": "encoderWeights", - "values": [ - "instagram", - "random" - ] - } - ] - }, - { - "condition": [ - { - "input": "encoderVariant", - "value": "timm-resnest14d", - "eval": "==" - } - ], - "then": [ - { - "action": "show", - "input": "encoderWeights", - "values": [ - "imagenet", - "random" - ] - } - ] - }, - { - "condition": [ - { - "input": "encoderVariant", - "value": "timm-resnest26d", - "eval": "==" - } - ], - "then": [ - { - "action": "show", - "input": "encoderWeights", - "values": [ - "imagenet", - "random" - ] - } - ] - }, - { - "condition": [ - { - "input": "encoderVariant", - "value": "timm-resnest50d", - "eval": "==" - } - ], - "then": [ - { - "action": "show", - "input": "encoderWeights", - "values": [ - "imagenet", - "random" - ] - } - ] - }, - { - "condition": [ - { - "input": 
"encoderVariant", - "value": "timm-resnest101e", - "eval": "==" - } - ], - "then": [ - { - "action": "show", - "input": "encoderWeights", - "values": [ - "imagenet", - "random" - ] - } - ] - }, - { - "condition": [ - { - "input": "encoderVariant", - "value": "timm-resnest200e", - "eval": "==" - } - ], - "then": [ - { - "action": "show", - "input": "encoderWeights", - "values": [ - "imagenet", - "random" - ] - } - ] - }, - { - "condition": [ - { - "input": "encoderVariant", - "value": "timm-resnest269e", - "eval": "==" - } - ], - "then": [ - { - "action": "show", - "input": "encoderWeights", - "values": [ - "imagenet", - "random" - ] - } - ] - }, - { - "condition": [ - { - "input": "encoderVariant", - "value": "timm-resnest50d_4s2x40d", - "eval": "==" - } - ], - "then": [ - { - "action": "show", - "input": "encoderWeights", - "values": [ - "imagenet", - "random" - ] - } - ] - }, - { - "condition": [ - { - "input": "encoderVariant", - "value": "timm-resnest50d_1s4x24d", - "eval": "==" - } - ], - "then": [ - { - "action": "show", - "input": "encoderWeights", - "values": [ - "imagenet", - "random" - ] - } - ] - }, - { - "condition": [ - { - "input": "encoderVariant", - "value": "timm-res2net50_26w_4s", - "eval": "==" - } - ], - "then": [ - { - "action": "show", - "input": "encoderWeights", - "values": [ - "imagenet", - "random" - ] - } - ] - }, - { - "condition": [ - { - "input": "encoderVariant", - "value": "timm-res2net101_26w_4s", - "eval": "==" - } - ], - "then": [ - { - "action": "show", - "input": "encoderWeights", - "values": [ - "imagenet", - "random" - ] - } - ] - }, - { - "condition": [ - { - "input": "encoderVariant", - "value": "timm-res2net50_26w_6s", - "eval": "==" - } - ], - "then": [ - { - "action": "show", - "input": "encoderWeights", - "values": [ - "imagenet", - "random" - ] - } - ] - }, - { - "condition": [ - { - "input": "encoderVariant", - "value": "timm-res2net50_26w_8s", - "eval": "==" - } - ], - "then": [ - { - "action": "show", - "input": 
"encoderWeights", - "values": [ - "imagenet", - "random" - ] - } - ] - }, - { - "condition": [ - { - "input": "encoderVariant", - "value": "timm-res2net50_48w_2s", - "eval": "==" - } - ], - "then": [ - { - "action": "show", - "input": "encoderWeights", - "values": [ - "imagenet", - "random" - ] - } - ] - }, - { - "condition": [ - { - "input": "encoderVariant", - "value": "timm-res2net50_14w_8s", - "eval": "==" - } - ], - "then": [ - { - "action": "show", - "input": "encoderWeights", - "values": [ - "imagenet", - "random" - ] - } - ] - }, - { - "condition": [ - { - "input": "encoderVariant", - "value": "timm-res2next50", - "eval": "==" - } - ], - "then": [ - { - "action": "show", - "input": "encoderWeights", - "values": [ - "imagenet", - "random" - ] - } - ] - }, - { - "condition": [ - { - "input": "encoderVariant", - "value": "timm-regnetx_002", - "eval": "==" - } - ], - "then": [ - { - "action": "show", - "input": "encoderWeights", - "values": [ - "imagenet", - "random" - ] - } - ] - }, - { - "condition": [ - { - "input": "encoderVariant", - "value": "timm-regnetx_004", - "eval": "==" - } - ], - "then": [ - { - "action": "show", - "input": "encoderWeights", - "values": [ - "imagenet", - "random" - ] - } - ] - }, - { - "condition": [ - { - "input": "encoderVariant", - "value": "timm-regnetx_006", - "eval": "==" - } - ], - "then": [ - { - "action": "show", - "input": "encoderWeights", - "values": [ - "imagenet", - "random" - ] - } - ] - }, - { - "condition": [ - { - "input": "encoderVariant", - "value": "timm-regnetx_008", - "eval": "==" - } - ], - "then": [ - { - "action": "show", - "input": "encoderWeights", - "values": [ - "imagenet", - "random" - ] - } - ] - }, - { - "condition": [ - { - "input": "encoderVariant", - "value": "timm-regnetx_016", - "eval": "==" - } - ], - "then": [ - { - "action": "show", - "input": "encoderWeights", - "values": [ - "imagenet", - "random" - ] - } - ] - }, - { - "condition": [ - { - "input": "encoderVariant", - "value": 
"timm-regnetx_032", - "eval": "==" - } - ], - "then": [ - { - "action": "show", - "input": "encoderWeights", - "values": [ - "imagenet", - "random" - ] - } - ] - }, - { - "condition": [ - { - "input": "encoderVariant", - "value": "timm-regnetx_040", - "eval": "==" - } - ], - "then": [ - { - "action": "show", - "input": "encoderWeights", - "values": [ - "imagenet", - "random" - ] - } - ] - }, - { - "condition": [ - { - "input": "encoderVariant", - "value": "timm-regnetx_064", - "eval": "==" - } - ], - "then": [ - { - "action": "show", - "input": "encoderWeights", - "values": [ - "imagenet", - "random" - ] - } - ] - }, - { - "condition": [ - { - "input": "encoderVariant", - "value": "timm-regnetx_080", - "eval": "==" - } - ], - "then": [ - { - "action": "show", - "input": "encoderWeights", - "values": [ - "imagenet", - "random" - ] - } - ] - }, - { - "condition": [ - { - "input": "encoderVariant", - "value": "timm-regnetx_120", - "eval": "==" - } - ], - "then": [ - { - "action": "show", - "input": "encoderWeights", - "values": [ - "imagenet", - "random" - ] - } - ] - }, - { - "condition": [ - { - "input": "encoderVariant", - "value": "timm-regnetx_160", - "eval": "==" - } - ], - "then": [ - { - "action": "show", - "input": "encoderWeights", - "values": [ - "imagenet", - "random" - ] - } - ] - }, - { - "condition": [ - { - "input": "encoderVariant", - "value": "timm-regnetx_320", - "eval": "==" - } - ], - "then": [ - { - "action": "show", - "input": "encoderWeights", - "values": [ - "imagenet", - "random" - ] - } - ] - }, - { - "condition": [ - { - "input": "encoderVariant", - "value": "timm-regnety_002", - "eval": "==" - } - ], - "then": [ - { - "action": "show", - "input": "encoderWeights", - "values": [ - "imagenet", - "random" - ] - } - ] - }, - { - "condition": [ - { - "input": "encoderVariant", - "value": "timm-regnety_004", - "eval": "==" - } - ], - "then": [ - { - "action": "show", - "input": "encoderWeights", - "values": [ - "imagenet", - "random" - ] - } - ] 
- }, - { - "condition": [ - { - "input": "encoderVariant", - "value": "timm-regnety_006", - "eval": "==" - } - ], - "then": [ - { - "action": "show", - "input": "encoderWeights", - "values": [ - "imagenet", - "random" - ] - } - ] - }, - { - "condition": [ - { - "input": "encoderVariant", - "value": "timm-regnety_008", - "eval": "==" - } - ], - "then": [ - { - "action": "show", - "input": "encoderWeights", - "values": [ - "imagenet", - "random" - ] - } - ] - }, - { - "condition": [ - { - "input": "encoderVariant", - "value": "timm-regnety_016", - "eval": "==" - } - ], - "then": [ - { - "action": "show", - "input": "encoderWeights", - "values": [ - "imagenet", - "random" - ] - } - ] - }, - { - "condition": [ - { - "input": "encoderVariant", - "value": "timm-regnety_032", - "eval": "==" - } - ], - "then": [ - { - "action": "show", - "input": "encoderWeights", - "values": [ - "imagenet", - "random" - ] - } - ] - }, - { - "condition": [ - { - "input": "encoderVariant", - "value": "timm-regnety_040", - "eval": "==" - } - ], - "then": [ - { - "action": "show", - "input": "encoderWeights", - "values": [ - "imagenet", - "random" - ] - } - ] - }, - { - "condition": [ - { - "input": "encoderVariant", - "value": "timm-regnety_064", - "eval": "==" - } - ], - "then": [ - { - "action": "show", - "input": "encoderWeights", - "values": [ - "imagenet", - "random" - ] - } - ] - }, - { - "condition": [ - { - "input": "encoderVariant", - "value": "timm-regnety_080", - "eval": "==" - } - ], - "then": [ - { - "action": "show", - "input": "encoderWeights", - "values": [ - "imagenet", - "random" - ] - } - ] - }, - { - "condition": [ - { - "input": "encoderVariant", - "value": "timm-regnety_120", - "eval": "==" - } - ], - "then": [ - { - "action": "show", - "input": "encoderWeights", - "values": [ - "imagenet", - "random" - ] - } - ] - }, - { - "condition": [ - { - "input": "encoderVariant", - "value": "timm-regnety_160", - "eval": "==" - } - ], - "then": [ - { - "action": "show", - 
"input": "encoderWeights", - "values": [ - "imagenet", - "random" - ] - } - ] - }, - { - "condition": [ - { - "input": "encoderVariant", - "value": "timm-regnety_320", - "eval": "==" - } - ], - "then": [ - { - "action": "show", - "input": "encoderWeights", - "values": [ - "imagenet", - "random" - ] - } - ] - }, - { - "condition": [ - { - "input": "encoderVariant", - "value": "timm-gernet_s", - "eval": "==" - } - ], - "then": [ - { - "action": "show", - "input": "encoderWeights", - "values": [ - "imagenet", - "random" - ] - } - ] - }, - { - "condition": [ - { - "input": "encoderVariant", - "value": "timm-gernet_m", - "eval": "==" - } - ], - "then": [ - { - "action": "show", - "input": "encoderWeights", - "values": [ - "imagenet", - "random" - ] - } - ] - }, - { - "condition": [ - { - "input": "encoderVariant", - "value": "timm-gernet_l", - "eval": "==" - } - ], - "then": [ - { - "action": "show", - "input": "encoderWeights", - "values": [ - "imagenet", - "random" - ] - } - ] - }, - { - "condition": [ - { - "input": "encoderVariant", - "value": "senet154", - "eval": "==" - } - ], - "then": [ - { - "action": "show", - "input": "encoderWeights", - "values": [ - "imagenet", - "random" - ] - } - ] - }, - { - "condition": [ - { - "input": "encoderVariant", - "value": "se_resnet50", - "eval": "==" - } - ], - "then": [ - { - "action": "show", - "input": "encoderWeights", - "values": [ - "imagenet", - "random" - ] - } - ] - }, - { - "condition": [ - { - "input": "encoderVariant", - "value": "se_resnet101", - "eval": "==" - } - ], - "then": [ - { - "action": "show", - "input": "encoderWeights", - "values": [ - "imagenet", - "random" - ] - } - ] - }, - { - "condition": [ - { - "input": "encoderVariant", - "value": "se_resnet152", - "eval": "==" - } - ], - "then": [ - { - "action": "show", - "input": "encoderWeights", - "values": [ - "imagenet", - "random" - ] - } - ] - }, - { - "condition": [ - { - "input": "encoderVariant", - "value": "se_resnext50_32x4d", - "eval": "==" - } 
- ], - "then": [ - { - "action": "show", - "input": "encoderWeights", - "values": [ - "imagenet", - "random" - ] - } - ] - }, - { - "condition": [ - { - "input": "encoderVariant", - "value": "se_resnext101_32x4d", - "eval": "==" - } - ], - "then": [ - { - "action": "show", - "input": "encoderWeights", - "values": [ - "imagenet", - "random" - ] - } - ] - }, - { - "condition": [ - { - "input": "encoderVariant", - "value": "timm-skresnet18", - "eval": "==" - } - ], - "then": [ - { - "action": "show", - "input": "encoderWeights", - "values": [ - "imagenet", - "random" - ] - } - ] - }, - { - "condition": [ - { - "input": "encoderVariant", - "value": "timm-skresnet34", - "eval": "==" - } - ], - "then": [ - { - "action": "show", - "input": "encoderWeights", - "values": [ - "imagenet", - "random" - ] - } - ] - }, - { - "condition": [ - { - "input": "encoderVariant", - "value": "timm-skresnext50_32x4d", - "eval": "==" - } - ], - "then": [ - { - "action": "show", - "input": "encoderWeights", - "values": [ - "imagenet", - "random" - ] - } - ] - }, - { - "condition": [ - { - "input": "encoderVariant", - "value": "densenet121", - "eval": "==" - } - ], - "then": [ - { - "action": "show", - "input": "encoderWeights", - "values": [ - "imagenet", - "random" - ] - } - ] - }, - { - "condition": [ - { - "input": "encoderVariant", - "value": "densenet169", - "eval": "==" - } - ], - "then": [ - { - "action": "show", - "input": "encoderWeights", - "values": [ - "imagenet", - "random" - ] - } - ] - }, - { - "condition": [ - { - "input": "encoderVariant", - "value": "densenet201", - "eval": "==" - } - ], - "then": [ - { - "action": "show", - "input": "encoderWeights", - "values": [ - "imagenet", - "random" - ] - } - ] - }, - { - "condition": [ - { - "input": "encoderVariant", - "value": "densenet161", - "eval": "==" - } - ], - "then": [ - { - "action": "show", - "input": "encoderWeights", - "values": [ - "imagenet", - "random" - ] - } - ] - }, - { - "condition": [ - { - "input": 
"encoderVariant", - "value": "inceptionresnetv2", - "eval": "==" - } - ], - "then": [ - { - "action": "show", - "input": "encoderWeights", - "values": [ - "imagenet", - "imagenet+background", - "random" - ] - } - ] - }, - { - "condition": [ - { - "input": "encoderVariant", - "value": "inceptionv4", - "eval": "==" - } - ], - "then": [ - { - "action": "show", - "input": "encoderWeights", - "values": [ - "imagenet", - "imagenet+background", - "random" - ] - } - ] - }, - { - "condition": [ - { - "input": "encoderVariant", - "value": "xception", - "eval": "==" - } - ], - "then": [ - { - "action": "show", - "input": "encoderWeights", - "values": [ - "imagenet", - "random" - ] - } - ] - }, - { - "condition": [ - { - "input": "encoderVariant", - "value": "efficientnet-b0", - "eval": "==" - } - ], - "then": [ - { - "action": "show", - "input": "encoderWeights", - "values": [ - "imagenet", - "random" - ] - } - ] - }, - { - "condition": [ - { - "input": "encoderVariant", - "value": "efficientnet-b1", - "eval": "==" - } - ], - "then": [ - { - "action": "show", - "input": "encoderWeights", - "values": [ - "imagenet", - "random" - ] - } - ] - }, - { - "condition": [ - { - "input": "encoderVariant", - "value": "efficientnet-b2", - "eval": "==" - } - ], - "then": [ - { - "action": "show", - "input": "encoderWeights", - "values": [ - "imagenet", - "random" - ] - } - ] - }, - { - "condition": [ - { - "input": "encoderVariant", - "value": "efficientnet-b3", - "eval": "==" - } - ], - "then": [ - { - "action": "show", - "input": "encoderWeights", - "values": [ - "imagenet", - "random" - ] - } - ] - }, - { - "condition": [ - { - "input": "encoderVariant", - "value": "efficientnet-b4", - "eval": "==" - } - ], - "then": [ - { - "action": "show", - "input": "encoderWeights", - "values": [ - "imagenet", - "random" - ] - } - ] - }, - { - "condition": [ - { - "input": "encoderVariant", - "value": "efficientnet-b5", - "eval": "==" - } - ], - "then": [ - { - "action": "show", - "input": 
"encoderWeights", - "values": [ - "imagenet", - "random" - ] - } - ] - }, - { - "condition": [ - { - "input": "encoderVariant", - "value": "efficientnet-b6", - "eval": "==" - } - ], - "then": [ - { - "action": "show", - "input": "encoderWeights", - "values": [ - "imagenet", - "random" - ] - } - ] - }, - { - "condition": [ - { - "input": "encoderVariant", - "value": "efficientnet-b7", - "eval": "==" - } - ], - "then": [ - { - "action": "show", - "input": "encoderWeights", - "values": [ - "imagenet", - "random" - ] - } - ] - }, - { - "condition": [ - { - "input": "encoderVariant", - "value": "timm-efficientnet-b0", - "eval": "==" - } - ], - "then": [ - { - "action": "show", - "input": "encoderWeights", - "values": [ - "imagenet", - "advprop", - "noisy-student", - "random" - ] - } - ] - }, - { - "condition": [ - { - "input": "encoderVariant", - "value": "timm-efficientnet-b1", - "eval": "==" - } - ], - "then": [ - { - "action": "show", - "input": "encoderWeights", - "values": [ - "imagenet", - "advprop", - "noisy-student", - "random" - ] - } - ] - }, - { - "condition": [ - { - "input": "encoderVariant", - "value": "timm-efficientnet-b2", - "eval": "==" - } - ], - "then": [ - { - "action": "show", - "input": "encoderWeights", - "values": [ - "imagenet", - "advprop", - "noisy-student", - "random" - ] - } - ] - }, - { - "condition": [ - { - "input": "encoderVariant", - "value": "timm-efficientnet-b3", - "eval": "==" - } - ], - "then": [ - { - "action": "show", - "input": "encoderWeights", - "values": [ - "imagenet", - "advprop", - "noisy-student", - "random" - ] - } - ] - }, - { - "condition": [ - { - "input": "encoderVariant", - "value": "timm-efficientnet-b4", - "eval": "==" - } - ], - "then": [ - { - "action": "show", - "input": "encoderWeights", - "values": [ - "imagenet", - "advprop", - "noisy-student", - "random" - ] - } - ] - }, - { - "condition": [ - { - "input": "encoderVariant", - "value": "timm-efficientnet-b5", - "eval": "==" - } - ], - "then": [ - { - 
"action": "show", - "input": "encoderWeights", - "values": [ - "imagenet", - "advprop", - "noisy-student", - "random" - ] - } - ] - }, - { - "condition": [ - { - "input": "encoderVariant", - "value": "timm-efficientnet-b6", - "eval": "==" - } - ], - "then": [ - { - "action": "show", - "input": "encoderWeights", - "values": [ - "imagenet", - "advprop", - "noisy-student", - "random" - ] - } - ] - }, - { - "condition": [ - { - "input": "encoderVariant", - "value": "timm-efficientnet-b7", - "eval": "==" - } - ], - "then": [ - { - "action": "show", - "input": "encoderWeights", - "values": [ - "imagenet", - "advprop", - "noisy-student", - "random" - ] - } - ] - }, - { - "condition": [ - { - "input": "encoderVariant", - "value": "timm-efficientnet-b8", - "eval": "==" - } - ], - "then": [ - { - "action": "show", - "input": "encoderWeights", - "values": [ - "imagenet", - "advprop", - "random" - ] - } - ] - }, - { - "condition": [ - { - "input": "encoderVariant", - "value": "timm-efficientnet-l2", - "eval": "==" - } - ], - "then": [ - { - "action": "show", - "input": "encoderWeights", - "values": [ - "noisy-student", - "random" - ] - } - ] - }, - { - "condition": [ - { - "input": "encoderVariant", - "value": "timm-efficientnet-lite0", - "eval": "==" - } - ], - "then": [ - { - "action": "show", - "input": "encoderWeights", - "values": [ - "imagenet", - "random" - ] - } - ] - }, - { - "condition": [ - { - "input": "encoderVariant", - "value": "timm-efficientnet-lite1", - "eval": "==" - } - ], - "then": [ - { - "action": "show", - "input": "encoderWeights", - "values": [ - "imagenet", - "random" - ] - } - ] - }, - { - "condition": [ - { - "input": "encoderVariant", - "value": "timm-efficientnet-lite2", - "eval": "==" - } - ], - "then": [ - { - "action": "show", - "input": "encoderWeights", - "values": [ - "imagenet", - "random" - ] - } - ] - }, - { - "condition": [ - { - "input": "encoderVariant", - "value": "timm-efficientnet-lite3", - "eval": "==" - } - ], - "then": [ - { - 
"action": "show", - "input": "encoderWeights", - "values": [ - "imagenet", - "random" - ] - } - ] - }, - { - "condition": [ - { - "input": "encoderVariant", - "value": "timm-efficientnet-lite4", - "eval": "==" - } - ], - "then": [ - { - "action": "show", - "input": "encoderWeights", - "values": [ - "imagenet", - "random" - ] - } - ] - }, - { - "condition": [ - { - "input": "encoderVariant", - "value": "mobilenet_v2", - "eval": "==" - } - ], - "then": [ - { - "action": "show", - "input": "encoderWeights", - "values": [ - "imagenet", - "random" - ] - } - ] - }, - { - "condition": [ - { - "input": "encoderVariant", - "value": "timm-mobilenetv3_large_075", - "eval": "==" - } - ], - "then": [ - { - "action": "show", - "input": "encoderWeights", - "values": [ - "imagenet", - "random" - ] - } - ] - }, - { - "condition": [ - { - "input": "encoderVariant", - "value": "timm-mobilenetv3_large_100", - "eval": "==" - } - ], - "then": [ - { - "action": "show", - "input": "encoderWeights", - "values": [ - "imagenet", - "random" - ] - } - ] - }, - { - "condition": [ - { - "input": "encoderVariant", - "value": "timm-mobilenetv3_large_minimal_100", - "eval": "==" - } - ], - "then": [ - { - "action": "show", - "input": "encoderWeights", - "values": [ - "imagenet", - "random" - ] - } - ] - }, - { - "condition": [ - { - "input": "encoderVariant", - "value": "timm-mobilenetv3_small_075", - "eval": "==" - } - ], - "then": [ - { - "action": "show", - "input": "encoderWeights", - "values": [ - "imagenet", - "random" - ] - } - ] - }, - { - "condition": [ - { - "input": "encoderVariant", - "value": "timm-mobilenetv3_small_100", - "eval": "==" - } - ], - "then": [ - { - "action": "show", - "input": "encoderWeights", - "values": [ - "imagenet", - "random" - ] - } - ] - }, - { - "condition": [ - { - "input": "encoderVariant", - "value": "timm-mobilenetv3_small_minimal_100", - "eval": "==" - } - ], - "then": [ - { - "action": "show", - "input": "encoderWeights", - "values": [ - "imagenet", - 
"random" - ] - } - ] - }, - { - "condition": [ - { - "input": "encoderVariant", - "value": "dpn68", - "eval": "==" - } - ], - "then": [ - { - "action": "show", - "input": "encoderWeights", - "values": [ - "imagenet", - "random" - ] - } - ] - }, - { - "condition": [ - { - "input": "encoderVariant", - "value": "dpn68b", - "eval": "==" - } - ], - "then": [ - { - "action": "show", - "input": "encoderWeights", - "values": [ - "imagenet+5k", - "random" - ] - } - ] - }, - { - "condition": [ - { - "input": "encoderVariant", - "value": "dpn92", - "eval": "==" - } - ], - "then": [ - { - "action": "show", - "input": "encoderWeights", - "values": [ - "imagenet+5k", - "random" - ] - } - ] - }, - { - "condition": [ - { - "input": "encoderVariant", - "value": "dpn98", - "eval": "==" - } - ], - "then": [ - { - "action": "show", - "input": "encoderWeights", - "values": [ - "imagenet", - "random" - ] - } - ] - }, - { - "condition": [ - { - "input": "encoderVariant", - "value": "dpn107", - "eval": "==" - } - ], - "then": [ - { - "action": "show", - "input": "encoderWeights", - "values": [ - "imagenet+5k", - "random" - ] - } - ] - }, - { - "condition": [ - { - "input": "encoderVariant", - "value": "dpn131", - "eval": "==" - } - ], - "then": [ - { - "action": "show", - "input": "encoderWeights", - "values": [ - "imagenet", - "random" - ] - } - ] - }, - { - "condition": [ - { - "input": "encoderVariant", - "value": "vgg11", - "eval": "==" - } - ], - "then": [ - { - "action": "show", - "input": "encoderWeights", - "values": [ - "imagenet", - "random" - ] - } - ] - }, - { - "condition": [ - { - "input": "encoderVariant", - "value": "vgg11_bn", - "eval": "==" - } - ], - "then": [ - { - "action": "show", - "input": "encoderWeights", - "values": [ - "imagenet", - "random" - ] - } - ] - }, - { - "condition": [ - { - "input": "encoderVariant", - "value": "vgg13", - "eval": "==" - } - ], - "then": [ - { - "action": "show", - "input": "encoderWeights", - "values": [ - "imagenet", - "random" - ] 
- } - ] - }, - { - "condition": [ - { - "input": "encoderVariant", - "value": "vgg13_bn", - "eval": "==" - } - ], - "then": [ - { - "action": "show", - "input": "encoderWeights", - "values": [ - "imagenet", - "random" - ] - } - ] - }, - { - "condition": [ - { - "input": "encoderVariant", - "value": "vgg16", - "eval": "==" - } - ], - "then": [ - { - "action": "show", - "input": "encoderWeights", - "values": [ - "imagenet", - "random" - ] - } - ] - }, - { - "condition": [ - { - "input": "encoderVariant", - "value": "vgg16_bn", - "eval": "==" - } - ], - "then": [ - { - "action": "show", - "input": "encoderWeights", - "values": [ - "imagenet", - "random" - ] - } - ] - }, - { - "condition": [ - { - "input": "encoderVariant", - "value": "vgg19", - "eval": "==" - } - ], - "then": [ - { - "action": "show", - "input": "encoderWeights", - "values": [ - "imagenet", - "random" - ] - } - ] - }, - { - "condition": [ - { - "input": "encoderVariant", - "value": "vgg19_bn", - "eval": "==" - } - ], - "then": [ - { - "action": "show", - "input": "encoderWeights", - "values": [ - "imagenet", - "random" - ] - } - ] - } - ] -} \ No newline at end of file + "name": "Demo SMP Training/Inference", + "version": "0.5.11", + "title": "Segmentation Models Training and Inference", + "description": "Segmentation models training and inference plugin.", + "author": "Gauhar Bains (gauhar.bains@labshare.org), Najib Ishaq (najib.ishaq@axleinfo.com), Madhuri Vihani (madhuri.vihani@nih.gov), Benjamin Houghton (benjamin.houghton@axleinfo.com)", + "institution": "National Center for Advancing Translational Sciences, National Institutes of Health", + "repository": "https://github.com/PolusAI/polus-plugins/tree/dev/segmentation", + "website": "https://ncats.nih.gov/preclinical/core/informatics", + "citation": "", + "containerId": "polusai/smp-training-plugin:0.5.11", + "inputs": [ + { + "name": "inferenceMode", + "description": "'active' or 'inactive' for whether to run in inference mode.", + "type": 
"enum", + "required": true, + "options": { + "values": [ + "active", + "inactive" + ] + } + }, + { + "name": "imagesInferenceDir", + "description": "Collection containing images on which to run inference.", + "type": "collection", + "required": false, + "options": null + }, + { + "name": "inferencePattern", + "description": "Filename pattern for images on which to run inference.", + "type": "string", + "required": false, + "options": null + }, + { + "name": "pretrainedModel", + "description": "Path to a model that was previously trained with this plugin. If starting fresh, you must instead provide: 'modelName', 'encoderBase', 'encoderVariant', 'encoderWeights', and 'optimizerName'. See the README for available options.", + "type": "genericData", + "required": false, + "options": null + }, + { + "name": "modelName", + "description": "Model architecture to use. Required if starting fresh.", + "type": "enum", + "options": { + "values": [ + "Unet", + "UnetPlusPlus", + "MAnet", + "Linknet", + "FPN", + "PSPNet", + "PAN", + "DeepLabV3", + "DeepLabV3Plus" + ] + }, + "required": false + }, + { + "name": "encoderBase", + "description": "The name of the base encoder to use.", + "type": "enum", + "options": { + "values": [ + "ResNet", + "ResNeXt", + "ResNeSt", + "Res2Ne(X)t", + "RegNet(x/y)", + "GERNet", + "SE-Net", + "SK-ResNe(X)t", + "DenseNet", + "Inception", + "EfficientNet", + "MobileNet", + "DPN", + "VGG" + ] + }, + "required": false + }, + { + "name": "encoderVariant", + "description": "The name of the specific variant to use.", + "type": "enum", + "options": { + "values": [ + "resnet18", + "resnet34", + "resnet50", + "resnet101", + "resnet152", + "resnext50_32x4d", + "resnext101_32x4d", + "resnext101_32x8d", + "resnext101_32x16d", + "resnext101_32x32d", + "resnext101_32x48d", + "timm-resnest14d", + "timm-resnest26d", + "timm-resnest50d", + "timm-resnest101e", + "timm-resnest200e", + "timm-resnest269e", + "timm-resnest50d_4s2x40d", + "timm-resnest50d_1s4x24d", + 
"timm-res2net50_26w_4s", + "timm-res2net101_26w_4s", + "timm-res2net50_26w_6s", + "timm-res2net50_26w_8s", + "timm-res2net50_48w_2s", + "timm-res2net50_14w_8s", + "timm-res2next50", + "timm-regnetx_002", + "timm-regnetx_004", + "timm-regnetx_006", + "timm-regnetx_008", + "timm-regnetx_016", + "timm-regnetx_032", + "timm-regnetx_040", + "timm-regnetx_064", + "timm-regnetx_080", + "timm-regnetx_120", + "timm-regnetx_160", + "timm-regnetx_320", + "timm-regnety_002", + "timm-regnety_004", + "timm-regnety_006", + "timm-regnety_008", + "timm-regnety_016", + "timm-regnety_032", + "timm-regnety_040", + "timm-regnety_064", + "timm-regnety_080", + "timm-regnety_120", + "timm-regnety_160", + "timm-regnety_320", + "timm-gernet_s", + "timm-gernet_m", + "timm-gernet_l", + "senet154", + "se_resnet50", + "se_resnet101", + "se_resnet152", + "se_resnext50_32x4d", + "se_resnext101_32x4d", + "timm-skresnet18", + "timm-skresnet34", + "timm-skresnext50_32x4d", + "densenet121", + "densenet169", + "densenet201", + "densenet161", + "inceptionresnetv2", + "inceptionv4", + "xception", + "efficientnet-b0", + "efficientnet-b1", + "efficientnet-b2", + "efficientnet-b3", + "efficientnet-b4", + "efficientnet-b5", + "efficientnet-b6", + "efficientnet-b7", + "timm-efficientnet-b0", + "timm-efficientnet-b1", + "timm-efficientnet-b2", + "timm-efficientnet-b3", + "timm-efficientnet-b4", + "timm-efficientnet-b5", + "timm-efficientnet-b6", + "timm-efficientnet-b7", + "timm-efficientnet-b8", + "timm-efficientnet-l2", + "timm-efficientnet-lite0", + "timm-efficientnet-lite1", + "timm-efficientnet-lite2", + "timm-efficientnet-lite3", + "timm-efficientnet-lite4", + "mobilenet_v2", + "timm-mobilenetv3_large_075", + "timm-mobilenetv3_large_100", + "timm-mobilenetv3_large_minimal_100", + "timm-mobilenetv3_small_075", + "timm-mobilenetv3_small_100", + "timm-mobilenetv3_small_minimal_100", + "dpn68", + "dpn68b", + "dpn92", + "dpn98", + "dpn107", + "dpn131", + "vgg11", + "vgg11_bn", + "vgg13", + "vgg13_bn", + 
"vgg16", + "vgg16_bn", + "vgg19", + "vgg19_bn" + ] + }, + "required": false + }, + { + "name": "encoderWeights", + "description": "The name of the pretrained weights to use.", + "type": "enum", + "options": { + "values": [ + "advprop", + "imagenet", + "imagenet+5k", + "imagenet+background", + "instagram", + "noisy-student", + "random", + "ssl", + "swsl" + ] + }, + "required": false + }, + { + "name": "optimizerName", + "description": "Name of optimization algorithm to use for training the model. Required if starting fresh.", + "type": "enum", + "options": { + "values": [ + "Adadelta", + "Adagrad", + "Adam", + "AdamW", + "SparseAdam", + "Adamax", + "ASGD", + "LBFGS", + "RMSprop", + "Rprop", + "SGD" + ] + }, + "required": false + }, + { + "name": "batchSize", + "description": "Size of each batch for training. If left unspecified, we use the maximum possible based on memory constraints.", + "type": "integer", + "required": false, + "options": null + }, + { + "name": "imagesTrainDir", + "description": "Collection containing images to use for training.", + "type": "collection", + "required": false, + "options": null + }, + { + "name": "labelsTrainDir", + "description": "Collection containing labels, i.e. the ground-truth, for the training images.", + "type": "collection", + "required": false, + "options": null + }, + { + "name": "trainPattern", + "description": "Filename pattern for training images and labels.", + "type": "string", + "required": false, + "options": null + }, + { + "name": "imagesValidDir", + "description": "Collection containing images to use for validation.", + "type": "collection", + "required": false, + "options": null + }, + { + "name": "labelsValidDir", + "description": "Collection containing labels, i.e. 
the ground-truth, for the validation images.", + "type": "collection", + "required": false, + "options": null + }, + { + "name": "validPattern", + "description": "Filename pattern for validation images and labels.", + "type": "string", + "required": false, + "options": null + }, + { + "name": "device", + "description": "Which device to use for the model", + "type": "string", + "required": false, + "options": null + }, + { + "name": "checkpointFrequency", + "description": "How often to save model checkpoints", + "type": "integer", + "required": false, + "options": null + }, + { + "name": "lossName", + "description": "Name of loss function to use.", + "type": "enum", + "options": { + "values": [ + "JaccardLoss", + "DiceLoss", + "TverskyLoss", + "FocalLoss", + "LovaszLoss", + "SoftBCEWithLogitsLoss", + "SoftCrossEntropyLoss", + "MCCLoss" + ] + }, + "required": false + }, + { + "name": "maxEpochs", + "description": "Maximum number of epochs for which to continue training the model.", + "type": "integer", + "required": false, + "options": null + }, + { + "name": "patience", + "description": "Maximum number of epochs to wait for model to improve.", + "type": "integer", + "required": false, + "options": null + }, + { + "name": "minDelta", + "description": "Minimum improvement in loss to reset patience.", + "type": "number", + "required": false, + "options": null + } + ], + "outputs": [ + { + "name": "outputDir", + "type": "genericData", + "description": "In training mode, this contains the trained model and checkpoints. In inference mode, this contains the output labels." 
+ } + ], + "ui": [ + { + "key": "inputs.inferenceMode", + "title": "inferenceMode", + "description": "'active' or 'inactive' for whether to run in inference mode.", + "default": "inactive" + }, + { + "key": "inputs.imagesInferenceDir", + "title": "imagesInferenceDir", + "description": "Collection containing images on which to run inference.", + "condition": "model.inputs.inferenceMode=='active'" + }, + { + "key": "inputs.inferencePattern", + "title": "inferencePattern", + "description": "Filename pattern for images on which to run inference.", + "default": ".*", + "condition": "model.inputs.inferenceMode=='active'" + }, + { + "key": "inputs.pretrainedModel", + "title": "pretrainedModel", + "description": "Path to a model that was previously trained with this plugin. If starting fresh, you must instead provide: 'modelName', 'encoderBase', 'encoderVariant', 'encoderWeights', and 'optimizerName'. See the README for available options." + }, + { + "key": "inputs.modelName", + "title": "modelName", + "description": "Model architecture to use. Required if starting fresh.", + "default": "Unet" + }, + { + "key": "inputs.encoderBase", + "title": "encoderBase", + "description": "The name of the base encoder to use.", + "default": "ResNet" + }, + { + "key": "inputs.encoderVariant", + "title": "encoderVariant", + "description": "The name of the specific variant to use.", + "default": "resnet34" + }, + { + "key": "inputs.encoderWeights", + "title": "encoderWeights", + "description": "The name of the pretrained weights to use.", + "default": "imagenet" + }, + { + "key": "inputs.optimizerName", + "title": "optimizerName", + "description": "Name of optimization algorithm to use for training the model. Required if starting fresh.", + "default": "Adam", + "condition": "model.inputs.inferenceMode=='inactive'" + }, + { + "key": "inputs.batchSize", + "title": "batchSize", + "description": "Size of each batch for training. 
If left unspecified, we use the maximum possible based on memory constraints.", + "condition": "model.inputs.inferenceMode=='inactive'" + }, + { + "key": "inputs.imagesTrainDir", + "title": "imagesTrainDir", + "description": "Collection containing images to use for training.", + "condition": "model.inputs.inferenceMode=='inactive'" + }, + { + "key": "inputs.labelsTrainDir", + "title": "labelsTrainDir", + "description": "Collection containing labels, i.e. the ground-truth, for the training images.", + "condition": "model.inputs.inferenceMode=='inactive'" + }, + { + "key": "inputs.trainPattern", + "title": "trainPattern", + "description": "Filename pattern for training images and labels.", + "default": ".*", + "condition": "model.inputs.inferenceMode=='inactive'" + }, + { + "key": "inputs.imagesValidDir", + "title": "imagesValidDir", + "description": "Collection containing images to use for validation.", + "condition": "model.inputs.inferenceMode=='inactive'" + }, + { + "key": "inputs.labelsValidDir", + "title": "labelsValidDir", + "description": "Collection containing labels, i.e. 
the ground-truth, for the validation images.", + "condition": "model.inputs.inferenceMode=='inactive'" + }, + { + "key": "inputs.validPattern", + "title": "validPattern", + "description": "Filename pattern for validation images and labels.", + "default": ".*", + "condition": "model.inputs.inferenceMode=='inactive'" + }, + { + "key": "inputs.device", + "title": "device", + "description": "Which device to use for the model", + "default": "gpu" + }, + { + "key": "inputs.checkpointFrequency", + "title": "checkpointFrequency", + "description": "How often to save model checkpoints", + "condition": "model.inputs.inferenceMode=='inactive'" + }, + { + "key": "inputs.lossName", + "title": "lossName", + "description": "Name of loss function to use.", + "default": "JaccardLoss", + "condition": "model.inputs.inferenceMode=='inactive'" + }, + { + "key": "inputs.maxEpochs", + "title": "maxEpochs", + "description": "Maximum number of epochs for which to continue training the model.", + "default": 100, + "condition": "model.inputs.inferenceMode=='inactive'" + }, + { + "key": "inputs.patience", + "title": "patience", + "description": "Maximum number of epochs to wait for model to improve.", + "default": 10, + "condition": "model.inputs.inferenceMode=='inactive'" + }, + { + "key": "inputs.minDelta", + "title": "minDelta", + "description": "Minimum improvement in loss to reset patience.", + "default": 0.0001, + "condition": "model.inputs.inferenceMode=='inactive'" + } + ], + "validators": [ + { + "condition": [ + { + "input": "encoderBase", + "value": "ResNet", + "eval": "==" + } + ], + "then": [ + { + "action": "show", + "input": "encoderVariant", + "values": [ + "resnet18", + "resnet34", + "resnet50", + "resnet101", + "resnet152" + ] + } + ] + }, + { + "condition": [ + { + "input": "encoderBase", + "value": "ResNeXt", + "eval": "==" + } + ], + "then": [ + { + "action": "show", + "input": "encoderVariant", + "values": [ + "resnext50_32x4d", + "resnext101_32x4d", + "resnext101_32x8d", 
+ "resnext101_32x16d", + "resnext101_32x32d", + "resnext101_32x48d" + ] + } + ] + }, + { + "condition": [ + { + "input": "encoderBase", + "value": "ResNeSt", + "eval": "==" + } + ], + "then": [ + { + "action": "show", + "input": "encoderVariant", + "values": [ + "timm-resnest14d", + "timm-resnest26d", + "timm-resnest50d", + "timm-resnest101e", + "timm-resnest200e", + "timm-resnest269e", + "timm-resnest50d_4s2x40d", + "timm-resnest50d_1s4x24d" + ] + } + ] + }, + { + "condition": [ + { + "input": "encoderBase", + "value": "Res2Ne(X)t", + "eval": "==" + } + ], + "then": [ + { + "action": "show", + "input": "encoderVariant", + "values": [ + "timm-res2net50_26w_4s", + "timm-res2net101_26w_4s", + "timm-res2net50_26w_6s", + "timm-res2net50_26w_8s", + "timm-res2net50_48w_2s", + "timm-res2net50_14w_8s", + "timm-res2next50" + ] + } + ] + }, + { + "condition": [ + { + "input": "encoderBase", + "value": "RegNet(x/y)", + "eval": "==" + } + ], + "then": [ + { + "action": "show", + "input": "encoderVariant", + "values": [ + "timm-regnetx_002", + "timm-regnetx_004", + "timm-regnetx_006", + "timm-regnetx_008", + "timm-regnetx_016", + "timm-regnetx_032", + "timm-regnetx_040", + "timm-regnetx_064", + "timm-regnetx_080", + "timm-regnetx_120", + "timm-regnetx_160", + "timm-regnetx_320", + "timm-regnety_002", + "timm-regnety_004", + "timm-regnety_006", + "timm-regnety_008", + "timm-regnety_016", + "timm-regnety_032", + "timm-regnety_040", + "timm-regnety_064", + "timm-regnety_080", + "timm-regnety_120", + "timm-regnety_160", + "timm-regnety_320" + ] + } + ] + }, + { + "condition": [ + { + "input": "encoderBase", + "value": "GERNet", + "eval": "==" + } + ], + "then": [ + { + "action": "show", + "input": "encoderVariant", + "values": [ + "timm-gernet_s", + "timm-gernet_m", + "timm-gernet_l" + ] + } + ] + }, + { + "condition": [ + { + "input": "encoderBase", + "value": "SE-Net", + "eval": "==" + } + ], + "then": [ + { + "action": "show", + "input": "encoderVariant", + "values": [ + 
"senet154", + "se_resnet50", + "se_resnet101", + "se_resnet152", + "se_resnext50_32x4d", + "se_resnext101_32x4d" + ] + } + ] + }, + { + "condition": [ + { + "input": "encoderBase", + "value": "SK-ResNe(X)t", + "eval": "==" + } + ], + "then": [ + { + "action": "show", + "input": "encoderVariant", + "values": [ + "timm-skresnet18", + "timm-skresnet34", + "timm-skresnext50_32x4d" + ] + } + ] + }, + { + "condition": [ + { + "input": "encoderBase", + "value": "DenseNet", + "eval": "==" + } + ], + "then": [ + { + "action": "show", + "input": "encoderVariant", + "values": [ + "densenet121", + "densenet169", + "densenet201", + "densenet161" + ] + } + ] + }, + { + "condition": [ + { + "input": "encoderBase", + "value": "Inception", + "eval": "==" + } + ], + "then": [ + { + "action": "show", + "input": "encoderVariant", + "values": [ + "inceptionresnetv2", + "inceptionv4", + "xception" + ] + } + ] + }, + { + "condition": [ + { + "input": "encoderBase", + "value": "EfficientNet", + "eval": "==" + } + ], + "then": [ + { + "action": "show", + "input": "encoderVariant", + "values": [ + "efficientnet-b0", + "efficientnet-b1", + "efficientnet-b2", + "efficientnet-b3", + "efficientnet-b4", + "efficientnet-b5", + "efficientnet-b6", + "efficientnet-b7", + "timm-efficientnet-b0", + "timm-efficientnet-b1", + "timm-efficientnet-b2", + "timm-efficientnet-b3", + "timm-efficientnet-b4", + "timm-efficientnet-b5", + "timm-efficientnet-b6", + "timm-efficientnet-b7", + "timm-efficientnet-b8", + "timm-efficientnet-l2", + "timm-efficientnet-lite0", + "timm-efficientnet-lite1", + "timm-efficientnet-lite2", + "timm-efficientnet-lite3", + "timm-efficientnet-lite4" + ] + } + ] + }, + { + "condition": [ + { + "input": "encoderBase", + "value": "MobileNet", + "eval": "==" + } + ], + "then": [ + { + "action": "show", + "input": "encoderVariant", + "values": [ + "mobilenet_v2", + "timm-mobilenetv3_large_075", + "timm-mobilenetv3_large_100", + "timm-mobilenetv3_large_minimal_100", + 
"timm-mobilenetv3_small_075", + "timm-mobilenetv3_small_100", + "timm-mobilenetv3_small_minimal_100" + ] + } + ] + }, + { + "condition": [ + { + "input": "encoderBase", + "value": "DPN", + "eval": "==" + } + ], + "then": [ + { + "action": "show", + "input": "encoderVariant", + "values": [ + "dpn68", + "dpn68b", + "dpn92", + "dpn98", + "dpn107", + "dpn131" + ] + } + ] + }, + { + "condition": [ + { + "input": "encoderBase", + "value": "VGG", + "eval": "==" + } + ], + "then": [ + { + "action": "show", + "input": "encoderVariant", + "values": [ + "vgg11", + "vgg11_bn", + "vgg13", + "vgg13_bn", + "vgg16", + "vgg16_bn", + "vgg19", + "vgg19_bn" + ] + } + ] + }, + { + "condition": [ + { + "input": "encoderVariant", + "value": "resnet18", + "eval": "==" + } + ], + "then": [ + { + "action": "show", + "input": "encoderWeights", + "values": [ + "imagenet", + "ssl", + "swsl", + "random" + ] + } + ] + }, + { + "condition": [ + { + "input": "encoderVariant", + "value": "resnet34", + "eval": "==" + } + ], + "then": [ + { + "action": "show", + "input": "encoderWeights", + "values": [ + "imagenet", + "random" + ] + } + ] + }, + { + "condition": [ + { + "input": "encoderVariant", + "value": "resnet50", + "eval": "==" + } + ], + "then": [ + { + "action": "show", + "input": "encoderWeights", + "values": [ + "imagenet", + "ssl", + "swsl", + "random" + ] + } + ] + }, + { + "condition": [ + { + "input": "encoderVariant", + "value": "resnet101", + "eval": "==" + } + ], + "then": [ + { + "action": "show", + "input": "encoderWeights", + "values": [ + "imagenet", + "random" + ] + } + ] + }, + { + "condition": [ + { + "input": "encoderVariant", + "value": "resnet152", + "eval": "==" + } + ], + "then": [ + { + "action": "show", + "input": "encoderWeights", + "values": [ + "imagenet", + "random" + ] + } + ] + }, + { + "condition": [ + { + "input": "encoderVariant", + "value": "resnext50_32x4d", + "eval": "==" + } + ], + "then": [ + { + "action": "show", + "input": "encoderWeights", + "values": [ 
+ "imagenet", + "ssl", + "swsl", + "random" + ] + } + ] + }, + { + "condition": [ + { + "input": "encoderVariant", + "value": "resnext101_32x4d", + "eval": "==" + } + ], + "then": [ + { + "action": "show", + "input": "encoderWeights", + "values": [ + "ssl", + "swsl", + "random" + ] + } + ] + }, + { + "condition": [ + { + "input": "encoderVariant", + "value": "resnext101_32x8d", + "eval": "==" + } + ], + "then": [ + { + "action": "show", + "input": "encoderWeights", + "values": [ + "imagenet", + "instagram", + "ssl", + "swsl", + "random" + ] + } + ] + }, + { + "condition": [ + { + "input": "encoderVariant", + "value": "resnext101_32x16d", + "eval": "==" + } + ], + "then": [ + { + "action": "show", + "input": "encoderWeights", + "values": [ + "instagram", + "ssl", + "swsl", + "random" + ] + } + ] + }, + { + "condition": [ + { + "input": "encoderVariant", + "value": "resnext101_32x32d", + "eval": "==" + } + ], + "then": [ + { + "action": "show", + "input": "encoderWeights", + "values": [ + "instagram", + "random" + ] + } + ] + }, + { + "condition": [ + { + "input": "encoderVariant", + "value": "resnext101_32x48d", + "eval": "==" + } + ], + "then": [ + { + "action": "show", + "input": "encoderWeights", + "values": [ + "instagram", + "random" + ] + } + ] + }, + { + "condition": [ + { + "input": "encoderVariant", + "value": "timm-resnest14d", + "eval": "==" + } + ], + "then": [ + { + "action": "show", + "input": "encoderWeights", + "values": [ + "imagenet", + "random" + ] + } + ] + }, + { + "condition": [ + { + "input": "encoderVariant", + "value": "timm-resnest26d", + "eval": "==" + } + ], + "then": [ + { + "action": "show", + "input": "encoderWeights", + "values": [ + "imagenet", + "random" + ] + } + ] + }, + { + "condition": [ + { + "input": "encoderVariant", + "value": "timm-resnest50d", + "eval": "==" + } + ], + "then": [ + { + "action": "show", + "input": "encoderWeights", + "values": [ + "imagenet", + "random" + ] + } + ] + }, + { + "condition": [ + { + "input": 
"encoderVariant", + "value": "timm-resnest101e", + "eval": "==" + } + ], + "then": [ + { + "action": "show", + "input": "encoderWeights", + "values": [ + "imagenet", + "random" + ] + } + ] + }, + { + "condition": [ + { + "input": "encoderVariant", + "value": "timm-resnest200e", + "eval": "==" + } + ], + "then": [ + { + "action": "show", + "input": "encoderWeights", + "values": [ + "imagenet", + "random" + ] + } + ] + }, + { + "condition": [ + { + "input": "encoderVariant", + "value": "timm-resnest269e", + "eval": "==" + } + ], + "then": [ + { + "action": "show", + "input": "encoderWeights", + "values": [ + "imagenet", + "random" + ] + } + ] + }, + { + "condition": [ + { + "input": "encoderVariant", + "value": "timm-resnest50d_4s2x40d", + "eval": "==" + } + ], + "then": [ + { + "action": "show", + "input": "encoderWeights", + "values": [ + "imagenet", + "random" + ] + } + ] + }, + { + "condition": [ + { + "input": "encoderVariant", + "value": "timm-resnest50d_1s4x24d", + "eval": "==" + } + ], + "then": [ + { + "action": "show", + "input": "encoderWeights", + "values": [ + "imagenet", + "random" + ] + } + ] + }, + { + "condition": [ + { + "input": "encoderVariant", + "value": "timm-res2net50_26w_4s", + "eval": "==" + } + ], + "then": [ + { + "action": "show", + "input": "encoderWeights", + "values": [ + "imagenet", + "random" + ] + } + ] + }, + { + "condition": [ + { + "input": "encoderVariant", + "value": "timm-res2net101_26w_4s", + "eval": "==" + } + ], + "then": [ + { + "action": "show", + "input": "encoderWeights", + "values": [ + "imagenet", + "random" + ] + } + ] + }, + { + "condition": [ + { + "input": "encoderVariant", + "value": "timm-res2net50_26w_6s", + "eval": "==" + } + ], + "then": [ + { + "action": "show", + "input": "encoderWeights", + "values": [ + "imagenet", + "random" + ] + } + ] + }, + { + "condition": [ + { + "input": "encoderVariant", + "value": "timm-res2net50_26w_8s", + "eval": "==" + } + ], + "then": [ + { + "action": "show", + "input": 
"encoderWeights", + "values": [ + "imagenet", + "random" + ] + } + ] + }, + { + "condition": [ + { + "input": "encoderVariant", + "value": "timm-res2net50_48w_2s", + "eval": "==" + } + ], + "then": [ + { + "action": "show", + "input": "encoderWeights", + "values": [ + "imagenet", + "random" + ] + } + ] + }, + { + "condition": [ + { + "input": "encoderVariant", + "value": "timm-res2net50_14w_8s", + "eval": "==" + } + ], + "then": [ + { + "action": "show", + "input": "encoderWeights", + "values": [ + "imagenet", + "random" + ] + } + ] + }, + { + "condition": [ + { + "input": "encoderVariant", + "value": "timm-res2next50", + "eval": "==" + } + ], + "then": [ + { + "action": "show", + "input": "encoderWeights", + "values": [ + "imagenet", + "random" + ] + } + ] + }, + { + "condition": [ + { + "input": "encoderVariant", + "value": "timm-regnetx_002", + "eval": "==" + } + ], + "then": [ + { + "action": "show", + "input": "encoderWeights", + "values": [ + "imagenet", + "random" + ] + } + ] + }, + { + "condition": [ + { + "input": "encoderVariant", + "value": "timm-regnetx_004", + "eval": "==" + } + ], + "then": [ + { + "action": "show", + "input": "encoderWeights", + "values": [ + "imagenet", + "random" + ] + } + ] + }, + { + "condition": [ + { + "input": "encoderVariant", + "value": "timm-regnetx_006", + "eval": "==" + } + ], + "then": [ + { + "action": "show", + "input": "encoderWeights", + "values": [ + "imagenet", + "random" + ] + } + ] + }, + { + "condition": [ + { + "input": "encoderVariant", + "value": "timm-regnetx_008", + "eval": "==" + } + ], + "then": [ + { + "action": "show", + "input": "encoderWeights", + "values": [ + "imagenet", + "random" + ] + } + ] + }, + { + "condition": [ + { + "input": "encoderVariant", + "value": "timm-regnetx_016", + "eval": "==" + } + ], + "then": [ + { + "action": "show", + "input": "encoderWeights", + "values": [ + "imagenet", + "random" + ] + } + ] + }, + { + "condition": [ + { + "input": "encoderVariant", + "value": 
"timm-regnetx_032", + "eval": "==" + } + ], + "then": [ + { + "action": "show", + "input": "encoderWeights", + "values": [ + "imagenet", + "random" + ] + } + ] + }, + { + "condition": [ + { + "input": "encoderVariant", + "value": "timm-regnetx_040", + "eval": "==" + } + ], + "then": [ + { + "action": "show", + "input": "encoderWeights", + "values": [ + "imagenet", + "random" + ] + } + ] + }, + { + "condition": [ + { + "input": "encoderVariant", + "value": "timm-regnetx_064", + "eval": "==" + } + ], + "then": [ + { + "action": "show", + "input": "encoderWeights", + "values": [ + "imagenet", + "random" + ] + } + ] + }, + { + "condition": [ + { + "input": "encoderVariant", + "value": "timm-regnetx_080", + "eval": "==" + } + ], + "then": [ + { + "action": "show", + "input": "encoderWeights", + "values": [ + "imagenet", + "random" + ] + } + ] + }, + { + "condition": [ + { + "input": "encoderVariant", + "value": "timm-regnetx_120", + "eval": "==" + } + ], + "then": [ + { + "action": "show", + "input": "encoderWeights", + "values": [ + "imagenet", + "random" + ] + } + ] + }, + { + "condition": [ + { + "input": "encoderVariant", + "value": "timm-regnetx_160", + "eval": "==" + } + ], + "then": [ + { + "action": "show", + "input": "encoderWeights", + "values": [ + "imagenet", + "random" + ] + } + ] + }, + { + "condition": [ + { + "input": "encoderVariant", + "value": "timm-regnetx_320", + "eval": "==" + } + ], + "then": [ + { + "action": "show", + "input": "encoderWeights", + "values": [ + "imagenet", + "random" + ] + } + ] + }, + { + "condition": [ + { + "input": "encoderVariant", + "value": "timm-regnety_002", + "eval": "==" + } + ], + "then": [ + { + "action": "show", + "input": "encoderWeights", + "values": [ + "imagenet", + "random" + ] + } + ] + }, + { + "condition": [ + { + "input": "encoderVariant", + "value": "timm-regnety_004", + "eval": "==" + } + ], + "then": [ + { + "action": "show", + "input": "encoderWeights", + "values": [ + "imagenet", + "random" + ] + } + ] 
+ }, + { + "condition": [ + { + "input": "encoderVariant", + "value": "timm-regnety_006", + "eval": "==" + } + ], + "then": [ + { + "action": "show", + "input": "encoderWeights", + "values": [ + "imagenet", + "random" + ] + } + ] + }, + { + "condition": [ + { + "input": "encoderVariant", + "value": "timm-regnety_008", + "eval": "==" + } + ], + "then": [ + { + "action": "show", + "input": "encoderWeights", + "values": [ + "imagenet", + "random" + ] + } + ] + }, + { + "condition": [ + { + "input": "encoderVariant", + "value": "timm-regnety_016", + "eval": "==" + } + ], + "then": [ + { + "action": "show", + "input": "encoderWeights", + "values": [ + "imagenet", + "random" + ] + } + ] + }, + { + "condition": [ + { + "input": "encoderVariant", + "value": "timm-regnety_032", + "eval": "==" + } + ], + "then": [ + { + "action": "show", + "input": "encoderWeights", + "values": [ + "imagenet", + "random" + ] + } + ] + }, + { + "condition": [ + { + "input": "encoderVariant", + "value": "timm-regnety_040", + "eval": "==" + } + ], + "then": [ + { + "action": "show", + "input": "encoderWeights", + "values": [ + "imagenet", + "random" + ] + } + ] + }, + { + "condition": [ + { + "input": "encoderVariant", + "value": "timm-regnety_064", + "eval": "==" + } + ], + "then": [ + { + "action": "show", + "input": "encoderWeights", + "values": [ + "imagenet", + "random" + ] + } + ] + }, + { + "condition": [ + { + "input": "encoderVariant", + "value": "timm-regnety_080", + "eval": "==" + } + ], + "then": [ + { + "action": "show", + "input": "encoderWeights", + "values": [ + "imagenet", + "random" + ] + } + ] + }, + { + "condition": [ + { + "input": "encoderVariant", + "value": "timm-regnety_120", + "eval": "==" + } + ], + "then": [ + { + "action": "show", + "input": "encoderWeights", + "values": [ + "imagenet", + "random" + ] + } + ] + }, + { + "condition": [ + { + "input": "encoderVariant", + "value": "timm-regnety_160", + "eval": "==" + } + ], + "then": [ + { + "action": "show", + 
"input": "encoderWeights", + "values": [ + "imagenet", + "random" + ] + } + ] + }, + { + "condition": [ + { + "input": "encoderVariant", + "value": "timm-regnety_320", + "eval": "==" + } + ], + "then": [ + { + "action": "show", + "input": "encoderWeights", + "values": [ + "imagenet", + "random" + ] + } + ] + }, + { + "condition": [ + { + "input": "encoderVariant", + "value": "timm-gernet_s", + "eval": "==" + } + ], + "then": [ + { + "action": "show", + "input": "encoderWeights", + "values": [ + "imagenet", + "random" + ] + } + ] + }, + { + "condition": [ + { + "input": "encoderVariant", + "value": "timm-gernet_m", + "eval": "==" + } + ], + "then": [ + { + "action": "show", + "input": "encoderWeights", + "values": [ + "imagenet", + "random" + ] + } + ] + }, + { + "condition": [ + { + "input": "encoderVariant", + "value": "timm-gernet_l", + "eval": "==" + } + ], + "then": [ + { + "action": "show", + "input": "encoderWeights", + "values": [ + "imagenet", + "random" + ] + } + ] + }, + { + "condition": [ + { + "input": "encoderVariant", + "value": "senet154", + "eval": "==" + } + ], + "then": [ + { + "action": "show", + "input": "encoderWeights", + "values": [ + "imagenet", + "random" + ] + } + ] + }, + { + "condition": [ + { + "input": "encoderVariant", + "value": "se_resnet50", + "eval": "==" + } + ], + "then": [ + { + "action": "show", + "input": "encoderWeights", + "values": [ + "imagenet", + "random" + ] + } + ] + }, + { + "condition": [ + { + "input": "encoderVariant", + "value": "se_resnet101", + "eval": "==" + } + ], + "then": [ + { + "action": "show", + "input": "encoderWeights", + "values": [ + "imagenet", + "random" + ] + } + ] + }, + { + "condition": [ + { + "input": "encoderVariant", + "value": "se_resnet152", + "eval": "==" + } + ], + "then": [ + { + "action": "show", + "input": "encoderWeights", + "values": [ + "imagenet", + "random" + ] + } + ] + }, + { + "condition": [ + { + "input": "encoderVariant", + "value": "se_resnext50_32x4d", + "eval": "==" + } 
+ ], + "then": [ + { + "action": "show", + "input": "encoderWeights", + "values": [ + "imagenet", + "random" + ] + } + ] + }, + { + "condition": [ + { + "input": "encoderVariant", + "value": "se_resnext101_32x4d", + "eval": "==" + } + ], + "then": [ + { + "action": "show", + "input": "encoderWeights", + "values": [ + "imagenet", + "random" + ] + } + ] + }, + { + "condition": [ + { + "input": "encoderVariant", + "value": "timm-skresnet18", + "eval": "==" + } + ], + "then": [ + { + "action": "show", + "input": "encoderWeights", + "values": [ + "imagenet", + "random" + ] + } + ] + }, + { + "condition": [ + { + "input": "encoderVariant", + "value": "timm-skresnet34", + "eval": "==" + } + ], + "then": [ + { + "action": "show", + "input": "encoderWeights", + "values": [ + "imagenet", + "random" + ] + } + ] + }, + { + "condition": [ + { + "input": "encoderVariant", + "value": "timm-skresnext50_32x4d", + "eval": "==" + } + ], + "then": [ + { + "action": "show", + "input": "encoderWeights", + "values": [ + "imagenet", + "random" + ] + } + ] + }, + { + "condition": [ + { + "input": "encoderVariant", + "value": "densenet121", + "eval": "==" + } + ], + "then": [ + { + "action": "show", + "input": "encoderWeights", + "values": [ + "imagenet", + "random" + ] + } + ] + }, + { + "condition": [ + { + "input": "encoderVariant", + "value": "densenet169", + "eval": "==" + } + ], + "then": [ + { + "action": "show", + "input": "encoderWeights", + "values": [ + "imagenet", + "random" + ] + } + ] + }, + { + "condition": [ + { + "input": "encoderVariant", + "value": "densenet201", + "eval": "==" + } + ], + "then": [ + { + "action": "show", + "input": "encoderWeights", + "values": [ + "imagenet", + "random" + ] + } + ] + }, + { + "condition": [ + { + "input": "encoderVariant", + "value": "densenet161", + "eval": "==" + } + ], + "then": [ + { + "action": "show", + "input": "encoderWeights", + "values": [ + "imagenet", + "random" + ] + } + ] + }, + { + "condition": [ + { + "input": 
"encoderVariant", + "value": "inceptionresnetv2", + "eval": "==" + } + ], + "then": [ + { + "action": "show", + "input": "encoderWeights", + "values": [ + "imagenet", + "imagenet+background", + "random" + ] + } + ] + }, + { + "condition": [ + { + "input": "encoderVariant", + "value": "inceptionv4", + "eval": "==" + } + ], + "then": [ + { + "action": "show", + "input": "encoderWeights", + "values": [ + "imagenet", + "imagenet+background", + "random" + ] + } + ] + }, + { + "condition": [ + { + "input": "encoderVariant", + "value": "xception", + "eval": "==" + } + ], + "then": [ + { + "action": "show", + "input": "encoderWeights", + "values": [ + "imagenet", + "random" + ] + } + ] + }, + { + "condition": [ + { + "input": "encoderVariant", + "value": "efficientnet-b0", + "eval": "==" + } + ], + "then": [ + { + "action": "show", + "input": "encoderWeights", + "values": [ + "imagenet", + "random" + ] + } + ] + }, + { + "condition": [ + { + "input": "encoderVariant", + "value": "efficientnet-b1", + "eval": "==" + } + ], + "then": [ + { + "action": "show", + "input": "encoderWeights", + "values": [ + "imagenet", + "random" + ] + } + ] + }, + { + "condition": [ + { + "input": "encoderVariant", + "value": "efficientnet-b2", + "eval": "==" + } + ], + "then": [ + { + "action": "show", + "input": "encoderWeights", + "values": [ + "imagenet", + "random" + ] + } + ] + }, + { + "condition": [ + { + "input": "encoderVariant", + "value": "efficientnet-b3", + "eval": "==" + } + ], + "then": [ + { + "action": "show", + "input": "encoderWeights", + "values": [ + "imagenet", + "random" + ] + } + ] + }, + { + "condition": [ + { + "input": "encoderVariant", + "value": "efficientnet-b4", + "eval": "==" + } + ], + "then": [ + { + "action": "show", + "input": "encoderWeights", + "values": [ + "imagenet", + "random" + ] + } + ] + }, + { + "condition": [ + { + "input": "encoderVariant", + "value": "efficientnet-b5", + "eval": "==" + } + ], + "then": [ + { + "action": "show", + "input": 
"encoderWeights", + "values": [ + "imagenet", + "random" + ] + } + ] + }, + { + "condition": [ + { + "input": "encoderVariant", + "value": "efficientnet-b6", + "eval": "==" + } + ], + "then": [ + { + "action": "show", + "input": "encoderWeights", + "values": [ + "imagenet", + "random" + ] + } + ] + }, + { + "condition": [ + { + "input": "encoderVariant", + "value": "efficientnet-b7", + "eval": "==" + } + ], + "then": [ + { + "action": "show", + "input": "encoderWeights", + "values": [ + "imagenet", + "random" + ] + } + ] + }, + { + "condition": [ + { + "input": "encoderVariant", + "value": "timm-efficientnet-b0", + "eval": "==" + } + ], + "then": [ + { + "action": "show", + "input": "encoderWeights", + "values": [ + "imagenet", + "advprop", + "noisy-student", + "random" + ] + } + ] + }, + { + "condition": [ + { + "input": "encoderVariant", + "value": "timm-efficientnet-b1", + "eval": "==" + } + ], + "then": [ + { + "action": "show", + "input": "encoderWeights", + "values": [ + "imagenet", + "advprop", + "noisy-student", + "random" + ] + } + ] + }, + { + "condition": [ + { + "input": "encoderVariant", + "value": "timm-efficientnet-b2", + "eval": "==" + } + ], + "then": [ + { + "action": "show", + "input": "encoderWeights", + "values": [ + "imagenet", + "advprop", + "noisy-student", + "random" + ] + } + ] + }, + { + "condition": [ + { + "input": "encoderVariant", + "value": "timm-efficientnet-b3", + "eval": "==" + } + ], + "then": [ + { + "action": "show", + "input": "encoderWeights", + "values": [ + "imagenet", + "advprop", + "noisy-student", + "random" + ] + } + ] + }, + { + "condition": [ + { + "input": "encoderVariant", + "value": "timm-efficientnet-b4", + "eval": "==" + } + ], + "then": [ + { + "action": "show", + "input": "encoderWeights", + "values": [ + "imagenet", + "advprop", + "noisy-student", + "random" + ] + } + ] + }, + { + "condition": [ + { + "input": "encoderVariant", + "value": "timm-efficientnet-b5", + "eval": "==" + } + ], + "then": [ + { + 
"action": "show", + "input": "encoderWeights", + "values": [ + "imagenet", + "advprop", + "noisy-student", + "random" + ] + } + ] + }, + { + "condition": [ + { + "input": "encoderVariant", + "value": "timm-efficientnet-b6", + "eval": "==" + } + ], + "then": [ + { + "action": "show", + "input": "encoderWeights", + "values": [ + "imagenet", + "advprop", + "noisy-student", + "random" + ] + } + ] + }, + { + "condition": [ + { + "input": "encoderVariant", + "value": "timm-efficientnet-b7", + "eval": "==" + } + ], + "then": [ + { + "action": "show", + "input": "encoderWeights", + "values": [ + "imagenet", + "advprop", + "noisy-student", + "random" + ] + } + ] + }, + { + "condition": [ + { + "input": "encoderVariant", + "value": "timm-efficientnet-b8", + "eval": "==" + } + ], + "then": [ + { + "action": "show", + "input": "encoderWeights", + "values": [ + "imagenet", + "advprop", + "random" + ] + } + ] + }, + { + "condition": [ + { + "input": "encoderVariant", + "value": "timm-efficientnet-l2", + "eval": "==" + } + ], + "then": [ + { + "action": "show", + "input": "encoderWeights", + "values": [ + "noisy-student", + "random" + ] + } + ] + }, + { + "condition": [ + { + "input": "encoderVariant", + "value": "timm-efficientnet-lite0", + "eval": "==" + } + ], + "then": [ + { + "action": "show", + "input": "encoderWeights", + "values": [ + "imagenet", + "random" + ] + } + ] + }, + { + "condition": [ + { + "input": "encoderVariant", + "value": "timm-efficientnet-lite1", + "eval": "==" + } + ], + "then": [ + { + "action": "show", + "input": "encoderWeights", + "values": [ + "imagenet", + "random" + ] + } + ] + }, + { + "condition": [ + { + "input": "encoderVariant", + "value": "timm-efficientnet-lite2", + "eval": "==" + } + ], + "then": [ + { + "action": "show", + "input": "encoderWeights", + "values": [ + "imagenet", + "random" + ] + } + ] + }, + { + "condition": [ + { + "input": "encoderVariant", + "value": "timm-efficientnet-lite3", + "eval": "==" + } + ], + "then": [ + { + 
"action": "show", + "input": "encoderWeights", + "values": [ + "imagenet", + "random" + ] + } + ] + }, + { + "condition": [ + { + "input": "encoderVariant", + "value": "timm-efficientnet-lite4", + "eval": "==" + } + ], + "then": [ + { + "action": "show", + "input": "encoderWeights", + "values": [ + "imagenet", + "random" + ] + } + ] + }, + { + "condition": [ + { + "input": "encoderVariant", + "value": "mobilenet_v2", + "eval": "==" + } + ], + "then": [ + { + "action": "show", + "input": "encoderWeights", + "values": [ + "imagenet", + "random" + ] + } + ] + }, + { + "condition": [ + { + "input": "encoderVariant", + "value": "timm-mobilenetv3_large_075", + "eval": "==" + } + ], + "then": [ + { + "action": "show", + "input": "encoderWeights", + "values": [ + "imagenet", + "random" + ] + } + ] + }, + { + "condition": [ + { + "input": "encoderVariant", + "value": "timm-mobilenetv3_large_100", + "eval": "==" + } + ], + "then": [ + { + "action": "show", + "input": "encoderWeights", + "values": [ + "imagenet", + "random" + ] + } + ] + }, + { + "condition": [ + { + "input": "encoderVariant", + "value": "timm-mobilenetv3_large_minimal_100", + "eval": "==" + } + ], + "then": [ + { + "action": "show", + "input": "encoderWeights", + "values": [ + "imagenet", + "random" + ] + } + ] + }, + { + "condition": [ + { + "input": "encoderVariant", + "value": "timm-mobilenetv3_small_075", + "eval": "==" + } + ], + "then": [ + { + "action": "show", + "input": "encoderWeights", + "values": [ + "imagenet", + "random" + ] + } + ] + }, + { + "condition": [ + { + "input": "encoderVariant", + "value": "timm-mobilenetv3_small_100", + "eval": "==" + } + ], + "then": [ + { + "action": "show", + "input": "encoderWeights", + "values": [ + "imagenet", + "random" + ] + } + ] + }, + { + "condition": [ + { + "input": "encoderVariant", + "value": "timm-mobilenetv3_small_minimal_100", + "eval": "==" + } + ], + "then": [ + { + "action": "show", + "input": "encoderWeights", + "values": [ + "imagenet", + 
"random" + ] + } + ] + }, + { + "condition": [ + { + "input": "encoderVariant", + "value": "dpn68", + "eval": "==" + } + ], + "then": [ + { + "action": "show", + "input": "encoderWeights", + "values": [ + "imagenet", + "random" + ] + } + ] + }, + { + "condition": [ + { + "input": "encoderVariant", + "value": "dpn68b", + "eval": "==" + } + ], + "then": [ + { + "action": "show", + "input": "encoderWeights", + "values": [ + "imagenet+5k", + "random" + ] + } + ] + }, + { + "condition": [ + { + "input": "encoderVariant", + "value": "dpn92", + "eval": "==" + } + ], + "then": [ + { + "action": "show", + "input": "encoderWeights", + "values": [ + "imagenet+5k", + "random" + ] + } + ] + }, + { + "condition": [ + { + "input": "encoderVariant", + "value": "dpn98", + "eval": "==" + } + ], + "then": [ + { + "action": "show", + "input": "encoderWeights", + "values": [ + "imagenet", + "random" + ] + } + ] + }, + { + "condition": [ + { + "input": "encoderVariant", + "value": "dpn107", + "eval": "==" + } + ], + "then": [ + { + "action": "show", + "input": "encoderWeights", + "values": [ + "imagenet+5k", + "random" + ] + } + ] + }, + { + "condition": [ + { + "input": "encoderVariant", + "value": "dpn131", + "eval": "==" + } + ], + "then": [ + { + "action": "show", + "input": "encoderWeights", + "values": [ + "imagenet", + "random" + ] + } + ] + }, + { + "condition": [ + { + "input": "encoderVariant", + "value": "vgg11", + "eval": "==" + } + ], + "then": [ + { + "action": "show", + "input": "encoderWeights", + "values": [ + "imagenet", + "random" + ] + } + ] + }, + { + "condition": [ + { + "input": "encoderVariant", + "value": "vgg11_bn", + "eval": "==" + } + ], + "then": [ + { + "action": "show", + "input": "encoderWeights", + "values": [ + "imagenet", + "random" + ] + } + ] + }, + { + "condition": [ + { + "input": "encoderVariant", + "value": "vgg13", + "eval": "==" + } + ], + "then": [ + { + "action": "show", + "input": "encoderWeights", + "values": [ + "imagenet", + "random" + ] 
+ } + ] + }, + { + "condition": [ + { + "input": "encoderVariant", + "value": "vgg13_bn", + "eval": "==" + } + ], + "then": [ + { + "action": "show", + "input": "encoderWeights", + "values": [ + "imagenet", + "random" + ] + } + ] + }, + { + "condition": [ + { + "input": "encoderVariant", + "value": "vgg16", + "eval": "==" + } + ], + "then": [ + { + "action": "show", + "input": "encoderWeights", + "values": [ + "imagenet", + "random" + ] + } + ] + }, + { + "condition": [ + { + "input": "encoderVariant", + "value": "vgg16_bn", + "eval": "==" + } + ], + "then": [ + { + "action": "show", + "input": "encoderWeights", + "values": [ + "imagenet", + "random" + ] + } + ] + }, + { + "condition": [ + { + "input": "encoderVariant", + "value": "vgg19", + "eval": "==" + } + ], + "then": [ + { + "action": "show", + "input": "encoderWeights", + "values": [ + "imagenet", + "random" + ] + } + ] + }, + { + "condition": [ + { + "input": "encoderVariant", + "value": "vgg19_bn", + "eval": "==" + } + ], + "then": [ + { + "action": "show", + "input": "encoderWeights", + "values": [ + "imagenet", + "random" + ] + } + ] + } + ] +} diff --git a/segmentation/polus-smp-training-plugin/requirements-docker.txt b/segmentation/polus-smp-training-plugin/requirements-docker.txt index 21304c015..1a8489277 100644 --- a/segmentation/polus-smp-training-plugin/requirements-docker.txt +++ b/segmentation/polus-smp-training-plugin/requirements-docker.txt @@ -2,4 +2,4 @@ filepattern==1.4.7 bfio[zarr]==2.1.9 albumentations==1.3.0 tensorboard==2.10.1 -segmentation-models-pytorch==0.3.0 \ No newline at end of file +segmentation-models-pytorch==0.3.0 diff --git a/segmentation/polus-smp-training-plugin/src/.gitignore b/segmentation/polus-smp-training-plugin/src/.gitignore index cb1d07bf8..344f079e4 100644 --- a/segmentation/polus-smp-training-plugin/src/.gitignore +++ b/segmentation/polus-smp-training-plugin/src/.gitignore @@ -1 +1 @@ -runs \ No newline at end of file +runs diff --git 
a/segmentation/polus-smp-training-plugin/src/inference.py b/segmentation/polus-smp-training-plugin/src/inference.py index 35ecaaaf5..2d39d0afc 100644 --- a/segmentation/polus-smp-training-plugin/src/inference.py +++ b/segmentation/polus-smp-training-plugin/src/inference.py @@ -1,14 +1,15 @@ import pathlib -import typing +from concurrent.futures import ThreadPoolExecutor import bfio import numpy import torch from segmentation_models_pytorch.base import SegmentationModel from tqdm import tqdm -from concurrent.futures import ThreadPoolExecutor -from utils import Dataset, Tile, UnTile +from utils import Dataset +from utils import Tile +from utils import UnTile # TILE_SIZE must be a multiple of 1024 TILE_SIZE = 2048 @@ -17,35 +18,27 @@ def thread_loader(image_path, device): - with bfio.BioReader(image_path) as reader: - image = reader[:] - image = Dataset.preprocessing(image.astype(numpy.float32)).to(device) - return image + return Dataset.preprocessing(image.astype(numpy.float32)).to(device) def thread_save(image_path, output_dir: pathlib.Path, prediction, i): - - with bfio.BioReader(image_path) as reader: - - with bfio.BioWriter( - output_dir.joinpath(image_path.name), metadata=reader.metadata - ) as writer: - - writer.dtype = numpy.float32 - writer[:] = prediction[i, 0, :-1, :-1].cpu().numpy() + with bfio.BioReader(image_path) as reader, bfio.BioWriter( + output_dir.joinpath(image_path.name), metadata=reader.metadata, + ) as writer: + writer.dtype = numpy.float32 + writer[:] = prediction[i, 0, :-1, :-1].cpu().numpy() def run_inference( *, model: SegmentationModel, device: torch.device, - image_paths: typing.List[pathlib.Path], + image_paths: list[pathlib.Path], output_dir: pathlib.Path, ): - if torch.cuda.device_count() > 1: model = torch.nn.DataParallel(model) tile = Tile(tile_size=(MODEL_TILE_SIZE, MODEL_TILE_SIZE)) @@ -71,7 +64,7 @@ def run_inference( paths = batches[0] for batch in tqdm( - batches[1:], desc=f"running inference on {len(image_paths)} images" + 
batches[1:], desc=f"running inference on {len(image_paths)} images", ): # Load the data patch = torch.stack([t.result() for t in load_threads], axis=0) @@ -82,7 +75,6 @@ def run_inference( load_threads.append(executor.submit(thread_loader, image_path, device)) with torch.no_grad(): - patch, shape = tile(patch) prediction = model.forward(patch.to(device)) prediction = untile(prediction, shape) @@ -93,7 +85,7 @@ def run_inference( for i in range(len(paths)): save_threads.append( - executor.submit(thread_save, paths[i], output_dir, prediction, i) + executor.submit(thread_save, paths[i], output_dir, prediction, i), ) paths = batch @@ -113,10 +105,9 @@ def run_inference( for i in range(len(paths)): save_threads.append( - executor.submit(thread_save, paths[i], output_dir, prediction, i) + executor.submit(thread_save, paths[i], output_dir, prediction, i), ) for t in save_threads: t.result() - return diff --git a/segmentation/polus-smp-training-plugin/src/main.py b/segmentation/polus-smp-training-plugin/src/main.py index 1064375cb..f3166e576 100644 --- a/segmentation/polus-smp-training-plugin/src/main.py +++ b/segmentation/polus-smp-training-plugin/src/main.py @@ -5,120 +5,257 @@ import os from pathlib import Path from typing import Any -from typing import Dict from typing import Optional +import inference import numpy import torch +import training from filepattern import FilePattern -import inference -import training import utils logging.basicConfig( - format='%(asctime)s - %(name)-8s - %(levelname)-8s - %(message)s', - datefmt='%d-%b-%y %H:%M:%S', + format="%(asctime)s - %(name)-8s - %(levelname)-8s - %(message)s", + datefmt="%d-%b-%y %H:%M:%S", ) logger = logging.getLogger("main") logger.setLevel(utils.POLUS_LOG) if __name__ == "__main__": - #TODO: This should be an input parameter per Najib - os.environ['PYTORCH_CUDA_ALLOC_CONF'] = 'max_split_size_mb:128' + # TODO: This should be an input parameter per Najib + os.environ["PYTORCH_CUDA_ALLOC_CONF"] = 
"max_split_size_mb:128" - parser = argparse.ArgumentParser(prog='main', description='Segmentation models training plugin') + parser = argparse.ArgumentParser( + prog="main", description="Segmentation models training plugin", + ) # Input arguments - parser.add_argument('--inferenceMode', dest='inferenceMode', type=str, required=True, - help='\'active\' or \'inactive\' for whether to run in inference mode.') - - parser.add_argument('--imagesInferenceDir', dest='imagesInferenceDir', type=str, required=False, - help='Collection containing images on which to run inference.') - parser.add_argument('--inferencePattern', dest='inferencePattern', type=str, required=False, default='.*', - help='Filename pattern for images on which to run inference.') - - parser.add_argument('--pretrainedModel', dest='pretrainedModel', type=str, required=False, - help='Path to a model that was previously trained with this plugin. ' - 'If starting fresh, you must provide: ' - '\'modelName\', ' - '\'encoderBaseVariantWeights\', and' - '\'optimizerName\'.' - 'See the README for available options.') - - parser.add_argument('--modelName', dest='modelName', type=str, required=False, default='Unet', - help='Which model architecture to use.') - parser.add_argument('--encoderBase', dest='encoderBase', type=str, required=False, default='ResNet', - help='Base encoder to use.') - parser.add_argument('--encoderVariant', dest='encoderVariant', type=str, required=False, default='resnet34', - help='Encoder variant to use.') - parser.add_argument('--encoderWeights', dest='encoderWeights', type=str, required=False, - help='Name of dataset with which the model was pretrained.') - - parser.add_argument('--optimizerName', dest='optimizerName', type=str, required=False, default='Adam', - help='Name of optimization algorithm to use for training the model.') - - parser.add_argument('--batchSize', dest='batchSize', type=int, required=False, - help='Size of each batch for training. 
If left unspecified, we will automatically use ' - 'the largest possible size based on the model architecture and GPU memory.') - - parser.add_argument('--imagesTrainDir', dest='imagesTrainDir', type=str, required=False, - help='Collection containing images on which to train the model.') - parser.add_argument('--labelsTrainDir', dest='labelsTrainDir', type=str, required=False, - help='Collection containing labels, i.e. the ground-truth, for the training images.') - parser.add_argument('--trainPattern', dest='trainPattern', type=str, required=False, default='.*', - help='Filename pattern for training images.') - - parser.add_argument('--imagesValidDir', dest='imagesValidDir', type=str, required=False, - help='Collection containing images on which to validate the model.') - parser.add_argument('--labelsValidDir', dest='labelsValidDir', type=str, required=False, - help='Collection containing labels, i.e. the ground-truth, for the validation images.') - parser.add_argument('--validPattern', dest='validPattern', type=str, required=False, default='.*', - help='Filename pattern for validation images.') - - parser.add_argument('--device', dest='device', type=str, required=False, default='gpu', - help='Device to run process on') - parser.add_argument('--checkpointFrequency', dest='checkFreq', type=int, required=False, - help="How often to update the checkpoints") - - parser.add_argument('--lossName', dest='lossName', type=str, required=False, default='JaccardLoss', - help='Name of loss function to use.') - parser.add_argument('--maxEpochs', dest='maxEpochs', type=int, required=False, default=100, - help='Maximum number of epochs for which to continue training the model.') - parser.add_argument('--patience', dest='patience', type=int, required=False, default=10, - help='Maximum number of epochs to wait for model to improve.') - parser.add_argument('--minDelta', dest='minDelta', type=float, required=False, default=1e-4, - help='Minimum improvement in loss to reset patience.') 
- - parser.add_argument('--tensorboardProfiler', dest='tensorboardProfiler', type=bool, required=False, default=False, - help="Generate a profiler using Tensorboard?") # TODO: Check in WIPP - - parser.add_argument('--outputDir', dest='outputDir', type=str, required=True, - help='Location where the model and the final checkpoint will be saved.') + parser.add_argument( + "--inferenceMode", + dest="inferenceMode", + type=str, + required=True, + help="'active' or 'inactive' for whether to run in inference mode.", + ) + + parser.add_argument( + "--imagesInferenceDir", + dest="imagesInferenceDir", + type=str, + required=False, + help="Collection containing images on which to run inference.", + ) + parser.add_argument( + "--inferencePattern", + dest="inferencePattern", + type=str, + required=False, + default=".*", + help="Filename pattern for images on which to run inference.", + ) + + parser.add_argument( + "--pretrainedModel", + dest="pretrainedModel", + type=str, + required=False, + help="Path to a model that was previously trained with this plugin. " + "If starting fresh, you must provide: " + "'modelName', " + "'encoderBaseVariantWeights', and" + "'optimizerName'." 
+ "See the README for available options.", + ) + + parser.add_argument( + "--modelName", + dest="modelName", + type=str, + required=False, + default="Unet", + help="Which model architecture to use.", + ) + parser.add_argument( + "--encoderBase", + dest="encoderBase", + type=str, + required=False, + default="ResNet", + help="Base encoder to use.", + ) + parser.add_argument( + "--encoderVariant", + dest="encoderVariant", + type=str, + required=False, + default="resnet34", + help="Encoder variant to use.", + ) + parser.add_argument( + "--encoderWeights", + dest="encoderWeights", + type=str, + required=False, + help="Name of dataset with which the model was pretrained.", + ) + + parser.add_argument( + "--optimizerName", + dest="optimizerName", + type=str, + required=False, + default="Adam", + help="Name of optimization algorithm to use for training the model.", + ) + + parser.add_argument( + "--batchSize", + dest="batchSize", + type=int, + required=False, + help="Size of each batch for training. If left unspecified, we will automatically use " + "the largest possible size based on the model architecture and GPU memory.", + ) + + parser.add_argument( + "--imagesTrainDir", + dest="imagesTrainDir", + type=str, + required=False, + help="Collection containing images on which to train the model.", + ) + parser.add_argument( + "--labelsTrainDir", + dest="labelsTrainDir", + type=str, + required=False, + help="Collection containing labels, i.e. the ground-truth, for the training images.", + ) + parser.add_argument( + "--trainPattern", + dest="trainPattern", + type=str, + required=False, + default=".*", + help="Filename pattern for training images.", + ) + + parser.add_argument( + "--imagesValidDir", + dest="imagesValidDir", + type=str, + required=False, + help="Collection containing images on which to validate the model.", + ) + parser.add_argument( + "--labelsValidDir", + dest="labelsValidDir", + type=str, + required=False, + help="Collection containing labels, i.e. 
the ground-truth, for the validation images.", + ) + parser.add_argument( + "--validPattern", + dest="validPattern", + type=str, + required=False, + default=".*", + help="Filename pattern for validation images.", + ) + + parser.add_argument( + "--device", + dest="device", + type=str, + required=False, + default="gpu", + help="Device to run process on", + ) + parser.add_argument( + "--checkpointFrequency", + dest="checkFreq", + type=int, + required=False, + help="How often to update the checkpoints", + ) + + parser.add_argument( + "--lossName", + dest="lossName", + type=str, + required=False, + default="JaccardLoss", + help="Name of loss function to use.", + ) + parser.add_argument( + "--maxEpochs", + dest="maxEpochs", + type=int, + required=False, + default=100, + help="Maximum number of epochs for which to continue training the model.", + ) + parser.add_argument( + "--patience", + dest="patience", + type=int, + required=False, + default=10, + help="Maximum number of epochs to wait for model to improve.", + ) + parser.add_argument( + "--minDelta", + dest="minDelta", + type=float, + required=False, + default=1e-4, + help="Minimum improvement in loss to reset patience.", + ) + + parser.add_argument( + "--tensorboardProfiler", + dest="tensorboardProfiler", + type=bool, + required=False, + default=False, + help="Generate a profiler using Tensorboard?", + ) # TODO: Check in WIPP + + parser.add_argument( + "--outputDir", + dest="outputDir", + type=str, + required=True, + help="Location where the model and the final checkpoint will be saved.", + ) # Parse the arguments args = parser.parse_args() """ Argument parsing """ logger.info("Parsing arguments...") - error_messages = list() + error_messages = [] # Location to save model and checkpoint output_dir = Path(args.outputDir).resolve() - assert output_dir.exists(), f'Directory does not exist: {output_dir}' + assert output_dir.exists(), f"Directory does not exist: {output_dir}" tensorboard_profiler = 
args.tensorboardProfiler # TODO(Najib): Add support for multiple GPUs device = args.device - assert device in ('cpu', 'gpu'), f'Got device {device}' - device: str = device if torch.cuda.is_available() else 'cpu' - device: torch.device = torch.device('cuda' if device == 'gpu' else 'cpu') - logger.info(f'Using device: {device}...') + assert device in ("cpu", "gpu"), f"Got device {device}" + device: str = device if torch.cuda.is_available() else "cpu" + device: torch.device = torch.device("cuda" if device == "gpu" else "cpu") + logger.info(f"Using device: {device}...") inference_mode = args.inferenceMode - assert inference_mode in ('active', 'inactive'), f'--inferenceMode must be one of \'active\' or \'inactive\'.' - inference_mode = inference_mode == 'active' + assert inference_mode in ( + "active", + "inactive", + ), "--inferenceMode must be one of 'active' or 'inactive'." + inference_mode = inference_mode == "active" # Model Configuration/Compilation # Input Arguments @@ -135,9 +272,11 @@ if inference_mode: images_inference_dir = Path(args.imagesInferenceDir).resolve() inference_pattern: str = args.inferencePattern - if images_inference_dir.joinpath('images').is_dir(): - images_inference_dir = images_inference_dir.joinpath('images') - assert images_inference_dir.exists(), f'Directory does not exist: {images_inference_dir}' + if images_inference_dir.joinpath("images").is_dir(): + images_inference_dir = images_inference_dir.joinpath("images") + assert ( + images_inference_dir.exists() + ), f"Directory does not exist: {images_inference_dir}" else: images_train_dir = Path(args.imagesTrainDir).resolve() @@ -148,131 +287,141 @@ labels_valid_dir = Path(args.labelsValidDir).resolve() valid_pattern: str = args.validPattern - if images_train_dir.joinpath('images').is_dir(): - images_train_dir = images_train_dir.joinpath('images') - if labels_train_dir.joinpath('labels').is_dir(): - labels_train_dir = labels_train_dir.joinpath('labels') - - if 
images_valid_dir.joinpath('images').is_dir(): - images_valid_dir = images_valid_dir.joinpath('images') - if labels_valid_dir.joinpath('labels').is_dir(): - labels_valid_dir = labels_valid_dir.joinpath('labels') - - assert images_train_dir.exists(), f'Directory does not exist: {images_train_dir}' - assert labels_train_dir.exists(), f'Directory does not exist: {labels_train_dir}' - assert images_valid_dir.exists(), f'Directory does not exist: {images_valid_dir}' - assert labels_valid_dir.exists(), f'Directory does not exist: {labels_valid_dir}' - - config_path = output_dir.joinpath('config.json') + if images_train_dir.joinpath("images").is_dir(): + images_train_dir = images_train_dir.joinpath("images") + if labels_train_dir.joinpath("labels").is_dir(): + labels_train_dir = labels_train_dir.joinpath("labels") + + if images_valid_dir.joinpath("images").is_dir(): + images_valid_dir = images_valid_dir.joinpath("images") + if labels_valid_dir.joinpath("labels").is_dir(): + labels_valid_dir = labels_valid_dir.joinpath("labels") + + assert ( + images_train_dir.exists() + ), f"Directory does not exist: {images_train_dir}" + assert ( + labels_train_dir.exists() + ), f"Directory does not exist: {labels_train_dir}" + assert ( + images_valid_dir.exists() + ), f"Directory does not exist: {images_valid_dir}" + assert ( + labels_valid_dir.exists() + ), f"Directory does not exist: {labels_valid_dir}" + + config_path = output_dir.joinpath("config.json") # Model Creation/Specification via checkpoint dictionary pretrained_model: Optional[Path] = args.pretrainedModel if pretrained_model is None: - encoder_base = args.encoderBase encoder_variant = args.encoderVariant encoder_weights = args.encoderWeights - if encoder_weights == 'random': + if encoder_weights == "random": encoder_weights = None - checkpoint: Dict[str, Any] = { - 'model_name': args.modelName, - 'encoder_variant': encoder_variant, - 'encoder_weights': encoder_weights, - 'optimizer_name': args.optimizerName, - 'final_epoch': 
0, - 'model_state_dict': None, - 'optimizer_state_dict': None + checkpoint: dict[str, Any] = { + "model_name": args.modelName, + "encoder_variant": encoder_variant, + "encoder_weights": encoder_weights, + "optimizer_name": args.optimizerName, + "final_epoch": 0, + "model_state_dict": None, + "optimizer_state_dict": None, } - with open(config_path, 'w') as config_file: + with open(config_path, "w") as config_file: json.dump(args.__dict__, config_file, indent=4) else: encoder_base = None pretrained_model = Path(pretrained_model).resolve() checkpoint = torch.load( - pretrained_model.joinpath('checkpoint.pth').resolve(), + pretrained_model.joinpath("checkpoint.pth").resolve(), map_location=device, ) if os.path.exists(config_path): - with open(config_path, 'r') as json_obj: + with open(config_path) as json_obj: config_dict = json.load(json_obj) if checkpoint["model_name"] not in utils.MODELS: error_messages.append( - f'modelName must be one of {list(utils.MODELS.keys())}. ' - f'Got {checkpoint["model_name"]} instead.' + f"modelName must be one of {list(utils.MODELS.keys())}. " + f'Got {checkpoint["model_name"]} instead.', ) if encoder_base is not None: if encoder_base not in utils.ENCODERS: error_messages.append( - f'encoderBase must be one of {list(utils.ENCODERS.keys())}. ' - f'Got {encoder_base} instead.' + f"encoderBase must be one of {list(utils.ENCODERS.keys())}. " + f"Got {encoder_base} instead.", ) else: available_variants = utils.ENCODERS[encoder_base] if checkpoint["encoder_variant"] not in available_variants: error_messages.append( - f'encoderVariant for {encoder_base} must be one of {list(available_variants.keys())}. ' - f'Got {checkpoint["encoder_variant"]} instead.' + f"encoderVariant for {encoder_base} must be one of {list(available_variants.keys())}. 
" + f'Got {checkpoint["encoder_variant"]} instead.', ) else: available_weights = available_variants[checkpoint["encoder_variant"]] - if ( - (checkpoint["encoder_weights"] is not None) and - (checkpoint["encoder_weights"] not in available_weights) + if (checkpoint["encoder_weights"] is not None) and ( + checkpoint["encoder_weights"] not in available_weights ): error_messages.append( f'encoderWeights for {checkpoint["encoder_variant"]} must be one of {available_weights}. ' - f'Got {checkpoint["encoder_weights"]} instead.' + f'Got {checkpoint["encoder_weights"]} instead.', ) if checkpoint["optimizer_name"] not in utils.OPTIMIZERS: error_messages.append( - f'optimizerName must be one of {list(utils.OPTIMIZERS.keys())}. ' - f'Got {checkpoint["optimizer_name"]} instead.' + f"optimizerName must be one of {list(utils.OPTIMIZERS.keys())}. " + f'Got {checkpoint["optimizer_name"]} instead.', ) if loss_name not in utils.LOSSES: error_messages.append( - f'lossName must be one of {list(utils.LOSSES.keys())}. ' - f'Got {loss_name} instead.\n' + f"lossName must be one of {list(utils.LOSSES.keys())}. " + f"Got {loss_name} instead.\n", ) if len(error_messages) > 0: - error_messages = ['Oh no! Something went wrong'] + error_messages + ['See the README for details.'] - error_message = '\n'.join(error_messages) + error_messages = ( + ["Oh no! 
Something went wrong", *error_messages, "See the README for details."] + ) + error_message = "\n".join(error_messages) logger.error(error_message) raise ValueError(error_message) - logger.info(f'Using input arguments:') - for arg in sorted(list(args.__dict__.keys())): - logger.info(f'\t{arg} = {args.__dict__[arg]}') + logger.info("Using input arguments:") + for arg in sorted(args.__dict__.keys()): + logger.info(f"\t{arg} = {args.__dict__[arg]}") model, optimizer = training.initialize_model(checkpoint, device) - logger.info('Determining maximum possible batch size...') - num_trainable_params = (utils.TILE_STRIDE ** 2) + sum( + logger.info("Determining maximum possible batch size...") + num_trainable_params = (utils.TILE_STRIDE**2) + sum( numpy.prod(param.size()) for name, param in model.named_parameters() if param.requires_grad ) free_memory = utils.get_device_memory(device) - logger.info(f'found {free_memory} bytes of free memory on device {device}...') + logger.info(f"found {free_memory} bytes of free memory on device {device}...") max_batch_size = int(max(1, free_memory // (2 * 8 * num_trainable_params))) - batch_size = max_batch_size if batch_size is None else min(batch_size, max_batch_size) - logger.info(f'Using batch size: {batch_size}...') + batch_size = ( + max_batch_size if batch_size is None else min(batch_size, max_batch_size) + ) + logger.info(f"Using batch size: {batch_size}...") if inference_mode: model.eval() # noinspection PyUnboundLocalVariable images_fp = FilePattern(images_inference_dir, inference_pattern) - image_paths = [Path(file[0]['file']) for file in images_fp()] + image_paths = [Path(file[0]["file"]) for file in images_fp()] inference.run_inference( model=model, @@ -290,7 +439,7 @@ labels_dir=labels_train_dir, pattern=train_pattern, batch_size=batch_size, - mode="training" + mode="training", ) # noinspection PyUnboundLocalVariable valid_loader = training.initialize_dataloader( @@ -298,32 +447,32 @@ labels_dir=labels_valid_dir, 
pattern=valid_pattern, batch_size=batch_size, - mode="validation" + mode="validation", ) loss_class = utils.LOSSES[loss_name] loss_params = inspect.signature(loss_class.__init__).parameters - loss_kwargs = dict() - if 'mode' in loss_params: - loss_kwargs['mode'] = 'binary' - elif 'smooth_factor' in loss_params: - loss_kwargs['smooth_factor'] = 0.1 + loss_kwargs = {} + if "mode" in loss_params: + loss_kwargs["mode"] = "binary" + elif "smooth_factor" in loss_params: + loss_kwargs["smooth_factor"] = 0.1 loss = loss_class(**loss_kwargs) loss.__name__ = loss_name epoch_iterators = training.initialize_epoch_iterators( model=model, loss=loss, - metrics=list(metric() for metric in utils.METRICS.values()), + metrics=[metric() for metric in utils.METRICS.values()], device=device, optimizer=optimizer, ) if not os.path.exists(os.path.join(output_dir, "trainlogs.csv")): - f = open(os.path.join(output_dir, "trainlogs.csv"), 'w+') + f = open(os.path.join(output_dir, "trainlogs.csv"), "w+") f.close() if not os.path.exists(os.path.join(output_dir, "validlogs.csv")): - f = open(os.path.join(output_dir, "validlogs.csv"), 'w+') + f = open(os.path.join(output_dir, "validlogs.csv"), "w+") f.close() final_epoch = training.train_model( @@ -333,5 +482,5 @@ output_dir=output_dir, checkpoint=checkpoint, checkpoint_frequency=checkpoint_frequency, - tensorboard_profiler=tensorboard_profiler + tensorboard_profiler=tensorboard_profiler, ) diff --git a/segmentation/polus-smp-training-plugin/src/training.py b/segmentation/polus-smp-training-plugin/src/training.py index 668b1e814..531d96a7f 100644 --- a/segmentation/polus-smp-training-plugin/src/training.py +++ b/segmentation/polus-smp-training-plugin/src/training.py @@ -3,10 +3,7 @@ import sys from pathlib import Path from typing import Any -from typing import Dict -from typing import List from typing import Optional -from typing import Tuple from xmlrpc.client import Boolean import albumentations as albu @@ -27,11 +24,9 @@ from 
torch.nn.modules.loss import _Loss as TorchLoss from torch.optim import Optimizer from torch.utils.data import DataLoader as TorchDataLoader -from torch.utils.tensorboard import SummaryWriter import utils -# writer = SummaryWriter() sys.path.append(os.path.dirname(__file__)) logging.basicConfig( @@ -43,9 +38,9 @@ def initialize_model( - checkpoint: Dict[str, Any], + checkpoint: dict[str, Any], device: torch.device, -) -> Tuple[SegmentationModel, Optimizer]: +) -> tuple[SegmentationModel, Optimizer]: """Initializes a model from a Checkpoint. A checkpoint knows the: * 'model_name': The architecture of the model in use. @@ -88,7 +83,7 @@ def initialize_model( # noinspection PyArgumentList optimizer = utils.OPTIMIZERS[checkpoint["optimizer_name"]]( - params=model.parameters() + params=model.parameters(), ) if checkpoint["final_epoch"] > 0: @@ -101,15 +96,15 @@ def initialize_model( def configure_augmentations(): # TODO: These are the default augmentation we do for training. # We will add WIPP arguments for configuring custom augmentations. 
- transforms = [ + return [ albu.RandomCrop(height=256, width=256), utils.PoissonTransform(peak=10, p=0.3), albu.RandomBrightnessContrast(brightness_limit=0.8, contrast_limit=0.4, p=0.2), albu.ShiftScaleRotate( - scale_limit=0.5, rotate_limit=0, shift_limit=0, p=0.5, border_mode=0 + scale_limit=0.5, rotate_limit=0, shift_limit=0, p=0.5, border_mode=0, ), albu.PadIfNeeded( - min_height=256, min_width=256, always_apply=True, border_mode=0 + min_height=256, min_width=256, always_apply=True, border_mode=0, ), albu.OneOf( [ @@ -121,7 +116,6 @@ def configure_augmentations(): ), ] - return transforms def initialize_dataloader( @@ -158,7 +152,7 @@ def initialize_dataloader( augs=configure_augmentations() if mode == "training" else None, ) - loader = utils.MultiEpochsDataLoader( + return utils.MultiEpochsDataLoader( dataset=dataset, num_workers=4, batch_size=batch_size, @@ -167,17 +161,16 @@ def initialize_dataloader( drop_last=True, ) - return loader def initialize_epoch_iterators( *, model: SegmentationModel, loss: TorchLoss, - metrics: List[Metric], + metrics: list[Metric], device: torch.device, optimizer: Optimizer, -) -> Tuple[TrainEpoch, ValidEpoch]: +) -> tuple[TrainEpoch, ValidEpoch]: """Initializes the training and validation iterators that train the model for each epoch. 
@@ -193,9 +186,9 @@ def initialize_epoch_iterators( """ logger.info("Initializing Epoch Iterators...") - epoch_kwargs = dict( - model=model, loss=loss, metrics=metrics, device=device, verbose=True - ) + epoch_kwargs = { + "model": model, "loss": loss, "metrics": metrics, "device": device, "verbose": True, + } trainer = smp.utils.train.TrainEpoch(optimizer=optimizer, **epoch_kwargs) validator = smp.utils.train.ValidEpoch(**epoch_kwargs) @@ -210,13 +203,11 @@ def _log_epoch( logs: str = ", ".join(f"{k}: {v:.8f}" for k, v in logs.items()) logger.info(f"{mode} logs: {logs}") with open(file_path, "a") as outfile: - outfile.write(f"{str(logs)}\n") - return + outfile.write(f"{logs!s}\n") def batch_update_train(trainer, x, y): - """ - TODO(Madhuri): Docs and type hints + """TODO(Madhuri): Docs and type hints. Args: trainer: @@ -235,8 +226,7 @@ def batch_update_train(trainer, x, y): def batch_update_valid(validator, x, y): - """ - TODO(Madhuri): Docs and type hints + """TODO(Madhuri): Docs and type hints. Args: validator: @@ -253,16 +243,15 @@ def batch_update_valid(validator, x, y): def start_training( - epoch_iterators: Tuple[TrainEpoch, ValidEpoch], - dataloaders: Tuple[TorchDataLoader, TorchDataLoader], - early_stopping: Tuple[int, int, float], - checkpoint: Dict[str, Any], + epoch_iterators: tuple[TrainEpoch, ValidEpoch], + dataloaders: tuple[TorchDataLoader, TorchDataLoader], + early_stopping: tuple[int, int, float], + checkpoint: dict[str, Any], checkpoint_frequency: int, output_dir: Path, prof=None, ) -> int: - """ - TODO(Madhuri): Docs + """TODO(Madhuri): Docs. 
Args: epoch_iterators: @@ -296,8 +285,8 @@ def start_training( 5 * "-", f"Epoch: {epoch_index}/{num_epochs + starting_epoch}", 5 * "-", - ) - ) + ), + ), ) train_logs = { @@ -332,7 +321,7 @@ def start_training( for valid_x, valid_y in valid_loader: # iterating through the valid batches valid_x, valid_y = valid_x.to(validator.device), valid_y.to( - validator.device + validator.device, ) valid_loss, valid_y_pred = batch_update_valid(validator, valid_x, valid_y) @@ -358,7 +347,7 @@ def start_training( "final_epoch": epoch_index, "model_state_dict": trainer.model.state_dict(), "optimizer_state_dict": trainer.optimizer.state_dict(), - } + }, ) current_loss = valid_logs[validator.loss.__name__] @@ -372,19 +361,19 @@ def start_training( epochs_without_improvement += 1 if epochs_without_improvement >= patience: logger.info( - f"No improvement for {patience} epochs. Stopping training early..." + f"No improvement for {patience} epochs. Stopping training early...", ) break logger.info( - f"Epochs without Improvement: {epochs_without_improvement} of {patience}" + f"Epochs without Improvement: {epochs_without_improvement} of {patience}", ) if checkpoint_frequency is not None: if (epoch_index % checkpoint_frequency) == 0: # noinspection PyUnboundLocalVariable torch.save( - trainer.model, checkpoints_dir.joinpath(f"model_{epoch_index}.pth") + trainer.model, checkpoints_dir.joinpath(f"model_{epoch_index}.pth"), ) torch.save( checkpoint, @@ -406,10 +395,10 @@ def start_training( def train_model( *, - dataloaders: Tuple[TorchDataLoader, TorchDataLoader], - epoch_iterators: Tuple[TrainEpoch, ValidEpoch], - early_stopping: Tuple[int, int, float], - checkpoint: Dict[str, Any], + dataloaders: tuple[TorchDataLoader, TorchDataLoader], + epoch_iterators: tuple[TrainEpoch, ValidEpoch], + early_stopping: tuple[int, int, float], + checkpoint: dict[str, Any], checkpoint_frequency: int, output_dir: Path, tensorboard_profiler: Boolean, @@ -432,7 +421,6 @@ def train_model( The total number of 
epochs for which the model has been trained by this plugin. """ - # TODO(Najib): Figure out how this will work with WIPP outputs if tensorboard_profiler: tensorboard_dir = output_dir.joinpath("tensorboard") @@ -440,13 +428,12 @@ def train_model( with torch.profiler.profile( schedule=torch.profiler.schedule(wait=1, warmup=1, active=3, repeat=2), on_trace_ready=torch.profiler.tensorboard_trace_handler( - str(tensorboard_dir) + str(tensorboard_dir), ), record_shapes=True, profile_memory=True, with_stack=True, ) as prof: - epoch = start_training( epoch_iterators, dataloaders, @@ -457,7 +444,6 @@ def train_model( prof=prof, ) else: - epoch = start_training( epoch_iterators, dataloaders, diff --git a/segmentation/polus-smp-training-plugin/src/utils/augmentations.py b/segmentation/polus-smp-training-plugin/src/utils/augmentations.py index 497ff0d3a..60ce1af66 100644 --- a/segmentation/polus-smp-training-plugin/src/utils/augmentations.py +++ b/segmentation/polus-smp-training-plugin/src/utils/augmentations.py @@ -1,7 +1,7 @@ import logging +import math import typing -import math import torch import torchvision from albumentations.core.transforms_interface import BasicTransform @@ -22,12 +22,12 @@ logger.setLevel(helpers.POLUS_LOG) -class LocalNorm(object): +class LocalNorm: def __init__( self, radius: int = 16, max_response: typing.Union[int, float] = 6, - ): + ) -> None: self.radius: int = radius self.window_size: int = 2 * radius self.max_response: float = float(max_response) @@ -82,42 +82,39 @@ def __local_response(self, image: Tensor): class GlobalNorm: - # TODO(Najib) pass class PoissonTransform(BasicTransform): """Apply poisson noise to float32 images.""" - def __init__(self, peak: int, p: float = 0.5): - """ - Args: - peak: [1-10] high values introduces more noise in the image. - p: probability of applying the transform. + def __init__(self, peak: int, p: float = 0.5) -> None: + """Args: + peak: [1-10] high values introduces more noise in the image. 
+ p: probability of applying the transform. """ if not 1 <= peak <= 10: message = f"'peak' must be in the range [1, 10]. Got {peak} instead." logger.error(message) raise ValueError(message) - super(PoissonTransform, self).__init__(p=p) + super().__init__(p=p) self.peak: int = peak def apply(self, image: Tensor, **_): value = torch.tensor(math.exp(10 - self.peak)) if torch.any(torch.isnan(image)): - message = f"image had nan values." + message = "image had nan values." logger.error(message) raise ValueError(message) if torch.any(torch.lt(image, 0)): - message = f"image had negative values." + message = "image had negative values." logger.error(message) raise ValueError(message) - noisy_image = torch.poisson(image * value).float() / value - return noisy_image + return torch.poisson(image * value).float() / value def update_params(self, params, **kwargs): if hasattr(self, "peak"): @@ -130,9 +127,9 @@ def targets(self): def get_params_dependent_on_targets( self, - params: typing.Dict[str, typing.Any], - ) -> typing.Dict[str, typing.Any]: + params: dict[str, typing.Any], + ) -> dict[str, typing.Any]: raise NotImplementedError - def get_transform_init_args_names(self) -> typing.Tuple[str, ...]: + def get_transform_init_args_names(self) -> tuple[str, ...]: raise NotImplementedError diff --git a/segmentation/polus-smp-training-plugin/src/utils/dataset.py b/segmentation/polus-smp-training-plugin/src/utils/dataset.py index 121071b93..c9102c9fc 100644 --- a/segmentation/polus-smp-training-plugin/src/utils/dataset.py +++ b/segmentation/polus-smp-training-plugin/src/utils/dataset.py @@ -1,6 +1,5 @@ import logging from pathlib import Path -from typing import List from typing import Union import albumentations @@ -34,20 +33,19 @@ class Dataset(TorchDataset): [ torchvision.transforms.ToTensor(), augmentations.LocalNorm( - radius=128 + radius=128, ), # TODO(Najib): Replace with Global Norm torch.nn.Sigmoid(), - ] + ], ) def __init__( self, images: numpy.ndarray, - labels: 
Union[numpy.ndarray, List[Path]], + labels: Union[numpy.ndarray, list[Path]], augs=None, preprocessing=None, - ): - + ) -> None: self.inference_mode = isinstance(labels, list) self.images, self.labels = images, labels @@ -57,7 +55,6 @@ def __init__( self.preprocessing = preprocessing def __getitem__(self, index: int): - if self.inference_mode: return str(self.labels[index]) @@ -78,13 +75,14 @@ def __getitem__(self, index: int): label_tile = label_tile[None, ...] if image_tile.shape != label_tile.shape: + msg = f"Image Tile {image_tile.shape} and Label Tile {label_tile.shape} do not have matching shapes." raise ValueError( - f"Image Tile {image_tile.shape} and Label Tile {label_tile.shape} do not have matching shapes." + msg, ) return image_tile, label_tile - def __len__(self): + def __len__(self) -> int: return len(self.images) @@ -93,14 +91,14 @@ class MultiEpochsDataLoader(TorchDataLoader): Explain what this is supposed to do and why have it at all. """ - def __init__(self, *args, **kwargs): + def __init__(self, *args, **kwargs) -> None: super().__init__(*args, **kwargs) self._DataLoader__initialized = False self.batch_sampler = _RepeatSampler(self.batch_sampler) self._DataLoader__initialized = True self.iterator = super().__iter__() - def __len__(self): + def __len__(self) -> int: return len(self.batch_sampler.sampler) def __iter__(self): @@ -108,13 +106,14 @@ def __iter__(self): yield next(self.iterator) -class _RepeatSampler(object): +class _RepeatSampler: """Sampler that repeats forever. + Args: - sampler (Sampler) + sampler (Sampler). 
""" - def __init__(self, sampler): + def __init__(self, sampler) -> None: self.sampler = sampler def __iter__(self): @@ -123,15 +122,14 @@ def __iter__(self): class Tile(torch.nn.Module): - """Tile an input""" + """Tile an input.""" - def __init__(self, tile_size=(512, 512)): - super(Tile, self).__init__() + def __init__(self, tile_size=(512, 512)) -> None: + super().__init__() self.tile_size = tile_size def forward(self, x): - with torch.no_grad(): n, c, h, w = x.shape @@ -153,7 +151,7 @@ def forward(self, x): # Reshape the data into proper torch format x = x.permute(0, 1, 2, 4, 3, 5).reshape( - -1, c, self.tile_size[0], self.tile_size[1] + -1, c, self.tile_size[0], self.tile_size[1], ) # Return both the tiled input and the shape of the original tensor @@ -161,10 +159,9 @@ def forward(self, x): class UnTile(Tile): - """Untile an input""" + """Untile an input.""" def forward(self, x, output_shape): - with torch.no_grad(): n, c, h, w = x.shape @@ -175,15 +172,14 @@ def forward(self, x, output_shape): # Reshape the data into tiles x = x.reshape( - n_images, c, h_tiles, w_tiles, self.tile_size[0], self.tile_size[1] + n_images, c, h_tiles, w_tiles, self.tile_size[0], self.tile_size[1], ) # Reconstruct original image size x = x.permute(0, 1, 2, 4, 3, 5) x = x.reshape( - n_images, c, h_tiles * self.tile_size[0], w_tiles * self.tile_size[1] + n_images, c, h_tiles * self.tile_size[0], w_tiles * self.tile_size[1], ) - x = x[:, :, : output_shape[2], : output_shape[3]] + return x[:, :, : output_shape[2], : output_shape[3]] - return x diff --git a/segmentation/polus-smp-training-plugin/src/utils/helpers.py b/segmentation/polus-smp-training-plugin/src/utils/helpers.py index 88351c655..a2614e802 100644 --- a/segmentation/polus-smp-training-plugin/src/utils/helpers.py +++ b/segmentation/polus-smp-training-plugin/src/utils/helpers.py @@ -1,10 +1,8 @@ import logging import os +from collections.abc import Generator from pathlib import Path -from typing import Generator -from typing 
import List from typing import Optional -from typing import Tuple import numpy import torch @@ -13,31 +11,31 @@ from tqdm import tqdm __all__ = [ - 'POLUS_LOG', - 'TILE_STRIDE', - 'Tiles', - 'get_labels_mapping', - 'get_tiles_mapping', - 'get_device_memory', + "POLUS_LOG", + "TILE_STRIDE", + "Tiles", + "get_labels_mapping", + "get_tiles_mapping", + "get_device_memory", ] -POLUS_LOG = getattr(logging, os.environ.get('POLUS_LOG', 'INFO')) +POLUS_LOG = getattr(logging, os.environ.get("POLUS_LOG", "INFO")) TILE_STRIDE = 256 # List of 5-tuples of (file-path, x_min, x_max, y_min, y_max) -Tiles = List[Tuple[Path, int, int, int, int]] +Tiles = list[tuple[Path, int, int, int, int]] def get_labels_mapping( - images_fp: FilePattern, - labels_fp: Optional[FilePattern], -) -> Tuple[numpy.ndarray, numpy.ndarray]: - """ Creates a filename map between images and labels - In the case where image filenames have different filename + images_fp: FilePattern, + labels_fp: Optional[FilePattern], +) -> tuple[numpy.ndarray, numpy.ndarray]: + """Creates a filename map between images and labels + In the case where image filenames have different filename pattern than label filenames, this function creates a map - between the corresponding images and labels - + between the corresponding images and labels. + Args: images_fp: filepattern object for images labels_fp: filepattern object for labels @@ -46,31 +44,21 @@ def get_labels_mapping( dictionary containing mapping between image & label names """ # TODO(Najib): Get this working again. 
This used to do many-to-one matching - # labels_map = { # file[0]['file']: labels_fp.get_matching(**{ - # k.upper(): v # for k, v in file[0].items() # if k != 'file' # })[0]['file'] # for file in images_fp() - # } - # image_array = numpy.zeros((len(images_fp()))) - - image_list = list() - image_paths = [ - Path(file[0]['file']).resolve() - for file in images_fp() - ] - - label_list = list() - label_paths = [ - Path(file[0]['file']).resolve() - for file in labels_fp() - ] + + image_list = [] + image_paths = [Path(file[0]["file"]).resolve() for file in images_fp()] + + label_list = [] + label_paths = [Path(file[0]["file"]).resolve() for file in labels_fp()] for image_file, label_file in tqdm( - zip(image_paths, label_paths), - desc=f'loading {len(image_paths)} images', + zip(image_paths, label_paths), + desc=f"loading {len(image_paths)} images", ): # TODO(Najib): Use set intersection to find matching pairs and raise warning about unmatched images/labels. if label_file is not None: @@ -98,7 +86,9 @@ def get_labels_mapping( return image_array, label_array -def iter_tiles_2d(image_path: Path) -> Generator[Tuple[Path, int, int, int, int], None, None]: +def iter_tiles_2d( + image_path: Path, +) -> Generator[tuple[Path, int, int, int, int], None, None]: # TODO(Najib): Open relevant PR to bfio for tile-iterators with BioReader(image_path) as reader: y_end, x_end = reader.Y, reader.X @@ -116,19 +106,19 @@ def iter_tiles_2d(image_path: Path) -> Generator[Tuple[Path, int, int, int, int] yield image_path, y_min, y_max, x_min, x_max -def get_tiles_mapping(image_paths: List[Path]) -> Tiles: - """ creates a tile map for the Dataset class - This function iterates over all the files in the input - collection and creates a dictionary that can be used in - __getitem__ function in the Dataset class. 
- +def get_tiles_mapping(image_paths: list[Path]) -> Tiles: + """Creates a tile map for the Dataset class + This function iterates over all the files in the input + collection and creates a dictionary that can be used in + __getitem__ function in the Dataset class. + Args: image_paths: The paths to the images. - + Returns: All tile mappings """ - tiles: Tiles = list() + tiles: Tiles = [] for file_name in image_paths: tiles.extend(iter_tiles_2d(file_name)) @@ -137,11 +127,10 @@ def get_tiles_mapping(image_paths: List[Path]) -> Tiles: def get_device_memory(device: torch.device) -> int: - """ Calculates the amount of memory available on the given device. - """ - if 'cpu' in device.type: - _, _, free_memory = map(int, os.popen('free -t -m').readlines()[-1].split()[1:]) - free_memory *= (1024 ** 2) + """Calculates the amount of memory available on the given device.""" + if "cpu" in device.type: + _, _, free_memory = map(int, os.popen("free -t -m").readlines()[-1].split()[1:]) + free_memory *= 1024**2 # Use up to a quarter of RAM for CPU training free_memory = free_memory // 4 else: diff --git a/segmentation/polus-smp-training-plugin/src/utils/options.py b/segmentation/polus-smp-training-plugin/src/utils/options.py index 9787561c3..a19b15f06 100644 --- a/segmentation/polus-smp-training-plugin/src/utils/options.py +++ b/segmentation/polus-smp-training-plugin/src/utils/options.py @@ -1,194 +1,190 @@ -from typing import Dict -from typing import List -from typing import Set -from typing import Type import segmentation_models_pytorch as smp import torch from segmentation_models_pytorch.base import SegmentationModel from segmentation_models_pytorch.utils.base import Metric + # noinspection PyProtectedMember from torch.nn.modules.loss import _Loss as TorchLoss from torch.optim import Optimizer __all__ = [ - 'MODELS', - 'ENCODERS', - 'OPTIMIZERS', - 'LOSSES', - 'METRICS', - 'MODEL_NAMES', - 'BASE_ENCODERS', - 'ENCODER_VARIANTS', - 'ENCODER_WEIGHTS', - 'OPTIMIZER_NAMES', - 
'LOSS_NAMES', + "MODELS", + "ENCODERS", + "OPTIMIZERS", + "LOSSES", + "METRICS", + "MODEL_NAMES", + "BASE_ENCODERS", + "ENCODER_VARIANTS", + "ENCODER_WEIGHTS", + "OPTIMIZER_NAMES", + "LOSS_NAMES", ] -MODELS: Dict[str, Type[SegmentationModel]] = { - 'Unet': smp.Unet, - 'UnetPlusPlus': smp.UnetPlusPlus, - 'MAnet': smp.MAnet, - 'Linknet': smp.Linknet, - 'FPN': smp.FPN, - 'PSPNet': smp.PSPNet, - 'PAN': smp.PAN, - 'DeepLabV3': smp.DeepLabV3, - 'DeepLabV3Plus': smp.DeepLabV3Plus, +MODELS: dict[str, type[SegmentationModel]] = { + "Unet": smp.Unet, + "UnetPlusPlus": smp.UnetPlusPlus, + "MAnet": smp.MAnet, + "Linknet": smp.Linknet, + "FPN": smp.FPN, + "PSPNet": smp.PSPNet, + "PAN": smp.PAN, + "DeepLabV3": smp.DeepLabV3, + "DeepLabV3Plus": smp.DeepLabV3Plus, } -MODEL_NAMES: List[str] = list(MODELS.keys()) +MODEL_NAMES: list[str] = list(MODELS.keys()) # A dictionary of base encoder names to a dict of specific encoder names. # The inner dictionaries are encoder names to their pretrained weights -# { base-encoder: { encoder: [weights] } } -ENCODERS: Dict[str, Dict[str, List[str]]] = { - 'ResNet': { - 'resnet18': ['imagenet', 'ssl', 'swsl'], - 'resnet34': ['imagenet'], - 'resnet50': ['imagenet', 'ssl', 'swsl'], - 'resnet101': ['imagenet'], - 'resnet152': ['imagenet'], +ENCODERS: dict[str, dict[str, list[str]]] = { + "ResNet": { + "resnet18": ["imagenet", "ssl", "swsl"], + "resnet34": ["imagenet"], + "resnet50": ["imagenet", "ssl", "swsl"], + "resnet101": ["imagenet"], + "resnet152": ["imagenet"], }, - 'ResNeXt': { - 'resnext50_32x4d': ['imagenet', 'ssl', 'swsl'], - 'resnext101_32x4d': ['ssl', 'swsl'], - 'resnext101_32x8d': ['imagenet', 'instagram', 'ssl', 'swsl'], - 'resnext101_32x16d': ['instagram', 'ssl', 'swsl'], - 'resnext101_32x32d': ['instagram'], - 'resnext101_32x48d': ['instagram'], + "ResNeXt": { + "resnext50_32x4d": ["imagenet", "ssl", "swsl"], + "resnext101_32x4d": ["ssl", "swsl"], + "resnext101_32x8d": ["imagenet", "instagram", "ssl", "swsl"], + "resnext101_32x16d": 
["instagram", "ssl", "swsl"], + "resnext101_32x32d": ["instagram"], + "resnext101_32x48d": ["instagram"], }, - 'ResNeSt': { - 'timm-resnest14d': ['imagenet'], - 'timm-resnest26d': ['imagenet'], - 'timm-resnest50d': ['imagenet'], - 'timm-resnest101e': ['imagenet'], - 'timm-resnest200e': ['imagenet'], - 'timm-resnest269e': ['imagenet'], - 'timm-resnest50d_4s2x40d': ['imagenet'], - 'timm-resnest50d_1s4x24d': ['imagenet'], + "ResNeSt": { + "timm-resnest14d": ["imagenet"], + "timm-resnest26d": ["imagenet"], + "timm-resnest50d": ["imagenet"], + "timm-resnest101e": ["imagenet"], + "timm-resnest200e": ["imagenet"], + "timm-resnest269e": ["imagenet"], + "timm-resnest50d_4s2x40d": ["imagenet"], + "timm-resnest50d_1s4x24d": ["imagenet"], }, - 'Res2Ne(X)t': { - 'timm-res2net50_26w_4s': ['imagenet'], - 'timm-res2net101_26w_4s': ['imagenet'], - 'timm-res2net50_26w_6s': ['imagenet'], - 'timm-res2net50_26w_8s': ['imagenet'], - 'timm-res2net50_48w_2s': ['imagenet'], - 'timm-res2net50_14w_8s': ['imagenet'], - 'timm-res2next50': ['imagenet'], + "Res2Ne(X)t": { + "timm-res2net50_26w_4s": ["imagenet"], + "timm-res2net101_26w_4s": ["imagenet"], + "timm-res2net50_26w_6s": ["imagenet"], + "timm-res2net50_26w_8s": ["imagenet"], + "timm-res2net50_48w_2s": ["imagenet"], + "timm-res2net50_14w_8s": ["imagenet"], + "timm-res2next50": ["imagenet"], }, - 'RegNet(x/y)': { - 'timm-regnetx_002': ['imagenet'], - 'timm-regnetx_004': ['imagenet'], - 'timm-regnetx_006': ['imagenet'], - 'timm-regnetx_008': ['imagenet'], - 'timm-regnetx_016': ['imagenet'], - 'timm-regnetx_032': ['imagenet'], - 'timm-regnetx_040': ['imagenet'], - 'timm-regnetx_064': ['imagenet'], - 'timm-regnetx_080': ['imagenet'], - 'timm-regnetx_120': ['imagenet'], - 'timm-regnetx_160': ['imagenet'], - 'timm-regnetx_320': ['imagenet'], - 'timm-regnety_002': ['imagenet'], - 'timm-regnety_004': ['imagenet'], - 'timm-regnety_006': ['imagenet'], - 'timm-regnety_008': ['imagenet'], - 'timm-regnety_016': ['imagenet'], - 'timm-regnety_032': 
['imagenet'], - 'timm-regnety_040': ['imagenet'], - 'timm-regnety_064': ['imagenet'], - 'timm-regnety_080': ['imagenet'], - 'timm-regnety_120': ['imagenet'], - 'timm-regnety_160': ['imagenet'], - 'timm-regnety_320': ['imagenet'], + "RegNet(x/y)": { + "timm-regnetx_002": ["imagenet"], + "timm-regnetx_004": ["imagenet"], + "timm-regnetx_006": ["imagenet"], + "timm-regnetx_008": ["imagenet"], + "timm-regnetx_016": ["imagenet"], + "timm-regnetx_032": ["imagenet"], + "timm-regnetx_040": ["imagenet"], + "timm-regnetx_064": ["imagenet"], + "timm-regnetx_080": ["imagenet"], + "timm-regnetx_120": ["imagenet"], + "timm-regnetx_160": ["imagenet"], + "timm-regnetx_320": ["imagenet"], + "timm-regnety_002": ["imagenet"], + "timm-regnety_004": ["imagenet"], + "timm-regnety_006": ["imagenet"], + "timm-regnety_008": ["imagenet"], + "timm-regnety_016": ["imagenet"], + "timm-regnety_032": ["imagenet"], + "timm-regnety_040": ["imagenet"], + "timm-regnety_064": ["imagenet"], + "timm-regnety_080": ["imagenet"], + "timm-regnety_120": ["imagenet"], + "timm-regnety_160": ["imagenet"], + "timm-regnety_320": ["imagenet"], }, - 'GERNet': { - 'timm-gernet_s': ['imagenet'], - 'timm-gernet_m': ['imagenet'], - 'timm-gernet_l': ['imagenet'], + "GERNet": { + "timm-gernet_s": ["imagenet"], + "timm-gernet_m": ["imagenet"], + "timm-gernet_l": ["imagenet"], }, - 'SE-Net': { - 'senet154': ['imagenet'], - 'se_resnet50': ['imagenet'], - 'se_resnet101': ['imagenet'], - 'se_resnet152': ['imagenet'], - 'se_resnext50_32x4d': ['imagenet'], - 'se_resnext101_32x4d': ['imagenet'], + "SE-Net": { + "senet154": ["imagenet"], + "se_resnet50": ["imagenet"], + "se_resnet101": ["imagenet"], + "se_resnet152": ["imagenet"], + "se_resnext50_32x4d": ["imagenet"], + "se_resnext101_32x4d": ["imagenet"], }, - 'SK-ResNe(X)t': { - 'timm-skresnet18': ['imagenet'], - 'timm-skresnet34': ['imagenet'], - 'timm-skresnext50_32x4d': ['imagenet'], + "SK-ResNe(X)t": { + "timm-skresnet18": ["imagenet"], + "timm-skresnet34": ["imagenet"], + 
"timm-skresnext50_32x4d": ["imagenet"], }, - 'DenseNet': { - 'densenet121': ['imagenet'], - 'densenet169': ['imagenet'], - 'densenet201': ['imagenet'], - 'densenet161': ['imagenet'], + "DenseNet": { + "densenet121": ["imagenet"], + "densenet169": ["imagenet"], + "densenet201": ["imagenet"], + "densenet161": ["imagenet"], }, - 'Inception': { - 'inceptionresnetv2': ['imagenet', 'imagenet+background'], - 'inceptionv4': ['imagenet', 'imagenet+background'], - 'xception': ['imagenet'], + "Inception": { + "inceptionresnetv2": ["imagenet", "imagenet+background"], + "inceptionv4": ["imagenet", "imagenet+background"], + "xception": ["imagenet"], }, - 'EfficientNet': { - 'efficientnet-b0': ['imagenet'], - 'efficientnet-b1': ['imagenet'], - 'efficientnet-b2': ['imagenet'], - 'efficientnet-b3': ['imagenet'], - 'efficientnet-b4': ['imagenet'], - 'efficientnet-b5': ['imagenet'], - 'efficientnet-b6': ['imagenet'], - 'efficientnet-b7': ['imagenet'], - 'timm-efficientnet-b0': ['imagenet', 'advprop', 'noisy-student'], - 'timm-efficientnet-b1': ['imagenet', 'advprop', 'noisy-student'], - 'timm-efficientnet-b2': ['imagenet', 'advprop', 'noisy-student'], - 'timm-efficientnet-b3': ['imagenet', 'advprop', 'noisy-student'], - 'timm-efficientnet-b4': ['imagenet', 'advprop', 'noisy-student'], - 'timm-efficientnet-b5': ['imagenet', 'advprop', 'noisy-student'], - 'timm-efficientnet-b6': ['imagenet', 'advprop', 'noisy-student'], - 'timm-efficientnet-b7': ['imagenet', 'advprop', 'noisy-student'], - 'timm-efficientnet-b8': ['imagenet', 'advprop'], - 'timm-efficientnet-l2': ['noisy-student'], - 'timm-efficientnet-lite0': ['imagenet'], - 'timm-efficientnet-lite1': ['imagenet'], - 'timm-efficientnet-lite2': ['imagenet'], - 'timm-efficientnet-lite3': ['imagenet'], - 'timm-efficientnet-lite4': ['imagenet'], + "EfficientNet": { + "efficientnet-b0": ["imagenet"], + "efficientnet-b1": ["imagenet"], + "efficientnet-b2": ["imagenet"], + "efficientnet-b3": ["imagenet"], + "efficientnet-b4": ["imagenet"], + 
"efficientnet-b5": ["imagenet"], + "efficientnet-b6": ["imagenet"], + "efficientnet-b7": ["imagenet"], + "timm-efficientnet-b0": ["imagenet", "advprop", "noisy-student"], + "timm-efficientnet-b1": ["imagenet", "advprop", "noisy-student"], + "timm-efficientnet-b2": ["imagenet", "advprop", "noisy-student"], + "timm-efficientnet-b3": ["imagenet", "advprop", "noisy-student"], + "timm-efficientnet-b4": ["imagenet", "advprop", "noisy-student"], + "timm-efficientnet-b5": ["imagenet", "advprop", "noisy-student"], + "timm-efficientnet-b6": ["imagenet", "advprop", "noisy-student"], + "timm-efficientnet-b7": ["imagenet", "advprop", "noisy-student"], + "timm-efficientnet-b8": ["imagenet", "advprop"], + "timm-efficientnet-l2": ["noisy-student"], + "timm-efficientnet-lite0": ["imagenet"], + "timm-efficientnet-lite1": ["imagenet"], + "timm-efficientnet-lite2": ["imagenet"], + "timm-efficientnet-lite3": ["imagenet"], + "timm-efficientnet-lite4": ["imagenet"], }, - 'MobileNet': { - 'mobilenet_v2': ['imagenet'], - 'timm-mobilenetv3_large_075': ['imagenet'], - 'timm-mobilenetv3_large_100': ['imagenet'], - 'timm-mobilenetv3_large_minimal_100': ['imagenet'], - 'timm-mobilenetv3_small_075': ['imagenet'], - 'timm-mobilenetv3_small_100': ['imagenet'], - 'timm-mobilenetv3_small_minimal_100': ['imagenet'], + "MobileNet": { + "mobilenet_v2": ["imagenet"], + "timm-mobilenetv3_large_075": ["imagenet"], + "timm-mobilenetv3_large_100": ["imagenet"], + "timm-mobilenetv3_large_minimal_100": ["imagenet"], + "timm-mobilenetv3_small_075": ["imagenet"], + "timm-mobilenetv3_small_100": ["imagenet"], + "timm-mobilenetv3_small_minimal_100": ["imagenet"], }, - 'DPN': { - 'dpn68': ['imagenet'], - 'dpn68b': ['imagenet+5k'], - 'dpn92': ['imagenet+5k'], - 'dpn98': ['imagenet'], - 'dpn107': ['imagenet+5k'], - 'dpn131': ['imagenet'], + "DPN": { + "dpn68": ["imagenet"], + "dpn68b": ["imagenet+5k"], + "dpn92": ["imagenet+5k"], + "dpn98": ["imagenet"], + "dpn107": ["imagenet+5k"], + "dpn131": ["imagenet"], }, - 
'VGG': { - 'vgg11': ['imagenet'], - 'vgg11_bn': ['imagenet'], - 'vgg13': ['imagenet'], - 'vgg13_bn': ['imagenet'], - 'vgg16': ['imagenet'], - 'vgg16_bn': ['imagenet'], - 'vgg19': ['imagenet'], - 'vgg19_bn': ['imagenet'], + "VGG": { + "vgg11": ["imagenet"], + "vgg11_bn": ["imagenet"], + "vgg13": ["imagenet"], + "vgg13_bn": ["imagenet"], + "vgg16": ["imagenet"], + "vgg16_bn": ["imagenet"], + "vgg19": ["imagenet"], + "vgg19_bn": ["imagenet"], }, } -BASE_ENCODERS: List[str] = list(ENCODERS.keys()) -ENCODER_VARIANTS: List[str] = list() -ENCODER_WEIGHTS: Set[str] = {'random'} +BASE_ENCODERS: list[str] = list(ENCODERS.keys()) +ENCODER_VARIANTS: list[str] = [] +ENCODER_WEIGHTS: set[str] = {"random"} for encoder, variants in ENCODERS.items(): ENCODER_VARIANTS.extend(variants.keys()) @@ -196,39 +192,38 @@ for variant, weights in variants.items(): ENCODER_WEIGHTS.update(weights) -OPTIMIZERS: Dict[str, Type[Optimizer]] = { - 'Adadelta': torch.optim.Adadelta, - 'Adagrad': torch.optim.Adagrad, - 'Adam': torch.optim.Adam, - 'AdamW': torch.optim.AdamW, - 'SparseAdam': torch.optim.SparseAdam, - 'Adamax': torch.optim.Adamax, - 'ASGD': torch.optim.ASGD, - 'LBFGS': torch.optim.LBFGS, - 'RMSprop': torch.optim.RMSprop, - 'Rprop': torch.optim.Rprop, - 'SGD': torch.optim.SGD, +OPTIMIZERS: dict[str, type[Optimizer]] = { + "Adadelta": torch.optim.Adadelta, + "Adagrad": torch.optim.Adagrad, + "Adam": torch.optim.Adam, + "AdamW": torch.optim.AdamW, + "SparseAdam": torch.optim.SparseAdam, + "Adamax": torch.optim.Adamax, + "ASGD": torch.optim.ASGD, + "LBFGS": torch.optim.LBFGS, + "RMSprop": torch.optim.RMSprop, + "Rprop": torch.optim.Rprop, + "SGD": torch.optim.SGD, } -OPTIMIZER_NAMES: List[str] = list(OPTIMIZERS.keys()) +OPTIMIZER_NAMES: list[str] = list(OPTIMIZERS.keys()) -LOSSES: Dict[str, Type[TorchLoss]] = { - 'JaccardLoss': smp.losses.JaccardLoss, - 'DiceLoss': smp.losses.DiceLoss, - 'TverskyLoss': smp.losses.TverskyLoss, - 'FocalLoss': smp.losses.FocalLoss, - 'LovaszLoss': 
smp.losses.LovaszLoss, - 'SoftBCEWithLogitsLoss': smp.losses.SoftBCEWithLogitsLoss, - 'SoftCrossEntropyLoss': smp.losses.SoftCrossEntropyLoss, - # 'MCCLoss': smp.losses.MCCLoss, +LOSSES: dict[str, type[TorchLoss]] = { + "JaccardLoss": smp.losses.JaccardLoss, + "DiceLoss": smp.losses.DiceLoss, + "TverskyLoss": smp.losses.TverskyLoss, + "FocalLoss": smp.losses.FocalLoss, + "LovaszLoss": smp.losses.LovaszLoss, + "SoftBCEWithLogitsLoss": smp.losses.SoftBCEWithLogitsLoss, + "SoftCrossEntropyLoss": smp.losses.SoftCrossEntropyLoss, } -LOSS_NAMES: List[str] = list(LOSSES.keys()) +LOSS_NAMES: list[str] = list(LOSSES.keys()) -METRICS: Dict[str, Type[Metric]] = { - 'iou_score': smp.utils.metrics.IoU, - 'fscore': smp.utils.metrics.Fscore, - 'accuracy': smp.utils.metrics.Accuracy, - 'recall': smp.utils.metrics.Recall, - 'precision': smp.utils.metrics.Precision, +METRICS: dict[str, type[Metric]] = { + "iou_score": smp.utils.metrics.IoU, + "fscore": smp.utils.metrics.Fscore, + "accuracy": smp.utils.metrics.Accuracy, + "recall": smp.utils.metrics.Recall, + "precision": smp.utils.metrics.Precision, } diff --git a/segmentation/polus-smp-training-plugin/tests/__init__.py b/segmentation/polus-smp-training-plugin/tests/__init__.py index e236ad89e..fd0cc6634 100644 --- a/segmentation/polus-smp-training-plugin/tests/__init__.py +++ b/segmentation/polus-smp-training-plugin/tests/__init__.py @@ -1,4 +1,5 @@ from unittest import TestSuite + from tests.model_test import ModelTest from tests.tile_test import TileTest diff --git a/segmentation/polus-smp-training-plugin/tests/model_test.py b/segmentation/polus-smp-training-plugin/tests/model_test.py index 8597dbd51..cd5f4a6ff 100644 --- a/segmentation/polus-smp-training-plugin/tests/model_test.py +++ b/segmentation/polus-smp-training-plugin/tests/model_test.py @@ -1,17 +1,11 @@ +import os +import sys import unittest -import sys, os, json -import copy - -from PIL import Image import numpy as np -import matplotlib.pyplot as plt - 
sys.path.insert(0, os.path.dirname(os.path.dirname(os.path.abspath(__file__)))) -from sklearn.metrics import fbeta_score, jaccard_score -from tqdm import tqdm import csv @@ -20,143 +14,85 @@ polus_dir = "/home/vihanimm/SegmentationModelToolkit/workdir/SMP_Pipeline/polus-plugins/segmentation/polus-smp-training-plugin/" sys.path.append(polus_dir) -from src.utils import LocalNorm -from src.utils import Dataset -from src.utils import MODELS -from src.training import initialize_model -from src.training import initialize_dataloader -from src.utils import METRICS -from src.utils import LOSSES -import torch modelDir = "/home/vihanimm/SegmentationModelToolkit/workdir/output_SMP/Unet-MCCLoss-resnet18-random-Adam" -images_test_dir = "/home/vihanimm/SegmentationModelToolkit/Data/tif_data/nuclear/test/image/" +images_test_dir = ( + "/home/vihanimm/SegmentationModelToolkit/Data/tif_data/nuclear/test/image/" +) labels_test_dir = "/home/vihanimm/SegmentationModelToolkit/Data/tif_data/nuclear/test/groundtruth_centerbinary_1pixelsmaller/" -def getLogs(logCSV : str, - patience : int, - log_dict : dict) -> dict: - - with open(logCSV, 'r') as csvfile: - for row in list(csv.reader(csvfile))[-1*patience:]: + +def getLogs(logCSV: str, patience: int, log_dict: dict) -> dict: + with open(logCSV) as csvfile: + for row in list(csv.reader(csvfile))[-1 * patience :]: for parameter in row: parameter = parameter.split(":") key = parameter[0].strip(" ") value = float(parameter[1].strip(" ")) - log_dict[key]['all'].append(value) - log_dict[key]['avg'] += value/patience - log_dict[key]['mini'] = np.minimum(log_dict[key]['mini'], value) - log_dict[key]['maxi'] = np.maximum(log_dict[key]['maxi'], value) - + log_dict[key]["all"].append(value) + log_dict[key]["avg"] += value / patience + log_dict[key]["mini"] = np.minimum(log_dict[key]["mini"], value) + log_dict[key]["maxi"] = np.maximum(log_dict[key]["maxi"], value) + return log_dict -# num_images = os.listdir(images_test_dir) -# num_labels = 
os.listdir(labels_test_dir) -# assert num_images == num_labels -# bestmodelPath = os.path.join(modelDir, "model.pth") -# configPath = os.path.join(modelDir, "config.json") -# configObj = open(configPath, 'r') -# configDict = json.load(configObj) -# bestmodel = torch.load(bestmodelPath).cpu() -# loss = configDict['lossName'] -# patience = configDict['patience'] -# maxEpochs = configDict['maxEpochs'] -# metrics = list(metric() for metric in METRICS.values()) -# metric_loss = LOSSES[loss]() -# metric_loss.__name__ = loss -# metrics.append(metric_loss) -# metric_outputs = {} -# trainlog_dict = {} -# validlog_dict = {} # for metric in metrics: -# metric_outputs[metric.__name__] = {'avg': 0, 'maxi': 0, 'mini': 1} -# trainlog_dict[metric.__name__] = {'all': [], 'avg': 0, 'maxi': 0, 'mini': 1} -# validlog_dict[metric.__name__] = {'all': [], 'avg': 0, 'maxi': 0, 'mini': 1} -# traincsv_path = os.path.join(modelDir, "trainlogs.csv") -# validcsv_path = os.path.join(modelDir, "validlogs.csv") -# trainlog_dict = getLogs(logCSV = traincsv_path, patience = patience, log_dict = trainlog_dict) -# validlog_dict = getLogs(logCSV = validcsv_path, patience = patience, log_dict = validlog_dict) -# test_loader = tqdm(initialize_dataloader( -# images_dir=images_test_dir, -# labels_dir=labels_test_dir, -# pattern="nuclear_test_61{x}.tif", -# batch_size=1, -# mode="validation" -# )) -# test_loader_len = len(test_loader) # for test in test_loader: -# test0 = test[0] -# test1 = test[1] -# pr_mask = bestmodel.predict(test0) # for metric in metrics: -# try: -# metric_value = (METRICS[metric.__name__].forward(self=metric, y_pr=pr_mask, y_gt=test1).numpy()) # except: -# metric_value = (LOSSES[metric.__name__].forward(self=metric, y_pred=pr_mask, y_true=test1).numpy()) -# metric_outputs[metric.__name__]['avg'] += metric_value/test_loader_len -# metric_outputs[metric.__name__]['mini'] = np.minimum(metric_value, metric_outputs[metric.__name__]['mini']) -# metric_outputs[metric.__name__]['maxi'] = 
np.maximum(metric_value, metric_outputs[metric.__name__]['maxi']) class ModelTest(unittest.TestCase): - def test_accuracy(self): - print("acc") metric_name = "accuracy" - metric_avg = metric_outputs[metric_name]['avg'] - self.assertTrue(metric_avg > trainlog_dict[metric_name]['mini']) - self.assertTrue(metric_avg > trainlog_dict[metric_name]['avg'] - (trainlog_dict[metric_name]['avg'] * .1)) + metric_avg = metric_outputs[metric_name]["avg"] + assert metric_avg > trainlog_dict[metric_name]["mini"] + assert metric_avg > trainlog_dict[metric_name]["avg"] - trainlog_dict[metric_name]["avg"] * 0.1 def test_iou_score(self): - metric_name = "iou_score" - metric_avg = metric_outputs[metric_name]['avg'] - self.assertTrue(metric_avg > trainlog_dict[metric_name]['mini']) - self.assertTrue(metric_avg > trainlog_dict[metric_name]['avg'] - (trainlog_dict[metric_name]['avg'] * .1)) + metric_avg = metric_outputs[metric_name]["avg"] + assert metric_avg > trainlog_dict[metric_name]["mini"] + assert metric_avg > trainlog_dict[metric_name]["avg"] - trainlog_dict[metric_name]["avg"] * 0.1 def test_fscore(self): - metric_name = "fscore" - metric_avg = metric_outputs[metric_name]['avg'] - self.assertTrue(metric_avg > trainlog_dict[metric_name]['mini']) - self.assertTrue(metric_avg > trainlog_dict[metric_name]['avg'] - (trainlog_dict[metric_name]['avg'] * .1)) + metric_avg = metric_outputs[metric_name]["avg"] + assert metric_avg > trainlog_dict[metric_name]["mini"] + assert metric_avg > trainlog_dict[metric_name]["avg"] - trainlog_dict[metric_name]["avg"] * 0.1 def test_recall(self): - metric_name = "recall" - metric_avg = metric_outputs[metric_name]['avg'] - self.assertTrue(metric_avg > trainlog_dict[metric_name]['mini']) - self.assertTrue(metric_avg > trainlog_dict[metric_name]['avg'] - (trainlog_dict[metric_name]['avg'] * .1)) + metric_avg = metric_outputs[metric_name]["avg"] + assert metric_avg > trainlog_dict[metric_name]["mini"] + assert metric_avg > 
trainlog_dict[metric_name]["avg"] - trainlog_dict[metric_name]["avg"] * 0.1 def test_precision(self): - metric_name = "precision" - metric_avg = metric_outputs[metric_name]['avg'] - self.assertTrue(metric_avg > trainlog_dict[metric_name]['mini']) - self.assertTrue(metric_avg > trainlog_dict[metric_name]['avg'] - (trainlog_dict[metric_name]['avg'] * .1)) + metric_avg = metric_outputs[metric_name]["avg"] + assert metric_avg > trainlog_dict[metric_name]["mini"] + assert metric_avg > trainlog_dict[metric_name]["avg"] - trainlog_dict[metric_name]["avg"] * 0.1 def test_loss(self): - metric_name = loss - metric_avg = metric_outputs[metric_name]['avg'] - self.assertTrue(metric_avg > trainlog_dict[metric_name]['mini']) - self.assertTrue(metric_avg > trainlog_dict[metric_name]['avg'] - (trainlog_dict[metric_name]['avg'] * .1)) + metric_avg = metric_outputs[metric_name]["avg"] + assert metric_avg > trainlog_dict[metric_name]["mini"] + assert metric_avg > trainlog_dict[metric_name]["avg"] - trainlog_dict[metric_name]["avg"] * 0.1 -if __name__=="__main__": +if __name__ == "__main__": unittest.main() - \ No newline at end of file diff --git a/segmentation/polus-smp-training-plugin/tests/tile_test.py b/segmentation/polus-smp-training-plugin/tests/tile_test.py index 22464388d..feafc761d 100644 --- a/segmentation/polus-smp-training-plugin/tests/tile_test.py +++ b/segmentation/polus-smp-training-plugin/tests/tile_test.py @@ -1,39 +1,35 @@ import unittest -from src.utils import Tile, UnTile - import torch +from src.utils import Tile +from src.utils import UnTile + class TileTest(unittest.TestCase): - def test_tile(self): - params = [ - ((5,1,1080,1080),(512,512)), - ((5,3,1080,1080),(512,512)), - ((5,1,1080,1000),(512,512)), - ((5,1,1080,1024),(512,512)), - ((5,1,480,480),(512,512)), - ((5,1,1080,1080),(300,512)), - ((5,1,1080,1000),(512,101)) + ((5, 1, 1080, 1080), (512, 512)), + ((5, 3, 1080, 1080), (512, 512)), + ((5, 1, 1080, 1000), (512, 512)), + ((5, 1, 1080, 1024), (512, 
512)), + ((5, 1, 480, 480), (512, 512)), + ((5, 1, 1080, 1080), (300, 512)), + ((5, 1, 1080, 1000), (512, 101)), ] - - for input_shape,tile_size in params: - - with self.subTest(input_shape=input_shape,tile_size=tile_size): - + + for input_shape, tile_size in params: + with self.subTest(input_shape=input_shape, tile_size=tile_size): tile = Tile(tile_size=tile_size) untile = UnTile(tile_size=tile_size) - - image = torch.rand(5,1,1080,1080,dtype=torch.float32) - - image_tiled,input_shape = tile(image) - - image_untiled = untile(image_tiled,input_shape) - - assert torch.all(image==image_untiled) + + image = torch.rand(5, 1, 1080, 1080, dtype=torch.float32) + + image_tiled, input_shape = tile(image) + + image_untiled = untile(image_tiled, input_shape) + + assert torch.all(image == image_untiled) -if __name__=="__main__": +if __name__ == "__main__": unittest.main() - \ No newline at end of file diff --git a/transforms/images/apply-flatfield-tool/.gitignore b/transforms/images/apply-flatfield-tool/.gitignore new file mode 100644 index 000000000..5b8869373 --- /dev/null +++ b/transforms/images/apply-flatfield-tool/.gitignore @@ -0,0 +1,37 @@ +# Created by pytest automatically. 
+ +# Python cache + +__pycache__/ + +*.pyo + +*.pyd + +.DS_Store + +.pytest_cache/ + +*.pyc + +uv.lock +pre-commit-config.yaml + + + +# Virtual environments + +.venv/ + +venv/ + + + + +# Build artifacts + +build/ + +dist/ + +*.egg-info/ diff --git a/transforms/images/apply-flatfield-tool/Dockerfile b/transforms/images/apply-flatfield-tool/Dockerfile index b05d29200..bf8d52999 100644 --- a/transforms/images/apply-flatfield-tool/Dockerfile +++ b/transforms/images/apply-flatfield-tool/Dockerfile @@ -1,25 +1,16 @@ FROM polusai/bfio:2.3.3 -# environment variables defined in polusai/bfio ENV EXEC_DIR="/opt/executables" ENV POLUS_IMG_EXT=".ome.tif" ENV POLUS_TAB_EXT=".csv" ENV POLUS_LOG="INFO" -# Work directory defined in the base container WORKDIR ${EXEC_DIR} -# TODO: Change the tool_dir to the tool directory -ENV TOOL_DIR="transforms/images/apply-flatfield-tool" +# Build context = repo root (PolusAI-image-tools). Path is transforms/images/apply-flatfield-tool +COPY transforms/images/apply-flatfield-tool /opt/executables/app -# Copy the repository into the container -RUN mkdir image-tools -COPY . 
${EXEC_DIR}/image-tools +RUN pip3 install --no-cache-dir /opt/executables/app -# Install the tool -RUN pip3 install "${EXEC_DIR}/image-tools/${TOOL_DIR}" --no-cache-dir - -# Set the entrypoint -# TODO: Change the entrypoint to the tool entrypoint ENTRYPOINT ["python3", "-m", "polus.images.transforms.images.apply_flatfield"] CMD ["--help"] diff --git a/transforms/images/apply-flatfield-tool/pyproject.toml b/transforms/images/apply-flatfield-tool/pyproject.toml index cf82fd99d..334d8a502 100644 --- a/transforms/images/apply-flatfield-tool/pyproject.toml +++ b/transforms/images/apply-flatfield-tool/pyproject.toml @@ -12,11 +12,10 @@ packages = [{include = "polus", from = "src"}] [tool.poetry.dependencies] python = ">=3.9,<3.12" bfio = { version = "^2.3.3", extras = ["all"] } -filepattern = "2.0.4" -typer = { version = "^0.7.0", extras = ["all"] } +filepattern = ">=2.0.4" +typer = ">=0.23.0,<0.24.0" numpy = "^1.24.3" tqdm = "^4.65.0" -preadator = "0.4.0-dev2" [tool.poetry.group.dev.dependencies] bump2version = "^1.0.1" diff --git a/transforms/images/apply-flatfield-tool/src/polus/images/transforms/images/apply_flatfield/apply_flatfield.py b/transforms/images/apply-flatfield-tool/src/polus/images/transforms/images/apply_flatfield/apply_flatfield.py index d81ef71da..00abe1701 100644 --- a/transforms/images/apply-flatfield-tool/src/polus/images/transforms/images/apply_flatfield/apply_flatfield.py +++ b/transforms/images/apply-flatfield-tool/src/polus/images/transforms/images/apply_flatfield/apply_flatfield.py @@ -4,10 +4,10 @@ import operator import pathlib import typing +from concurrent.futures import ProcessPoolExecutor import bfio import numpy -import preadator import tqdm from filepattern import FilePattern @@ -145,18 +145,11 @@ def _unshade_batch( df_image: component to be used for flatfield correction """ # Load images - with preadator.ProcessManager( - name="unshade_batch::load", - num_processes=utils.MAX_WORKERS, - threads_per_process=2, - ) as load_executor: - 
load_futures = [] - for i, inp_path in enumerate(batch_paths): - load_futures.append( - load_executor.submit_process(utils.load_img, inp_path, i), - ) - - load_executor.join_processes() + with ProcessPoolExecutor(max_workers=utils.MAX_WORKERS) as load_executor: + load_futures = [ + load_executor.submit(utils.load_img, inp_path, i) + for i, inp_path in enumerate(batch_paths) + ] images = [f.result() for f in load_futures] images = [img for _, img in sorted(images, key=operator.itemgetter(0))] @@ -169,11 +162,10 @@ def _unshade_batch( img_stack /= ff_image + 1e-8 # Save outputs - with preadator.ProcessManager( - name="unshade_batch::save", - num_processes=utils.MAX_WORKERS, - threads_per_process=2, - ) as save_executor: - for inp_path, img in zip(batch_paths, img_stack): - save_executor.submit_process(utils.save_img, inp_path, img, out_dir) - save_executor.join_processes() + with ProcessPoolExecutor(max_workers=utils.MAX_WORKERS) as save_executor: + save_futures = [ + save_executor.submit(utils.save_img, inp_path, img, out_dir) + for inp_path, img in zip(batch_paths, img_stack) + ] + for f in save_futures: + f.result() diff --git a/transforms/images/binary-operations-tool/src/polus/images/transforms/images/binary_operations/__main__.py b/transforms/images/binary-operations-tool/src/polus/images/transforms/images/binary_operations/__main__.py index 204705a9a..11fd9dc6c 100644 --- a/transforms/images/binary-operations-tool/src/polus/images/transforms/images/binary_operations/__main__.py +++ b/transforms/images/binary-operations-tool/src/polus/images/transforms/images/binary_operations/__main__.py @@ -7,13 +7,10 @@ import typer from filepattern import FilePattern - -from polus.images.transforms.images.binary_operations import ( - Operation, - StructuringShape, - __version__, - batch_binary_ops, -) +from polus.images.transforms.images.binary_operations import Operation +from polus.images.transforms.images.binary_operations import StructuringShape +from 
polus.images.transforms.images.binary_operations import __version__ +from polus.images.transforms.images.binary_operations import batch_binary_ops app = typer.Typer() @@ -29,14 +26,14 @@ @app.command() def _main( pattern: str = typer.Option( - ".+", "--filePattern", help="FilePattern used to parse data." + ".+", "--filePattern", help="FilePattern used to parse data.", ), inp_dir: pathlib.Path = typer.Option(..., "--inpDir", help="Path to input images."), out_dir: pathlib.Path = typer.Option( - ..., "--outDir", help="Path to place output files." + ..., "--outDir", help="Path to place output files.", ), operation: Operation = typer.Option( - ..., "--operation", help="Binary operation to perform." + ..., "--operation", help="Binary operation to perform.", ), shape: StructuringShape = typer.Option( StructuringShape.ELLIPSE, @@ -58,7 +55,7 @@ def _main( iterations (e.g. dilation).""", ), preview: bool = typer.Option( - False, help="Output a JSON preview of files generated by this tool." + False, help="Output a JSON preview of files generated by this tool.", ), ) -> None: """Advanced montaging tool.""" @@ -75,7 +72,7 @@ def _main( if preview: with open(pathlib.Path(out_dir).joinpath("preview.json"), "w") as fw: - out_files: typing.Dict[str, typing.Union[typing.List, str]] = { + out_files: dict[str, typing.Union[list, str]] = { r"filepattern": pattern, "outDir": [], } diff --git a/transforms/images/binary-operations-tool/src/polus/images/transforms/images/binary_operations/binops.py b/transforms/images/binary-operations-tool/src/polus/images/transforms/images/binary_operations/binops.py index ab981db66..a081732bb 100644 --- a/transforms/images/binary-operations-tool/src/polus/images/transforms/images/binary_operations/binops.py +++ b/transforms/images/binary-operations-tool/src/polus/images/transforms/images/binary_operations/binops.py @@ -3,30 +3,30 @@ import logging from enum import Enum from pathlib import Path -from typing import Optional, Union +from typing import 
Optional +from typing import Union import cv2 import numpy -from bfio import BioReader, BioWriter +from bfio import BioReader +from bfio import BioWriter from filepattern import FilePattern from preadator import ProcessManager -from .utils import ( - TileTuple, - blackhat, - close_, - dilate, - erode, - fill_holes, - invert, - iterate_tiles, - morphgradient, - open_, - remove_large, - remove_small, - skeletonize, - tophat, -) +from .utils import TileTuple +from .utils import blackhat +from .utils import close_ +from .utils import dilate +from .utils import erode +from .utils import fill_holes +from .utils import invert +from .utils import iterate_tiles +from .utils import morphgradient +from .utils import open_ +from .utils import remove_large +from .utils import remove_small +from .utils import skeletonize +from .utils import tophat logger = logging.getLogger(__name__) @@ -132,9 +132,8 @@ def binary_op( else: extra_arguments = None - out_image = OPERATION_DICT[operation](image, kernel=se, n=extra_arguments) + return OPERATION_DICT[operation](image, kernel=se, n=extra_arguments) - return out_image def _tile_thread( @@ -149,22 +148,21 @@ def _tile_thread( iterations: int = 1, threshold: Optional[int] = None, ): - with ProcessManager.thread(): - with BioReader(filepath) as br: - # read a tile of BioReader - tile = br[window_slice] + with ProcessManager.thread(), BioReader(filepath) as br: + # read a tile of BioReader + tile = br[window_slice] - out_tile = binary_op( - image=tile, - operation=operation, - structuring_shape=structuring_shape, - kernel=kernel, - iterations=iterations, - threshold=threshold, - ) + out_tile = binary_op( + image=tile, + operation=operation, + structuring_shape=structuring_shape, + kernel=kernel, + iterations=iterations, + threshold=threshold, + ) - # finalize the output - writer[step_slice] = out_tile[0:step_size, 0:step_size] + # finalize the output + writer[step_slice] = out_tile[0:step_size, 0:step_size] def scalable_binary_op( @@ 
-224,7 +222,7 @@ def scalable_binary_op( window_size: int = step_size + (2 * extra_padding) for window_slice, step_slice in iterate_tiles( - shape=bfio_shape, window_size=window_size, step_size=step_size + shape=bfio_shape, window_size=window_size, step_size=step_size, ): # info on the Slices for debugging ProcessManager.submit_thread( @@ -316,5 +314,5 @@ def batch_binary_ops( else: raise RuntimeError( "No data to process. Make sure the input directory is correct " - + "and that the filepattern matches files in the input directory." + + "and that the filepattern matches files in the input directory.", ) diff --git a/transforms/images/binary-operations-tool/src/polus/images/transforms/images/binary_operations/utils.py b/transforms/images/binary-operations-tool/src/polus/images/transforms/images/binary_operations/utils.py index 9701aa548..c6ee3294b 100644 --- a/transforms/images/binary-operations-tool/src/polus/images/transforms/images/binary_operations/utils.py +++ b/transforms/images/binary-operations-tool/src/polus/images/transforms/images/binary_operations/utils.py @@ -1,14 +1,15 @@ """Binary operations and processing utilities.""" import logging -from typing import Any, Generator, Tuple +from collections.abc import Generator +from typing import Any import cv2 import numpy as np logger = logging.getLogger("utils") -TileTuple = Tuple[slice, slice, slice, slice, slice] +TileTuple = tuple[slice, slice, slice, slice, slice] def invert(image: np.ndarray, **kwargs) -> np.ndarray: @@ -29,8 +30,7 @@ def dilate(image: np.ndarray, kernel: Any, n: int = 1) -> np.ndarray: Returns: The dilated image """ - dilatedimg = cv2.dilate(image, kernel, iterations=n) - return dilatedimg + return cv2.dilate(image, kernel, iterations=n) def erode(image: np.ndarray, kernel: Any, n: int = 1) -> np.ndarray: @@ -46,8 +46,7 @@ def erode(image: np.ndarray, kernel: Any, n: int = 1) -> np.ndarray: Returns: The eroded image. 
""" - erodedimg = cv2.erode(image, kernel, iterations=n) - return erodedimg + return cv2.erode(image, kernel, iterations=n) def open_(image: np.ndarray, kernel: int, n: int = 1) -> np.ndarray: @@ -63,8 +62,7 @@ def open_(image: np.ndarray, kernel: int, n: int = 1) -> np.ndarray: Returns: The opened image. """ - openimg = cv2.morphologyEx(image, cv2.MORPH_OPEN, kernel) - return openimg + return cv2.morphologyEx(image, cv2.MORPH_OPEN, kernel) def close_(image: np.ndarray, kernel: int, n: Any = 1) -> np.ndarray: @@ -80,8 +78,7 @@ def close_(image: np.ndarray, kernel: int, n: Any = 1) -> np.ndarray: Returns: The closed image. """ - closeimg = cv2.morphologyEx(image, cv2.MORPH_CLOSE, kernel) - return closeimg + return cv2.morphologyEx(image, cv2.MORPH_CLOSE, kernel) def morphgradient(image: np.ndarray, kernel: Any, n: int = 1) -> np.ndarray: @@ -98,8 +95,7 @@ def morphgradient(image: np.ndarray, kernel: Any, n: int = 1) -> np.ndarray: Returns: The morphological gradient of the input image. """ - mg = cv2.morphologyEx(image, cv2.MORPH_GRADIENT, kernel) - return mg + return cv2.morphologyEx(image, cv2.MORPH_GRADIENT, kernel) def fill_holes(image: np.ndarray, kernel: Any = None, n: int = 0) -> np.ndarray: @@ -119,15 +115,14 @@ def fill_holes(image: np.ndarray, kernel: Any = None, n: int = 0) -> np.ndarray: image_dtype = image.dtype image = cv2.convertScaleAbs(image) contour, _ = cv2.findContours( - image, mode=cv2.RETR_CCOMP, method=cv2.CHAIN_APPROX_SIMPLE + image, mode=cv2.RETR_CCOMP, method=cv2.CHAIN_APPROX_SIMPLE, ) for cnt in contour: cv2.drawContours(image, [cnt], 0, 1, -1) - image = image.astype(image_dtype) + return image.astype(image_dtype) - return image def skeletonize(image: np.ndarray, kernel: Any, n: int = 0) -> np.ndarray: @@ -172,8 +167,7 @@ def tophat(image: np.ndarray, kernel: Any, n: int = 0) -> np.ndarray: Returns: An image with tophat operation performed on it. 
""" - tophat = cv2.morphologyEx(image, cv2.MORPH_TOPHAT, kernel) - return tophat + return cv2.morphologyEx(image, cv2.MORPH_TOPHAT, kernel) def blackhat(image: np.ndarray, kernel: Any = None, n: int = 0) -> np.ndarray: @@ -187,8 +181,7 @@ def blackhat(image: np.ndarray, kernel: Any = None, n: int = 0) -> np.ndarray: Returns: An image with blackhat performed on it. """ - blackhat = cv2.morphologyEx(image, cv2.MORPH_BLACKHAT, kernel) - return blackhat + return cv2.morphologyEx(image, cv2.MORPH_BLACKHAT, kernel) def remove_small(image: np.ndarray, kernel: Any = None, n: int = 2) -> np.ndarray: @@ -208,9 +201,8 @@ def remove_small(image: np.ndarray, kernel: Any = None, n: int = 2) -> np.ndarra uniques[counts < n] = 0 - image_out = uniques[inverse].reshape(image.shape) + return uniques[inverse].reshape(image.shape) - return image_out def remove_large(image: np.ndarray, kernel: Any = None, n: int = 0) -> np.ndarray: @@ -231,14 +223,13 @@ def remove_large(image: np.ndarray, kernel: Any = None, n: int = 0) -> np.ndarra uniques[counts > n] = 0 - image_out = uniques[inverse].reshape(image.shape) + return uniques[inverse].reshape(image.shape) - return image_out def iterate_tiles( - shape: tuple, window_size: int, step_size: int -) -> Generator[Tuple[TileTuple, TileTuple], None, None]: + shape: tuple, window_size: int, step_size: int, +) -> Generator[tuple[TileTuple, TileTuple], None, None]: """Iterate through tiles of an image. 
Args: diff --git a/transforms/images/image-assembler-tool/.gitignore b/transforms/images/image-assembler-tool/.gitignore index 0185760d9..550d90010 100644 --- a/transforms/images/image-assembler-tool/.gitignore +++ b/transforms/images/image-assembler-tool/.gitignore @@ -4,4 +4,3 @@ poetry.lock out data - diff --git a/transforms/images/montage-tool/src/polus/images/transforms/images/montage/__main__.py b/transforms/images/montage-tool/src/polus/images/transforms/images/montage/__main__.py index 86bbab010..fc3bbf56d 100644 --- a/transforms/images/montage-tool/src/polus/images/transforms/images/montage/__main__.py +++ b/transforms/images/montage-tool/src/polus/images/transforms/images/montage/__main__.py @@ -6,13 +6,11 @@ import typing import typer - -from polus.images.transforms.images.montage import ( - __version__, - generate_montage_patterns, - montage_all, -) -from polus.images.transforms.images.montage.montage import MULTIPLIER, SPACING +from polus.images.transforms.images.montage import __version__ +from polus.images.transforms.images.montage import generate_montage_patterns +from polus.images.transforms.images.montage import montage_all +from polus.images.transforms.images.montage.montage import MULTIPLIER +from polus.images.transforms.images.montage.montage import SPACING app = typer.Typer() @@ -28,17 +26,17 @@ @app.command() def _main( pattern: str = typer.Option( - ..., "--filePattern", help="FilePattern used to parse data." + ..., "--filePattern", help="FilePattern used to parse data.", ), inp_dir: pathlib.Path = typer.Option( - ..., "--inpDir", help="Specify montage organization." + ..., "--inpDir", help="Specify montage organization.", ), - layout: typing.List[str] = typer.Option( - ..., "--layout", help="Specify montage organization." 
+ layout: list[str] = typer.Option( + ..., "--layout", help="Specify montage organization.", ), out_dir: pathlib.Path = typer.Option(..., "--outDir", help="Output collection."), - flip_axis: typing.List[str] = typer.Option( - [], "--flipAxis", help="Axes to flip or reverse order." + flip_axis: list[str] = typer.Option( + [], "--flipAxis", help="Axes to flip or reverse order.", ), image_spacing: int = typer.Option( SPACING, @@ -51,7 +49,7 @@ def _main( help="Spacing between values in the same grid level.", ), preview: bool = typer.Option( - False, help="Output a JSON preview of files generated by this tool." + False, help="Output a JSON preview of files generated by this tool.", ), ) -> None: """Advanced montaging tool.""" @@ -59,7 +57,7 @@ def _main( # Parse the arguments if len(flip_axis) == 1: - flip_axis = [v for v in flip_axis[0]] + flip_axis = list(flip_axis[0]) if len(layout) == 1: layout = layout[0].replace(" ", "").split(",") @@ -75,7 +73,7 @@ def _main( if preview: with open(pathlib.Path(out_dir).joinpath("preview.json"), "w") as fw: subpatterns = generate_montage_patterns(pattern, inp_dir, layout) - out_files: typing.Dict[str, typing.Union[typing.List, str]] = { + out_files: dict[str, typing.Union[list, str]] = { r"filepattern": "img-global-positions-{p:d+}.txt", "outDir": [], } @@ -85,7 +83,7 @@ def _main( else: montage_all( - pattern, inp_dir, layout, flip_axis, out_dir, image_spacing, grid_spacing + pattern, inp_dir, layout, flip_axis, out_dir, image_spacing, grid_spacing, ) diff --git a/transforms/images/montage-tool/src/polus/images/transforms/images/montage/montage.py b/transforms/images/montage-tool/src/polus/images/transforms/images/montage/montage.py index 8b79ae3d6..b051f354f 100644 --- a/transforms/images/montage-tool/src/polus/images/transforms/images/montage/montage.py +++ b/transforms/images/montage-tool/src/polus/images/transforms/images/montage/montage.py @@ -2,16 +2,15 @@ import logging import math import pathlib -from typing import Dict, 
List, Optional, Tuple, Union +from typing import Optional +from typing import Union from bfio import BioReader from filepattern import FilePattern -from .utils import ( - DictWriter, - VectorWriter, - subpattern, -) +from .utils import DictWriter +from .utils import VectorWriter +from .utils import subpattern logger = logging.getLogger(__name__) @@ -20,7 +19,7 @@ def _get_xy_index( - files: list[dict], dims: str, layout: list[str], flip_axis: List[str] + files: list[dict], dims: str, layout: list[str], flip_axis: list[str], ): """Get the x and y indices from a list of filename dictionaries. @@ -101,8 +100,8 @@ def _get_xy_index( def image_position( - index: Dict[str, int], layout_dimensions: Dict[str, List] -) -> Tuple[int, int, int, int]: + index: dict[str, int], layout_dimensions: dict[str, list], +) -> tuple[int, int, int, int]: """Calculate the image position in the montage from a set of dimensions. Args: @@ -141,13 +140,13 @@ def image_position( def montage( pattern: str, inp_dir: pathlib.Path, - layout_list: List[str], + layout_list: list[str], out_dir: pathlib.Path, image_spacing: int = SPACING, grid_spacing: int = MULTIPLIER, - flip_axis: List[str] = [], + flip_axis: list[str] = [], file_index: int = -1, -) -> Optional[Dict[str, Union[int, str]]]: +) -> Optional[dict[str, Union[int, str]]]: """Generate montage positions for a collection of images. 
This function generates a single stitching vector for a collection of images to @@ -171,7 +170,7 @@ def montage( fp = FilePattern(inp_dir, pattern, suppress_warnings=True) # Layout dimensions, used to calculate positions later on - layout_dimensions: Dict[str, list] = { + layout_dimensions: dict[str, list] = { "grid_size": [ [] for r in range(len(layout_list)) ], # number of tiles in each dimension in the subgrid @@ -208,10 +207,10 @@ def montage( # Set the pixel and tile dimensions layout_dimensions["tile_size"][len(layout_list) - 1].append( - [grid_width, grid_height] + [grid_width, grid_height], ) layout_dimensions["size"][len(layout_list) - 1].append( - [grid_width * grid_size[0], grid_height * grid_size[1]] + [grid_width * grid_size[0], grid_height * grid_size[1]], ) # Find the largest subgrid size for the lowest subgrid @@ -310,8 +309,8 @@ def montage( def generate_montage_patterns( pattern: str, inp_dir: pathlib.Path, - layout_list: List[str], -) -> List[str]: + layout_list: list[str], +) -> list[str]: """Generate filepatterns from an existing filepattern, one for each montage.""" # Set up the file pattern parser fp = FilePattern(inp_dir, pattern) @@ -330,7 +329,7 @@ def generate_montage_patterns( sp = [] for files in planes: - sp.append(subpattern(filepattern=pattern, values={k: v for k, v in files[0]})) + sp.append(subpattern(filepattern=pattern, values=dict(files[0]))) return sp @@ -338,8 +337,8 @@ def generate_montage_patterns( def montage_all( pattern: str, inp_dir: pathlib.Path, - layout: List[str], - flip_axis: List[str], + layout: list[str], + flip_axis: list[str], out_dir: pathlib.Path, image_spacing: int = SPACING, grid_spacing: int = MULTIPLIER, @@ -349,10 +348,11 @@ def montage_all( for lt in layout: if len(lt) > 2 or len(lt) < 1: logger.error( - "Each layout subgrid must have one or two variables assigned to it." 
+ "Each layout subgrid must have one or two variables assigned to it.", ) + msg = "Each layout subgrid must have one or two variables assigned to it." raise ValueError( - "Each layout subgrid must have one or two variables assigned to it." + msg, ) patterns = generate_montage_patterns(pattern, inp_dir, layout) diff --git a/transforms/images/montage-tool/src/polus/images/transforms/images/montage/utils.py b/transforms/images/montage-tool/src/polus/images/transforms/images/montage/utils.py index 35db8ee1a..e689a174f 100644 --- a/transforms/images/montage-tool/src/polus/images/transforms/images/montage/utils.py +++ b/transforms/images/montage-tool/src/polus/images/transforms/images/montage/utils.py @@ -1,10 +1,11 @@ """Utilities for the image montaging utility.""" import re from pathlib import Path -from typing import Dict, List, Optional, Union +from typing import Optional +from typing import Union -def subpattern(filepattern: str, values: Dict[str, Union[int, str]]) -> str: +def subpattern(filepattern: str, values: dict[str, Union[int, str]]) -> str: """Generate a filepattern, replacing variables with defined values. This function takes in a filepattern and defined static values, generating a new @@ -52,7 +53,7 @@ class VectorWriter: string = "file: {}; corr: {}; position: ({}, {}); grid: ({}, {});\n" - def __init__(self, path: Path): + def __init__(self, path: Path) -> None: """Initialize a stitching vector writer. Args: @@ -85,7 +86,7 @@ def write( grid_y: The y-grid position. """ self.fo.write( - self.string.format(file_name, correlation, pos_x, pos_y, grid_x, grid_y) + self.string.format(file_name, correlation, pos_x, pos_y, grid_x, grid_y), ) def __exit__(self, exc_type, exc_val, exc_tb): # noqa @@ -98,7 +99,7 @@ def __del__(self): # noqa class DictWriter: """A dictionary writer for stitching vectors.""" - def __init__(self, path: Optional[Path] = None): + def __init__(self, path: Optional[Path] = None) -> None: """Initialize a dictionary vector writer. 
The primary purpose of this is for in-memory abstraction of a stitching vector. @@ -106,7 +107,7 @@ def __init__(self, path: Optional[Path] = None): Args: path: Not used for this writer. """ - self.fh: List[Dict[str, Union[str, int]]] = [] + self.fh: list[dict[str, Union[str, int]]] = [] def __enter__(self): # noqa return self @@ -130,7 +131,7 @@ def write( grid_x: The x-grid position. grid_y: The y-grid position. """ - data: Dict[str, Union[str, int]] = { + data: dict[str, Union[str, int]] = { "file_name": file_name, "correlation": correlation, "pox_x": pos_x, diff --git a/transforms/images/polus-apply-flatfield-plugin/VERSION b/transforms/images/polus-apply-flatfield-plugin/VERSION index 867e52437..26aaba0e8 100644 --- a/transforms/images/polus-apply-flatfield-plugin/VERSION +++ b/transforms/images/polus-apply-flatfield-plugin/VERSION @@ -1 +1 @@ -1.2.0 \ No newline at end of file +1.2.0 diff --git a/transforms/images/polus-apply-flatfield-plugin/build-docker.sh b/transforms/images/polus-apply-flatfield-plugin/build-docker.sh index ba7daefb6..df2cdd207 100755 --- a/transforms/images/polus-apply-flatfield-plugin/build-docker.sh +++ b/transforms/images/polus-apply-flatfield-plugin/build-docker.sh @@ -1,4 +1,4 @@ #!/bin/bash version=$( None: - - """Start a process for each set of brightfield/darkfield/photobleach patterns""" + """Start a process for each set of brightfield/darkfield/photobleach patterns.""" # Create the FilePattern objects to handle file access ff_files = FilePattern(ffDir, brightPattern) fp = FilePattern(imgDir, imgPattern) - if darkPattern != None and darkPattern != "": + if darkPattern is not None and darkPattern != "": dark_files = FilePattern(ffDir, darkPattern) - if photoPattern != None and photoPattern != "": + if photoPattern is not None and photoPattern != "": photo_files = FilePattern( - str(Path(ffDir).parents[0].joinpath("metadata").absolute()), photoPattern + str(Path(ffDir).parents[0].joinpath("metadata").absolute()), photoPattern, ) 
group_by = [v for v in fp.variables if v not in ff_files.variables] - GROUPED = group_by + ["file"] + GROUPED = [*group_by, "file"] ProcessManager.init_processes("main", "unshade") logger.info(f"Running with {ProcessManager.num_processes()} processes.") for files in fp(group_by=group_by): - flat_path = ff_files.get_matching( - **{k.upper(): v for k, v in files[0].items() if k not in GROUPED} + **{k.upper(): v for k, v in files[0].items() if k not in GROUPED}, )[0]["file"] if flat_path is None: logger.warning("Could not find a flatfield image, skipping...") @@ -148,7 +143,7 @@ def main( if darkPattern is not None and darkPattern != "": dark_path = dark_files.get_matching( - **{k.upper(): v for k, v in files[0].items() if k not in GROUPED} + **{k.upper(): v for k, v in files[0].items() if k not in GROUPED}, )[0]["file"] if dark_path is None: @@ -157,7 +152,7 @@ def main( if photoPattern is not None and photoPattern != "": photo_path = photo_files.get_matching( - **{k.upper(): v for k, v in files[0].items() if k not in GROUPED} + **{k.upper(): v for k, v in files[0].items() if k not in GROUPED}, )[0]["file"] if photo_path is None: @@ -168,7 +163,6 @@ def main( unshade_batch(files, outDir, flat_path, dark_path, photo_path) - # ProcessManager.join_processes() if __name__ == "__main__": @@ -230,28 +224,28 @@ def main( required=False, ) parser.add_argument( - "--outDir", dest="outDir", type=str, help="Output collection", required=True + "--outDir", dest="outDir", type=str, help="Output collection", required=True, ) # Parse the arguments args = parser.parse_args() darkPattern = args.darkPattern - logger.info("darkPattern = {}".format(darkPattern)) + logger.info(f"darkPattern = {darkPattern}") ffDir = Path(args.ffDir) # catch the case that ffDir is the output within a workflow if Path(ffDir).joinpath("images").is_dir(): ffDir = ffDir.joinpath("images") - logger.info("ffDir = {}".format(ffDir)) + logger.info(f"ffDir = {ffDir}") brightPattern = args.brightPattern - 
logger.info("brightPattern = {}".format(brightPattern)) + logger.info(f"brightPattern = {brightPattern}") imgDir = Path(args.imgDir) - logger.info("imgDir = {}".format(imgDir)) + logger.info(f"imgDir = {imgDir}") imgPattern = args.imgPattern - logger.info("imgPattern = {}".format(imgPattern)) + logger.info(f"imgPattern = {imgPattern}") photoPattern = args.photoPattern - logger.info("photoPattern = {}".format(photoPattern)) + logger.info(f"photoPattern = {photoPattern}") outDir = Path(args.outDir) - logger.info("outDir = {}".format(outDir)) + logger.info(f"outDir = {outDir}") logger.info(f"Output file extension = {FILE_EXT}") diff --git a/transforms/images/polus-autocropping-plugin/VERSION b/transforms/images/polus-autocropping-plugin/VERSION index e6d5cb833..6d7de6e6a 100644 --- a/transforms/images/polus-autocropping-plugin/VERSION +++ b/transforms/images/polus-autocropping-plugin/VERSION @@ -1 +1 @@ -1.0.2 \ No newline at end of file +1.0.2 diff --git a/transforms/images/polus-autocropping-plugin/requirements.txt b/transforms/images/polus-autocropping-plugin/requirements.txt index 40aaca043..ce3d6937f 100644 --- a/transforms/images/polus-autocropping-plugin/requirements.txt +++ b/transforms/images/polus-autocropping-plugin/requirements.txt @@ -1,2 +1,2 @@ filepattern==1.4.7 -scipy>=1.7.1 \ No newline at end of file +scipy>=1.7.1 diff --git a/transforms/images/polus-autocropping-plugin/src/__init__.py b/transforms/images/polus-autocropping-plugin/src/__init__.py index 23c82fe21..c83d41efc 100644 --- a/transforms/images/polus-autocropping-plugin/src/__init__.py +++ b/transforms/images/polus-autocropping-plugin/src/__init__.py @@ -1,2 +1,2 @@ -from . import utils from . import autocrop +from . 
import utils diff --git a/transforms/images/polus-autocropping-plugin/src/autocrop.py b/transforms/images/polus-autocropping-plugin/src/autocrop.py index 03a91d580..8730b3e8f 100644 --- a/transforms/images/polus-autocropping-plugin/src/autocrop.py +++ b/transforms/images/polus-autocropping-plugin/src/autocrop.py @@ -1,7 +1,7 @@ import logging import random -from concurrent.futures import as_completed from concurrent.futures import ProcessPoolExecutor +from concurrent.futures import as_completed from functools import reduce from pathlib import Path @@ -16,23 +16,23 @@ from utils import local_distogram as distogram logging.basicConfig( - format='%(asctime)s - %(name)-8s - %(levelname)-8s - %(message)s', - datefmt='%d-%b-%y %H:%M:%S', + format="%(asctime)s - %(name)-8s - %(levelname)-8s - %(message)s", + datefmt="%d-%b-%y %H:%M:%S", ) logger = logging.getLogger("autocrop") logger.setLevel(constants.POLUS_LOG) def calculate_strip_entropy( - *, - file_path: Path, - z_index: int, - strip_index: int, - along_x: bool, - direction: bool, - smoothing: bool, + *, + file_path: Path, + z_index: int, + strip_index: int, + along_x: bool, + direction: bool, + smoothing: bool, ) -> list[float]: - """ Get the entropy for each row/column in the indexed strip along the given + """Get the entropy for each row/column in the indexed strip along the given axis. A strip spans the entire length/width of the image. Args: @@ -46,22 +46,28 @@ def calculate_strip_entropy( Returns: A list of scores for each row in the strip. 
""" - histograms: list[list[distogram.Distogram]] = list() + histograms: list[list[distogram.Distogram]] = [] with BioReader(file_path) as reader: - for x_min, x_max, y_min, y_max in helpers.iter_strip(file_path, strip_index, along_x): + for x_min, x_max, y_min, y_max in helpers.iter_strip( + file_path, strip_index, along_x, + ): tile = numpy.asarray( - reader[y_min:y_max, x_min:x_max, z_index:z_index + 1, 0, 0], + reader[y_min:y_max, x_min:x_max, z_index : z_index + 1, 0, 0], dtype=numpy.float32, ) if smoothing: - tile = scipy.ndimage.gaussian_filter(tile, sigma=1, mode='constant', cval=numpy.mean(tile)) + tile = scipy.ndimage.gaussian_filter( + tile, sigma=1, mode="constant", cval=numpy.mean(tile), + ) # It is simpler to work with tiles of shape (strip_width, :) so we can # always iterate over the 0th axis to get the rows/columns of the image. tile = tile if along_x else numpy.transpose(tile) - row_range = range(tile.shape[0]) if direction else reversed(range(tile.shape[0])) + row_range = ( + range(tile.shape[0]) if direction else reversed(range(tile.shape[0])) + ) # Create a distogram for each row in the tile. We use more binds for now # and later merge into a distogram with fewer bins @@ -77,17 +83,23 @@ def calculate_strip_entropy( # In case the last tile had fewer rows than other tiles, # simply pad the list with empty Distograms. - histograms[-1].extend([ - distogram.Distogram(bin_count=constants.MAX_BINS, weighted_diff=constants.WEIGHTED_BINS) - for _ in range(len(histograms[0]) - len(histograms[-1])) - ]) + histograms[-1].extend( + [ + distogram.Distogram( + bin_count=constants.MAX_BINS, weighted_diff=constants.WEIGHTED_BINS, + ) + for _ in range(len(histograms[0]) - len(histograms[-1])) + ], + ) # Merge the Distograms for the same row from across the strip. 
histograms: list[distogram.Distogram] = [ reduce( lambda residual, value: distogram.merge(residual, value), row_histograms, - distogram.Distogram(bin_count=constants.MAX_BINS, weighted_diff=constants.WEIGHTED_BINS), + distogram.Distogram( + bin_count=constants.MAX_BINS, weighted_diff=constants.WEIGHTED_BINS, + ), ) for row_histograms in zip(*histograms) ] @@ -95,20 +107,19 @@ def calculate_strip_entropy( # Now that each row has its own Distogram, we can compute the entropy of # each row. strip_entropy: list[float] = [ - scipy.stats.entropy([c for _, c in histogram.bins]) - for histogram in histograms + scipy.stats.entropy([c for _, c in histogram.bins]) for histogram in histograms ] return strip_entropy def find_gradient_spike_xy( - file_path: Path, - z_index: int, - along_x: bool, - direction: bool, - smoothing: bool, + file_path: Path, + z_index: int, + along_x: bool, + direction: bool, + smoothing: bool, ) -> int: - """ Find the index of the row/column, after padding, of the first large + """Find the index of the row/column, after padding, of the first large spike in the gradient of entropy of rows/columns. Args: @@ -130,55 +141,65 @@ def find_gradient_spike_xy( num_strips += 1 # In case we are going backward, reverse the strip indices. - strip_indices = list(range(num_strips) if direction else reversed(range(num_strips))) + strip_indices = list( + range(num_strips) if direction else reversed(range(num_strips)), + ) # We don't want to look too deep into the image. If we go through # too many strips, we will just use a high percentile gradient value. deepest_strip = max(1, len(strip_indices) // 4) - raw_entropies = list() - smoothed_gradients = list() + raw_entropies = [] + smoothed_gradients = [] for i, index in enumerate(strip_indices[:deepest_strip]): logger.info( - f'Checking strip {index + 1} of {len(strip_indices)} ' - f'along {"x" if along_x else "y"}-axis in the {z_index}-slice...' 
+ f"Checking strip {index + 1} of {len(strip_indices)} " + f'along {"x" if along_x else "y"}-axis in the {z_index}-slice...', ) - raw_entropies.extend(calculate_strip_entropy( - file_path=file_path, - z_index=z_index, - strip_index=index, - along_x=along_x, - direction=direction, - smoothing=smoothing, - )) + raw_entropies.extend( + calculate_strip_entropy( + file_path=file_path, + z_index=z_index, + strip_index=index, + along_x=along_x, + direction=direction, + smoothing=smoothing, + ), + ) smoothed_gradients = helpers.smoothed_gradients(raw_entropies) index_val = helpers.find_spike(smoothed_gradients, constants.GRADIENT_THRESHOLD) if index_val is None: - raw_entropies = raw_entropies[-(1 + 2 * constants.WINDOW_SIZE):] + raw_entropies = raw_entropies[-(1 + 2 * constants.WINDOW_SIZE) :] else: break else: # There was no break in the loop, i.e. no high gradient was found. - logger.debug(f'Gradient threshold {constants.GRADIENT_THRESHOLD:.2e} was too high. ' - f'Using {constants.GRADIENT_PERCENTILE}th percentile instead...') - threshold = numpy.percentile(smoothed_gradients, q=constants.GRADIENT_PERCENTILE) + logger.debug( + f"Gradient threshold {constants.GRADIENT_THRESHOLD:.2e} was too high. 
" + f"Using {constants.GRADIENT_PERCENTILE}th percentile instead...", + ) + threshold = numpy.percentile( + smoothed_gradients, q=constants.GRADIENT_PERCENTILE, + ) index_val = helpers.find_spike(smoothed_gradients, float(threshold)) stop = index_val[0] if direction else end - index_val[0] - logger.debug(f'Found gradient spike at index {stop} along axis {"x" if along_x else "y"}') + logger.debug( + f'Found gradient spike at index {stop} along axis {"x" if along_x else "y"}', + ) return stop def estimate_slice_entropies_thread( - file_path: Path, - smoothing: bool, - z_index: int, + file_path: Path, + smoothing: bool, + z_index: int, ) -> distogram.Distogram: tile_indices = list(helpers.iter_tiles_2d(file_path)) if len(tile_indices) > 25: tile_indices = list(random.sample(tile_indices, 25)) - tile_histograms = list() + tile_histograms = [] with BioReader(file_path) as reader: for x_min, x_max, y_min, y_max in tile_indices: @@ -187,18 +208,24 @@ def estimate_slice_entropies_thread( dtype=numpy.float32, ) if smoothing: - tile = scipy.ndimage.gaussian_filter(tile, sigma=1, mode='constant', cval=numpy.mean(tile)) + tile = scipy.ndimage.gaussian_filter( + tile, sigma=1, mode="constant", cval=numpy.mean(tile), + ) - tile_histograms.append(helpers.distogram_from_batch( - tile.flat, - constants.MAX_BINS * 2, - constants.WEIGHTED_BINS, - )) + tile_histograms.append( + helpers.distogram_from_batch( + tile.flat, + constants.MAX_BINS * 2, + constants.WEIGHTED_BINS, + ), + ) return reduce( lambda residual, value: distogram.merge(residual, value), tile_histograms, - distogram.Distogram(bin_count=constants.MAX_BINS, weighted_diff=constants.WEIGHTED_BINS), + distogram.Distogram( + bin_count=constants.MAX_BINS, weighted_diff=constants.WEIGHTED_BINS, + ), ) @@ -207,7 +234,7 @@ def estimate_slice_entropies(file_path: Path, smoothing: bool) -> list[float]: z_end = reader.Z # Find a bounding box for each image in the group. 
- slice_histograms = list() + slice_histograms = [] with ProcessPoolExecutor(max_workers=constants.NUM_THREADS) as executor: processes = [ executor.submit( @@ -228,11 +255,11 @@ def estimate_slice_entropies(file_path: Path, smoothing: bool) -> list[float]: def determine_bounding_box_thread( - file_path: Path, - smoothing: bool, - crop_y: bool, - crop_x: bool, - z_index: int, + file_path: Path, + smoothing: bool, + crop_y: bool, + crop_x: bool, + z_index: int, ) -> tuple[int, int, int, int]: with BioReader(file_path) as reader: x_end, y_end, z_end = reader.X, reader.Y, reader.Z @@ -253,11 +280,11 @@ def determine_bounding_box_thread( def determine_bounding_box( - file_path: Path, - crop_axes: tuple[bool, bool, bool], - smoothing: bool, + file_path: Path, + crop_axes: tuple[bool, bool, bool], + smoothing: bool, ) -> helpers.BoundingBox: - """ Using the gradient of entropy values of rows/columns in an image, + """Using the gradient of entropy values of rows/columns in an image, determine the bounding-box around the region of the image which contains useful information. @@ -271,10 +298,10 @@ def determine_bounding_box( Returns: A 4-tuple of integers representing a bounding-box. """ - logger.info(f'Finding bounding_box for {file_path.name}...') + logger.info(f"Finding bounding_box for {file_path.name}...") crop_x, crop_y, crop_z = crop_axes - bounding_boxes: list[helpers.BoundingBox] = list() + bounding_boxes: list[helpers.BoundingBox] = [] with BioReader(file_path) as reader: x_end, y_end, z_end = reader.X, reader.Y, reader.Z @@ -293,13 +320,13 @@ def _find_spike(values: list[float]) -> int: z1 = _find_spike(slice_entropies) z2 = z_end - _find_spike(list(reversed(slice_entropies))) except IndexError as e: - logger.error(f'entropies {slice_entropies} produced index error {e}') + logger.error(f"entropies {slice_entropies} produced index error {e}") raise e else: z1, z2 = 0, z_end # Find a bounding box for each z-slice in the image. 
- bounding_boxes_2d = list() + bounding_boxes_2d = [] with ProcessPoolExecutor(max_workers=constants.NUM_THREADS) as executor: processes = [ executor.submit( @@ -315,18 +342,17 @@ def _find_spike(values: list[float]) -> int: for process in as_completed(processes): bounding_boxes_2d.append(process.result()) - bounding_boxes.extend([ - (z1, z2, y1, y2, x1, x2) - for y1, y2, x1, x2 in bounding_boxes_2d - ]) + bounding_boxes.extend( + [(z1, z2, y1, y2, x1, x2) for y1, y2, x1, x2 in bounding_boxes_2d], + ) bounding_box = helpers.bounding_box_superset(bounding_boxes) - logger.info(f'Determined {bounding_box = } for {file_path.name}') + logger.info(f"Determined {bounding_box = } for {file_path.name}") return bounding_box def verify_group_shape(file_paths: list[Path]): - """ Verifies that all given images have the same x, y, and z dimensions. + """Verifies that all given images have the same x, y, and z dimensions. Args: file_paths: A list of file-paths that belong to the same group. @@ -338,22 +364,21 @@ def verify_group_shape(file_paths: list[Path]): depths.add(reader.Z), heights.add(reader.X), widths.add(reader.Y) if len(depths) > 1 or len(heights) > 1 or len(widths) > 1: - message = 'Group contains images which have different dimensions.' + message = "Group contains images which have different dimensions." 
logger.error(message) raise ValueError(message) - logger.info(f'Starting from shape {(depths.pop(), heights.pop(), widths.pop())}...') - return + logger.info(f"Starting from shape {(depths.pop(), heights.pop(), widths.pop())}...") def crop_image_group( - *, - file_paths: list[Path], - crop_axes: tuple[bool, bool, bool], - smoothing: bool, - output_dir: Path, + *, + file_paths: list[Path], + crop_axes: tuple[bool, bool, bool], + smoothing: bool, + output_dir: Path, ): - """ Given a list of file-paths to images in the same group, crop those + """Given a list of file-paths to images in the same group, crop those images and write the results in the given output directory. Args: @@ -365,7 +390,7 @@ def crop_image_group( verify_group_shape(file_paths) # Find a bounding box for each image in the group. - bounding_boxes = list() + bounding_boxes = [] with ProcessPoolExecutor(max_workers=constants.NUM_THREADS) as executor: processes = { executor.submit(determine_bounding_box, file_path, crop_axes, smoothing) @@ -376,15 +401,14 @@ def crop_image_group( bounding_box = helpers.bounding_box_superset(bounding_boxes) write_cropped_images(file_paths, output_dir, bounding_box) - return def write_cropped_images( - file_paths: list[Path], - output_dir: Path, - bounding_box: helpers.BoundingBox, + file_paths: list[Path], + output_dir: Path, + bounding_box: helpers.BoundingBox, ): - """ Crops and writes the given group of images using the given bounding box. + """Crops and writes the given group of images using the given bounding box. Args: file_paths: A list of Paths for the input images. 
@@ -394,15 +418,17 @@ def write_cropped_images( """ z1, z2, y1, y2, x1, x2 = bounding_box out_depth, out_width, out_height = z2 - z1, y2 - y1, x2 - x1 - logger.info(f'Superset bounding {bounding_box = }...') - logger.info(f'Cropping to shape (z, y, x) = {out_depth, out_width, out_height}...') + logger.info(f"Superset bounding {bounding_box = }...") + logger.info(f"Cropping to shape (z, y, x) = {out_depth, out_width, out_height}...") for file_path in file_paths: out_path = output_dir.joinpath(helpers.replace_extension(file_path.name)) - logger.info(f'Writing {out_path.name}...') + logger.info(f"Writing {out_path.name}...") with BioReader(file_path) as reader: - with BioWriter(out_path, metadata=reader.metadata, max_workers=constants.NUM_THREADS) as writer: + with BioWriter( + out_path, metadata=reader.metadata, max_workers=constants.NUM_THREADS, + ) as writer: writer.Z = out_depth writer.Y = out_width writer.X = out_height @@ -421,13 +447,20 @@ def write_cropped_images( in_x_max = min(x2, in_x + constants.TILE_STRIDE) try: - tile = reader[in_y:in_y_max, in_x:in_x_max, z_in:z_in + 1, 0, 0] - writer[out_y:out_y_max, out_x:out_x_max, z_out:z_out + 1, 0, 0] = tile[:] + tile = reader[ + in_y:in_y_max, in_x:in_x_max, z_in : z_in + 1, 0, 0, + ] + writer[ + out_y:out_y_max, + out_x:out_x_max, + z_out : z_out + 1, + 0, + 0, + ] = tile[:] except AssertionError as e: logger.error( - f'failed to read tile {(in_y, in_y_max, in_x, in_x_max, z_in, z_in + 1) = }\n' - f'and write to {(out_y, out_y_max, out_x, out_x_max, z_out, z_out + 1) = }\n' - f'because {e}' + f"failed to read tile {(in_y, in_y_max, in_x, in_x_max, z_in, z_in + 1) = }\n" + f"and write to {(out_y, out_y_max, out_x, out_x_max, z_out, z_out + 1) = }\n" + f"because {e}", ) raise e - return diff --git a/transforms/images/polus-autocropping-plugin/src/main.py b/transforms/images/polus-autocropping-plugin/src/main.py index e8051b9ed..37efd3176 100644 --- a/transforms/images/polus-autocropping-plugin/src/main.py +++ 
b/transforms/images/polus-autocropping-plugin/src/main.py @@ -3,121 +3,169 @@ from pathlib import Path import filepattern - from autocrop import crop_image_group + from utils import constants logging.basicConfig( - format='%(asctime)s - %(name)-8s - %(levelname)-8s - %(message)s', - datefmt='%d-%b-%y %H:%M:%S', + format="%(asctime)s - %(name)-8s - %(levelname)-8s - %(message)s", + datefmt="%d-%b-%y %H:%M:%S", ) logger = logging.getLogger("main") logger.setLevel(constants.POLUS_LOG) if __name__ == "__main__": - """ Argument parsing """ + """Argument parsing""" logger.info("Parsing arguments...") parser = argparse.ArgumentParser( - prog='main', - description='Autocropping 2d and 3d images by estimating entropy of rows, columns, and z-slices.', + prog="main", + description="Autocropping 2d and 3d images by estimating entropy of rows, columns, and z-slices.", ) # Input arguments - parser.add_argument('--inputDir', dest='inputDir', type=str, required=True, - help='Input image collection to be processed by this plugin.') + parser.add_argument( + "--inputDir", + dest="inputDir", + type=str, + required=True, + help="Input image collection to be processed by this plugin.", + ) - parser.add_argument('--filePattern', dest='filePattern', type=str, required=True, - help='File pattern to use for grouping images.') + parser.add_argument( + "--filePattern", + dest="filePattern", + type=str, + required=True, + help="File pattern to use for grouping images.", + ) - parser.add_argument('--groupBy', dest='groupBy', type=str, required=True, - help='Variables to use for grouping images. Each group is cropped to the same bounding-box.') + parser.add_argument( + "--groupBy", + dest="groupBy", + type=str, + required=True, + help="Variables to use for grouping images. 
Each group is cropped to the same bounding-box.", + ) - parser.add_argument('--cropX', dest='cropX', type=str, required=False, default='true', - help='Whether to crop along the x-axis.') + parser.add_argument( + "--cropX", + dest="cropX", + type=str, + required=False, + default="true", + help="Whether to crop along the x-axis.", + ) - parser.add_argument('--cropY', dest='cropY', type=str, required=False, default='true', - help='Whether to crop along the y-axis.') + parser.add_argument( + "--cropY", + dest="cropY", + type=str, + required=False, + default="true", + help="Whether to crop along the y-axis.", + ) - parser.add_argument('--cropZ', dest='cropZ', type=str, required=False, default='true', - help='Whether to crop along the z-axis.') + parser.add_argument( + "--cropZ", + dest="cropZ", + type=str, + required=False, + default="true", + help="Whether to crop along the z-axis.", + ) - parser.add_argument('--smoothing', dest='smoothing', type=str, required=False, default='true', - help='Whether to use gaussian smoothing on images to add more tolerance to noise.') + parser.add_argument( + "--smoothing", + dest="smoothing", + type=str, + required=False, + default="true", + help="Whether to use gaussian smoothing on images to add more tolerance to noise.", + ) # Output arguments - parser.add_argument('--outputDir', dest='outputDir', type=str, required=True, - help='Output collection.') + parser.add_argument( + "--outputDir", + dest="outputDir", + type=str, + required=True, + help="Output collection.", + ) # Parse the arguments args = parser.parse_args() - error_messages = list() + error_messages = [] input_dir = Path(args.inputDir).resolve() - if input_dir.joinpath('images').is_dir(): + if input_dir.joinpath("images").is_dir(): # switch to images folder if present - input_dir = input_dir.joinpath('images') + input_dir = input_dir.joinpath("images") if not input_dir.exists(): - error_messages.append(f'inputDir {input_dir} does not exist.') + 
error_messages.append(f"inputDir {input_dir} does not exist.") pattern = args.filePattern group_by = args.groupBy if len(set(group_by) - set(filepattern.VARIABLES)) > 0: error_messages.append( - f'groupBy variables must be from among {list(filepattern.VARIABLES)}. ' - f'Got {group_by} instead...' + f"groupBy variables must be from among {list(filepattern.VARIABLES)}. " + f"Got {group_by} instead...", ) crop_x = args.cropX - if crop_x in {'true', 'false'}: - crop_x = (crop_x == 'true') + if crop_x in {"true", "false"}: + crop_x = crop_x == "true" else: - error_messages.append('cropX must be either \'true\' or \'false\'') + error_messages.append("cropX must be either 'true' or 'false'") crop_y = args.cropY - if crop_y in {'true', 'false'}: - crop_y = (crop_y == 'true') + if crop_y in {"true", "false"}: + crop_y = crop_y == "true" else: - error_messages.append('cropY must be either \'true\' or \'false\'') + error_messages.append("cropY must be either 'true' or 'false'") crop_z = args.cropZ - if crop_z in {'true', 'false'}: - crop_z = (crop_z == 'true') + if crop_z in {"true", "false"}: + crop_z = crop_z == "true" else: - error_messages.append('cropZ must be either \'true\' or \'false\'') + error_messages.append("cropZ must be either 'true' or 'false'") smoothing = args.smoothing - if smoothing in {'true', 'false'}: - smoothing = smoothing == 'true' + if smoothing in {"true", "false"}: + smoothing = smoothing == "true" else: - error_messages.append('smoothing must be either \'true\' or \'false\'') + error_messages.append("smoothing must be either 'true' or 'false'") output_dir = Path(args.outputDir).resolve() if not output_dir.exists(): - error_messages.append(f'outputDir {output_dir} does not exist.') + error_messages.append(f"outputDir {output_dir} does not exist.") if len(error_messages) > 0: - error_messages.append('See the README for more details on what these parameters should be.') - message = f'Oh no! 
Something went wrong:\n' + '\n'.join(error_messages) + error_messages.append( + "See the README for more details on what these parameters should be.", + ) + message = "Oh no! Something went wrong:\n" + "\n".join(error_messages) logger.error(message) raise ValueError(message) else: - logger.info(f'inputDir = {input_dir}') - logger.info(f'filePattern = {pattern}') - logger.info(f'groupBy = {group_by}') - logger.info(f'cropX = {crop_x}') - logger.info(f'cropY = {crop_y}') - logger.info(f'cropZ = {crop_z}') - logger.info(f'smoothing = {smoothing}') - logger.info(f'outputDir = {output_dir}') + logger.info(f"inputDir = {input_dir}") + logger.info(f"filePattern = {pattern}") + logger.info(f"groupBy = {group_by}") + logger.info(f"cropX = {crop_x}") + logger.info(f"cropY = {crop_y}") + logger.info(f"cropZ = {crop_z}") + logger.info(f"smoothing = {smoothing}") + logger.info(f"outputDir = {output_dir}") fp = filepattern.FilePattern(input_dir, pattern) groups = list(fp(group_by=group_by)) for i, group in enumerate(groups): if len(group) == 0: continue - file_paths = [files['file'] for files in group] - logger.info(f'Working on group {i + 1}/{len(groups)} containing {len(file_paths)} images...') + file_paths = [files["file"] for files in group] + logger.info( + f"Working on group {i + 1}/{len(groups)} containing {len(file_paths)} images...", + ) crop_image_group( file_paths=file_paths, crop_axes=(crop_x, crop_y, crop_z), diff --git a/transforms/images/polus-autocropping-plugin/src/utils/constants.py b/transforms/images/polus-autocropping-plugin/src/utils/constants.py index 735edee8d..a35856310 100644 --- a/transforms/images/polus-autocropping-plugin/src/utils/constants.py +++ b/transforms/images/polus-autocropping-plugin/src/utils/constants.py @@ -2,8 +2,8 @@ import os from multiprocessing import cpu_count -POLUS_LOG = getattr(logging, os.environ.get('POLUS_LOG', 'INFO')) -POLUS_EXT = os.environ.get('POLUS_EXT', '.ome.tif') +POLUS_LOG = getattr(logging, 
os.environ.get("POLUS_LOG", "INFO")) +POLUS_EXT = os.environ.get("POLUS_EXT", ".ome.tif") NUM_THREADS = max(1, int(cpu_count() * 0.8)) TILE_STRIDE = 1024 * 2 # TODO: Measure impact of making this larger diff --git a/transforms/images/polus-autocropping-plugin/src/utils/helpers.py b/transforms/images/polus-autocropping-plugin/src/utils/helpers.py index 751440883..ff4d95431 100644 --- a/transforms/images/polus-autocropping-plugin/src/utils/helpers.py +++ b/transforms/images/polus-autocropping-plugin/src/utils/helpers.py @@ -1,4 +1,4 @@ -from collections import Generator +from collections.abc import Generator from pathlib import Path from typing import Optional @@ -25,8 +25,10 @@ BoundingBox = tuple[int, int, int, int, int, int] -def distogram_from_batch(values: list[float], bin_count: int, weighted_diff: bool) -> distogram.Distogram: - """ Create a distogram from a batch of values rather than a stream of values. +def distogram_from_batch( + values: list[float], bin_count: int, weighted_diff: bool, +) -> distogram.Distogram: + """Create a distogram from a batch of values rather than a stream of values. Sometimes, O(n.log(n)) is faster than O(n). 
Python's built-in sort function is fast enough that it allows us to outperform the theoretically faster update @@ -40,9 +42,9 @@ def distogram_from_batch(values: list[float], bin_count: int, weighted_diff: boo Returns: A Distogram """ - values = list(sorted(values)) + values = sorted(values) step = len(values) // bin_count - values = [values[i: i + step] for i in range(0, len(values), step)] + values = [values[i : i + step] for i in range(0, len(values), step)] bins = [(v[0], len(v)) for v in values] h = distogram.Distogram(bin_count, weighted_diff) @@ -55,19 +57,16 @@ def distogram_from_batch(values: list[float], bin_count: int, weighted_diff: boo return h -def replace_extension(name: str, new_extension: str = None) -> str: - """ Replaces the extension in the name of an input image with `POLUS_EXT` - for writing corresponding output images. """ +def replace_extension(name: str, new_extension: Optional[str] = None) -> str: + """Replaces the extension in the name of an input image with `POLUS_EXT` + for writing corresponding output images. + """ new_extension = constants.POLUS_EXT if new_extension is None else new_extension - return ( - name - .replace('.ome.tif', new_extension) - .replace('.ome.zarr', new_extension) - ) + return name.replace(".ome.tif", new_extension).replace(".ome.zarr", new_extension) def iter_tiles_2d(file_path: Path) -> TileIndices: - """ A Generator of tile_indices in a 3d image. + """A Generator of tile_indices in a 3d image. Args: file_path: Path to the image. @@ -88,7 +87,7 @@ def iter_tiles_2d(file_path: Path) -> TileIndices: def iter_strip(file_path: Path, index: int, axis: int) -> TileIndices: - """ A Generator of tile_indices in the indexed strip along the given axis. + """A Generator of tile_indices in the indexed strip along the given axis. Args: file_path: Path to the image. 
@@ -125,7 +124,7 @@ def iter_strip(file_path: Path, index: int, axis: int) -> TileIndices: def rolling_mean(values: list[float], *, prepend_zeros: bool = False) -> list[float]: - """ Compute a rolling mean over a list of values. + """Compute a rolling mean over a list of values. This implementation is faster than using numpy.convolve @@ -137,19 +136,20 @@ def rolling_mean(values: list[float], *, prepend_zeros: bool = False) -> list[fl A list of rolling-mean values. """ if prepend_zeros: - zeros = [0.] * constants.WINDOW_SIZE + zeros = [0.0] * constants.WINDOW_SIZE values = zeros + values sums = numpy.cumsum(values) - means = [ + return [ abs(float(a - b)) / constants.WINDOW_SIZE - for a, b in zip(sums[constants.WINDOW_SIZE:], sums[:-constants.WINDOW_SIZE]) + for a, b in zip(sums[constants.WINDOW_SIZE :], sums[: -constants.WINDOW_SIZE]) ] - return means -def smoothed_gradients(values: list[float], *, prepend_zeros: bool = False) -> list[float]: - """ Compute the smoothed gradients between smoothed adjacent values from the given list of values. +def smoothed_gradients( + values: list[float], *, prepend_zeros: bool = False, +) -> list[float]: + """Compute the smoothed gradients between smoothed adjacent values from the given list of values. This implementation is faster than using numpy.convolve @@ -163,15 +163,14 @@ def smoothed_gradients(values: list[float], *, prepend_zeros: bool = False) -> l smoothed_values = rolling_mean(values, prepend_zeros=prepend_zeros) raw_gradients = [ - float(a - b) - for a, b in zip(smoothed_values[1:], smoothed_values[:-1]) + float(a - b) for a, b in zip(smoothed_values[1:], smoothed_values[:-1]) ] return rolling_mean(raw_gradients, prepend_zeros=prepend_zeros) def find_spike(values: list[float], threshold: float) -> Optional[tuple[int, float]]: - """ Returns the index and value of the first gradient that is greater than + """Returns the index and value of the first gradient that is greater than or equal to the given threshold. 
If no such gradient exists, returns None. Args: @@ -189,7 +188,7 @@ def find_spike(values: list[float], threshold: float) -> Optional[tuple[int, flo def bounding_box_superset(bounding_boxes: list[BoundingBox]) -> BoundingBox: - """ Given a list of bounding-boxes, determine the bounding-box that bounds + """Given a list of bounding-boxes, determine the bounding-box that bounds all given bounding-boxes. This is used to ensure that all images in a group are cropped in a diff --git a/transforms/images/polus-autocropping-plugin/src/utils/local_distogram.py b/transforms/images/polus-autocropping-plugin/src/utils/local_distogram.py index 45b556f0a..08bda8bf5 100644 --- a/transforms/images/polus-autocropping-plugin/src/utils/local_distogram.py +++ b/transforms/images/polus-autocropping-plugin/src/utils/local_distogram.py @@ -1,28 +1,26 @@ __author__ = """Romain Picard""" -__email__ = 'romain.picard@oakbits.com' -__version__ = '1.6.0' +__email__ = "romain.picard@oakbits.com" +__version__ = "1.6.0" import math from bisect import bisect_left from functools import reduce from itertools import accumulate from operator import itemgetter -from typing import List from typing import Optional -from typing import Tuple EPSILON = 1e-5 -Bin = Tuple[float, int] +Bin = tuple[float, int] # bins is a tuple of (cut point, count) -class Distogram(object): - """ Compressed representation of a distribution. - """ - __slots__ = 'bin_count', 'bins', 'min', 'max', 'diffs', 'min_diff', 'weighted_diff' +class Distogram: + """Compressed representation of a distribution.""" + + __slots__ = "bin_count", "bins", "min", "max", "diffs", "min_diff", "weighted_diff" - def __init__(self, bin_count: int = 100, weighted_diff: bool = False): - """ Creates a new Distogram object + def __init__(self, bin_count: int = 100, weighted_diff: bool = False) -> None: + """Creates a new Distogram object. Args: bin_count: [Optional] the number of bins to use. 
@@ -32,23 +30,22 @@ def __init__(self, bin_count: int = 100, weighted_diff: bool = False): A Distogram object. """ self.bin_count: int = bin_count - self.bins: List[Bin] = list() + self.bins: list[Bin] = [] self.min: Optional[float] = None self.max: Optional[float] = None - self.diffs: Optional[List[float]] = None + self.diffs: Optional[list[float]] = None self.min_diff: Optional[float] = None self.weighted_diff: bool = weighted_diff -def _linspace(start: float, stop: float, num: int) -> List[float]: +def _linspace(start: float, stop: float, num: int) -> list[float]: if num == 1: return [stop] step = (stop - start) / float(num) - values = [start + step * i for i in range(num + 1)] - return values + return [start + step * i for i in range(num + 1)] -def _moment(x: List[float], counts: List[float], c: float, n: int) -> float: +def _moment(x: list[float], counts: list[float], c: float, n: int) -> float: m = (ci * (v - c) ** n for i, (ci, v) in enumerate(zip(counts, x))) return sum(m) / sum(counts) @@ -83,7 +80,6 @@ def _update_diffs(h: Distogram, i: int) -> None: if update_min is True: h.min_diff = min(h.diffs) - return def _trim(h: Distogram) -> Distogram: @@ -117,7 +113,7 @@ def _trim_in_place(h: Distogram, value: float, c: int, i: int): return h -def _compute_diffs(h: Distogram) -> List[float]: +def _compute_diffs(h: Distogram) -> list[float]: if h.weighted_diff is True: diffs = [ (v2 - v1) * math.log(EPSILON + min(f1, f2)) @@ -146,7 +142,7 @@ def _search_in_place_index(h: Distogram, new_value: float, index: int) -> int: def update(h: Distogram, value: float, count: int = 1) -> Distogram: - """ Adds a new element to the distribution. + """Adds a new element to the distribution. Args: h: A Distogram object. 
@@ -173,8 +169,7 @@ def update(h: Distogram, value: float, count: int = 1) -> Distogram: if index > 0 and len(h.bins) >= h.bin_count: in_place_index = _search_in_place_index(h, value, index) if in_place_index > 0: - h = _trim_in_place(h, value, count, in_place_index) - return h + return _trim_in_place(h, value, count, in_place_index) if index == -1: h.bins.append((value, count)) @@ -193,12 +188,11 @@ def update(h: Distogram, value: float, count: int = 1) -> Distogram: if (h.max is None) or (h.max < value): h.max = value - h = _trim(h) - return h + return _trim(h) def merge(h1: Distogram, h2: Distogram) -> Distogram: - """ Merges two Distogram objects + """Merges two Distogram objects. Args: h1: First Distogram. @@ -208,16 +202,15 @@ def merge(h1: Distogram, h2: Distogram) -> Distogram: A Distogram object being the composition of h1 and h2. The number of bins in this Distogram is equal to the number of bins in h1. """ - h = reduce( + return reduce( lambda residual, b: update(residual, *b), h2.bins, h1, ) - return h def count_at(h: Distogram, value: float): - """ Counts the number of elements present in the distribution up to value. + """Counts the number of elements present in the distribution up to value. Args: h: A Distogram object. @@ -259,7 +252,7 @@ def count_at(h: Distogram, value: float): def count(h: Distogram) -> float: - """ Counts the number of elements in the distribution. + """Counts the number of elements in the distribution. Args: h: A Distogram object. @@ -270,8 +263,8 @@ def count(h: Distogram) -> float: return sum((f for _, f in h.bins)) -def bounds(h: Distogram) -> Tuple[float, float]: - """ Returns the min and max values of the distribution. +def bounds(h: Distogram) -> tuple[float, float]: + """Returns the min and max values of the distribution. Args: h: A Distogram object. @@ -283,7 +276,7 @@ def bounds(h: Distogram) -> Tuple[float, float]: def mean(h: Distogram) -> float: - """ Returns the mean of the distribution. 
+ """Returns the mean of the distribution. Args: h: A Distogram object. @@ -296,7 +289,7 @@ def mean(h: Distogram) -> float: def variance(h: Distogram) -> float: - """ Returns the variance of the distribution. + """Returns the variance of the distribution. Args: h: A Distogram object. @@ -309,7 +302,7 @@ def variance(h: Distogram) -> float: def stddev(h: Distogram) -> float: - """ Returns the standard deviation of the distribution. + """Returns the standard deviation of the distribution. Args: h: A Distogram object. @@ -321,8 +314,8 @@ def stddev(h: Distogram) -> float: return math.sqrt(variance(h)) -def histogram(h: Distogram, ucount: int = 100) -> List[Tuple[float, float]]: - """ Returns a histogram of the distribution +def histogram(h: Distogram, ucount: int = 100) -> list[tuple[float, float]]: + """Returns a histogram of the distribution. Args: h: A Distogram object. @@ -336,20 +329,19 @@ def histogram(h: Distogram, ucount: int = 100) -> List[Tuple[float, float]]: bins in the Distogram object. """ if len(h.bins) < h.bin_count: - raise ValueError("not enough elements in distribution") + msg = "not enough elements in distribution" + raise ValueError(msg) - bin_bounds = _linspace(h.min, h.max, num=ucount+1) + bin_bounds = _linspace(h.min, h.max, num=ucount + 1) counts = [count_at(h, e) for e in bin_bounds[1:-1]] - u = [ - (b, new - last) - for b, new, last in zip(bin_bounds[1:], counts[1:], counts[:-1]) + return [ + (b, new - last) for b, new, last in zip(bin_bounds[1:], counts[1:], counts[:-1]) ] - return u def quantile(h: Distogram, value: float) -> Optional[float]: - """ Returns a quantile of the distribution + """Returns a quantile of the distribution. Args: h: A Distogram object. 
diff --git a/transforms/images/polus-autocropping-plugin/tests/__init__.py b/transforms/images/polus-autocropping-plugin/tests/__init__.py index 036d52578..2e3121c20 100644 --- a/transforms/images/polus-autocropping-plugin/tests/__init__.py +++ b/transforms/images/polus-autocropping-plugin/tests/__init__.py @@ -1,6 +1,7 @@ from unittest import TestSuite -from .version_test import VersionTest + from .test_autocrop import CorrectnessTest +from .version_test import VersionTest test_cases = ( VersionTest, diff --git a/transforms/images/polus-autocropping-plugin/tests/test_autocrop.py b/transforms/images/polus-autocropping-plugin/tests/test_autocrop.py index b8caa826f..b7a46872e 100644 --- a/transforms/images/polus-autocropping-plugin/tests/test_autocrop.py +++ b/transforms/images/polus-autocropping-plugin/tests/test_autocrop.py @@ -21,8 +21,8 @@ class CorrectnessTest(unittest.TestCase): @classmethod def setUpClass(cls) -> None: - cls.infile = tempfile.NamedTemporaryFile(suffix='.ome.tif') - cls.outfile = tempfile.NamedTemporaryFile(suffix='.ome.tif') + cls.infile = tempfile.NamedTemporaryFile(suffix=".ome.tif") + cls.outfile = tempfile.NamedTemporaryFile(suffix=".ome.tif") random_image = numpy.random.randint( low=0, @@ -47,18 +47,28 @@ def test_tile_generator(self): with BioReader(self.infile.name) as reader: for index in range(self.num_strips): for axis in (0, 1): - tiles = list(helpers.iter_strip(Path(self.infile.name), index, axis)) + tiles = list( + helpers.iter_strip(Path(self.infile.name), index, axis) + ) self.assertEqual(len(tiles), self.num_strips) for i, (x, x_max, y, y_max) in enumerate(tiles): tile = reader[y:y_max, x:x_max, 0:1, 0, 0] tile = tile if axis == 0 else numpy.transpose(tile) - true_rows = self.hanging if index == (self.num_strips - 1) else constants.TILE_STRIDE - true_cols = self.hanging if i == (self.num_strips - 1) else constants.TILE_STRIDE + true_rows = ( + self.hanging + if index == (self.num_strips - 1) + else constants.TILE_STRIDE + ) + 
true_cols = ( + self.hanging + if i == (self.num_strips - 1) + else constants.TILE_STRIDE + ) self.assertEqual( tile.shape, (true_rows, true_cols), - f'index {index}, axis {axis}, tile {i}, shape {tile.shape}' + f"index {index}, axis {axis}, tile {i}, shape {tile.shape}", ) return @@ -84,8 +94,8 @@ def test_determine_bbox(self): crop_axes=(True, True, True), smoothing=True, ) - self.assertTrue(0 <= x1 < x2 <= self.image_size, f'{x1, x2}') - self.assertTrue(0 <= y1 < y2 <= self.image_size, f'{y1, y2}') + self.assertTrue(0 <= x1 < x2 <= self.image_size, f"{x1, x2}") + self.assertTrue(0 <= y1 < y2 <= self.image_size, f"{y1, y2}") def test_bbox_superset(self): bounding_boxes = [ @@ -99,5 +109,5 @@ def test_bbox_superset(self): self.assertEqual((8, 22, 60, 71, 106, 180), bounding_box) -if __name__ == '__main__': +if __name__ == "__main__": unittest.main() diff --git a/transforms/images/polus-autocropping-plugin/tests/version_test.py b/transforms/images/polus-autocropping-plugin/tests/version_test.py index f2a3cf36e..8e54bb4a7 100644 --- a/transforms/images/polus-autocropping-plugin/tests/version_test.py +++ b/transforms/images/polus-autocropping-plugin/tests/version_test.py @@ -4,23 +4,21 @@ class VersionTest(unittest.TestCase): - version_path = Path(__file__).parent.parent.joinpath("VERSION") json_path = Path(__file__).parent.parent.joinpath("plugin.json") def test_plugin_manifest(self): # Get the plugin version - with open(self.version_path, 'r') as file: + with open(self.version_path) as file: version = file.readline() # Load the plugin manifest - with open(self.json_path, 'r') as file: + with open(self.json_path) as file: plugin_json = json.load(file) - self.assertEqual(plugin_json['version'], version) - self.assertTrue(plugin_json['containerId'].endswith(version)) + assert plugin_json["version"] == version + assert plugin_json["containerId"].endswith(version) - return if __name__ == "__main__": diff --git a/transforms/images/polus-ftl-label-plugin/Cargo.toml 
b/transforms/images/polus-ftl-label-plugin/Cargo.toml index 13c2f3a87..3fd5d68d9 100644 --- a/transforms/images/polus-ftl-label-plugin/Cargo.toml +++ b/transforms/images/polus-ftl-label-plugin/Cargo.toml @@ -21,4 +21,4 @@ criterion = { version = "^0.3", features = ["html_reports"] } [[bench]] name = "ftl_rust" -harness = false \ No newline at end of file +harness = false diff --git a/transforms/images/polus-ftl-label-plugin/README.md b/transforms/images/polus-ftl-label-plugin/README.md index 2ec2fc5cd..c6f72641d 100644 --- a/transforms/images/polus-ftl-label-plugin/README.md +++ b/transforms/images/polus-ftl-label-plugin/README.md @@ -31,7 +31,7 @@ To see detailed documentation for the `Rust` implementation you need to: That last command will generate documentation and open a new tab in your default web browser. We determine whether to use the `Cython` or `Rust` implementation on a per-image basis depending on the size of that image. -If we expect the image to occupy less than `500MB` of memory, we use the `Cython` implementation otherwise we use the `Rust` implementation. +If we expect the image to occupy less than `500MB` of memory, we use the `Cython` implementation otherwise we use the `Rust` implementation. For more information on WIPP, visit the [official WIPP page](https://isg.nist.gov/deepzoomweb/software/wipp). 
diff --git a/transforms/images/polus-ftl-label-plugin/SimpleTiledTiffViewer.py b/transforms/images/polus-ftl-label-plugin/SimpleTiledTiffViewer.py index ed3409514..bdb7b3e11 100644 --- a/transforms/images/polus-ftl-label-plugin/SimpleTiledTiffViewer.py +++ b/transforms/images/polus-ftl-label-plugin/SimpleTiledTiffViewer.py @@ -1,33 +1,44 @@ -from bfio import BioReader -from pathlib import Path import argparse import logging +from pathlib import Path + import matplotlib.pyplot as plt +from bfio import BioReader -if __name__=="__main__": +if __name__ == "__main__": # Initialize the logger - logging.basicConfig(format='%(asctime)s - %(name)-8s - %(levelname)-8s - %(message)s', - datefmt='%d-%b-%y %H:%M:%S') + logging.basicConfig( + format="%(asctime)s - %(name)-8s - %(levelname)-8s - %(message)s", + datefmt="%d-%b-%y %H:%M:%S", + ) logger = logging.getLogger("main") logger.setLevel(logging.INFO) # Setup the argument parsing logger.info("Parsing arguments...") - parser = argparse.ArgumentParser(prog='main', description='View *.ome.tif images and labels from FTL plugin.') - parser.add_argument('--inpDir', dest='inpDir', type=str, - help='Input image collection to be processed by this plugin', required=True) - parser.add_argument('--outDir', dest='outDir', type=str, - help='Output collection', required=True) + parser = argparse.ArgumentParser( + prog="main", description="View *.ome.tif images and labels from FTL plugin.", + ) + parser.add_argument( + "--inpDir", + dest="inpDir", + type=str, + help="Input image collection to be processed by this plugin", + required=True, + ) + parser.add_argument( + "--outDir", dest="outDir", type=str, help="Output collection", required=True, + ) # Parse the arguments args = parser.parse_args() inpDir = Path(args.inpDir) - logger.info('inpDir = {}'.format(inpDir)) + logger.info(f"inpDir = {inpDir}") outDir = Path(args.outDir) - logger.info('outDir = {}'.format(outDir)) + logger.info(f"outDir = {outDir}") # Get all file names in inpDir 
image collection - files = [f for f in inpDir.iterdir() if f.is_file() and f.name.endswith('.tif')] + files = [f for f in inpDir.iterdir() if f.is_file() and f.name.endswith(".tif")] for file in files: # Set up the BioReader @@ -37,12 +48,10 @@ with BioReader(outDir / file.name) as br_out: img_out = br_out[:] - fig, ax = plt.subplots(1, 2, figsize=(16,8)) + fig, ax = plt.subplots(1, 2, figsize=(16, 8)) ax[0].imshow(img_in), ax[0].set_title("Original Image") ax[1].imshow(img_out), ax[1].set_title("Labelled Image") fig.suptitle(file.name) plt.show() # Use savefig if you are on a headless machine, i.e. AWS EC2 instance - # plt.savefig(outDir / (file.stem.split('.ome')[0] + '.png')) plt.close() - diff --git a/transforms/images/polus-ftl-label-plugin/build-docker.sh b/transforms/images/polus-ftl-label-plugin/build-docker.sh index 2a0c728f2..fc30a216e 100755 --- a/transforms/images/polus-ftl-label-plugin/build-docker.sh +++ b/transforms/images/polus-ftl-label-plugin/build-docker.sh @@ -1,4 +1,4 @@ #!/bin/bash version=$( None: + """Creates a PolygonSet object to provide a minimal interface with the Rust implementation. Args: connectivity: Determines neighbors among pixels. Must be 1, 2 or 3. See the README for more details. """ if not (1 <= connectivity <= 3): - raise ValueError(f'connectivity must be 1, 2 or 3. Got {connectivity} instead') + msg = f"connectivity must be 1, 2 or 3. Got {connectivity} instead" + raise ValueError( + msg, + ) self.__polygon_set: RustPolygonSet = RustPolygonSet(connectivity) self.connectivity: int = connectivity self.metadata = None self.num_polygons = 0 - def __len__(self): - """ Returns the number of objects that were detected. """ + def __len__(self) -> int: + """Returns the number of objects that were detected.""" return self.num_polygons def dtype(self): - """ Chooses the minimal dtype for labels depending on the number of objects. 
""" - if self.num_polygons < 2 ** 8: + """Chooses the minimal dtype for labels depending on the number of objects.""" + if self.num_polygons < 2**8: dtype = numpy.uint8 - elif self.num_polygons < 2 ** 16: + elif self.num_polygons < 2**16: dtype = numpy.uint16 else: dtype = numpy.uint32 @@ -68,18 +71,20 @@ def _get_iteration_params(z_shape, y_shape, x_shape): num_rows += 1 return tile_size, num_slices, num_cols, num_rows - + def read_from(self, infile: Path): - """ Reads from a .ome.tif file and finds and labels all objects. + """Reads from a .ome.tif file and finds and labels all objects. Args: infile: Path to an ome.tif file for which to produce labels. """ - logger.info(f'Processing {infile.name}...') + logger.info(f"Processing {infile.name}...") with BioReader(infile) as reader: self.metadata = reader.metadata - tile_size, num_slices, num_cols, num_rows = self._get_iteration_params(reader.Z, reader.Y, reader.X) + tile_size, num_slices, num_cols, num_rows = self._get_iteration_params( + reader.Z, reader.Y, reader.X, + ) tile_count = 0 for z in range(0, reader.Z, tile_size): z_max = min(reader.Z, z + tile_size) @@ -96,29 +101,37 @@ def read_from(self, infile: Path): tile = tile.transpose(2, 0, 1) self.__polygon_set.add_tile(tile, (z, y, x)) tile_count += 1 - logger.debug(f'added tile #{tile_count} ({z}:{z_max}, {y}:{y_max}, {x}:{x_max})') - logger.info(f'Reading Progress {100 * tile_count / (num_slices * num_cols * num_rows):6.3f}%...') - - logger.info('digesting polygons...') + logger.debug( + f"added tile #{tile_count} ({z}:{z_max}, {y}:{y_max}, {x}:{x_max})", + ) + logger.info( + f"Reading Progress {100 * tile_count / (num_slices * num_cols * num_rows):6.3f}%...", + ) + + logger.info("digesting polygons...") self.__polygon_set.digest() self.num_polygons = self.__polygon_set.len() - logger.info(f'collected {self.num_polygons} polygons') + logger.info(f"collected {self.num_polygons} polygons") return self def write_to(self, outfile: Path): - """ Writes a labelled 
ome.tif to the given path. + """Writes a labelled ome.tif to the given path. This uses the metadata of the input file and sets the dtype depending on the number of labelled objects. Args: outfile: Path where the labelled image will be written. """ - with BioWriter(outfile, metadata=self.metadata, max_workers=cpu_count()) as writer: + with BioWriter( + outfile, metadata=self.metadata, max_workers=cpu_count(), + ) as writer: writer.dtype = self.dtype() - logger.info(f'writing {outfile.name} with dtype {self.dtype()}...') + logger.info(f"writing {outfile.name} with dtype {self.dtype()}...") - tile_size, _, num_cols, num_rows = self._get_iteration_params(writer.Z, writer.Y, writer.X) + tile_size, _, num_cols, num_rows = self._get_iteration_params( + writer.Z, writer.Y, writer.X, + ) tile_count = 0 for z in range(writer.Z): for y in range(0, writer.Y, tile_size): @@ -126,9 +139,17 @@ def write_to(self, outfile: Path): for x in range(0, writer.X, tile_size): x_max = min(writer.X, x + tile_size) - tile = extract_tile(self.__polygon_set, (z, z + 1, y, y_max, x, x_max)) - writer[y:y_max, x:x_max, z:z + 1, 0, 0] = tile.transpose(1, 2, 0) + tile = extract_tile( + self.__polygon_set, (z, z + 1, y, y_max, x, x_max), + ) + writer[y:y_max, x:x_max, z : z + 1, 0, 0] = tile.transpose( + 1, 2, 0, + ) tile_count += 1 - logger.debug(f'Wrote tile {tile_count}, ({z}, {y}:{y_max}, {x}:{x_max})') - logger.info(f'Writing Progress {100 * tile_count / (num_cols * num_rows * writer.Z):6.3f}%...') + logger.debug( + f"Wrote tile {tile_count}, ({z}, {y}:{y_max}, {x}:{x_max})", + ) + logger.info( + f"Writing Progress {100 * tile_count / (num_cols * num_rows * writer.Z):6.3f}%...", + ) return self diff --git a/transforms/images/polus-ftl-label-plugin/plugin.json b/transforms/images/polus-ftl-label-plugin/plugin.json index 84c99b4d1..5fa99b3fa 100644 --- a/transforms/images/polus-ftl-label-plugin/plugin.json +++ b/transforms/images/polus-ftl-label-plugin/plugin.json @@ -1,45 +1,45 @@ { - "name": 
"FTL Label", - "version": "0.3.9", - "title": "FTL Label", - "description": "Label objects in a 2d or 3d binary image.", - "author": "Nick Schaub (nick.schaub@nih.gov), Najib Ishaq (najib.ishaq@axleinfo.com)", - "institution": "National Center for Advancing Translational Sciences, National Institutes of Health", - "repository": "https://github.com/labshare/polus-plugins", - "website": "https://ncats.nih.gov/preclinical/core/informatics", - "citation": "", - "containerId": "labshare/polus-ftl-label-plugin:0.3.8", - "inputs": [ - { - "name": "inpDir", - "type": "collection", - "description": "Input image collection to be processed by this plugin", - "required": true - }, - { - "name": "connectivity", - "type": "number", - "description": "City block connectivity", - "required": true - } - ], - "outputs": [ - { - "name": "outDir", - "type": "collection", - "description": "Output collection" - } - ], - "ui": [ - { - "key": "inputs.inpDir", - "title": "Input collection", - "description": "Input image collection to be processed by this plugin" - }, - { - "key": "inputs.connectivity", - "title": "Connectivity", - "description": "City block connectivity" - } - ] + "name": "FTL Label", + "version": "0.3.9", + "title": "FTL Label", + "description": "Label objects in a 2d or 3d binary image.", + "author": "Nick Schaub (nick.schaub@nih.gov), Najib Ishaq (najib.ishaq@axleinfo.com)", + "institution": "National Center for Advancing Translational Sciences, National Institutes of Health", + "repository": "https://github.com/labshare/polus-plugins", + "website": "https://ncats.nih.gov/preclinical/core/informatics", + "citation": "", + "containerId": "labshare/polus-ftl-label-plugin:0.3.8", + "inputs": [ + { + "name": "inpDir", + "type": "collection", + "description": "Input image collection to be processed by this plugin", + "required": true + }, + { + "name": "connectivity", + "type": "number", + "description": "City block connectivity", + "required": true + } + ], + "outputs": [ + 
{ + "name": "outDir", + "type": "collection", + "description": "Output collection" + } + ], + "ui": [ + { + "key": "inputs.inpDir", + "title": "Input collection", + "description": "Input image collection to be processed by this plugin" + }, + { + "key": "inputs.connectivity", + "title": "Connectivity", + "description": "City block connectivity" + } + ] } diff --git a/transforms/images/polus-ftl-label-plugin/run-plugin.sh b/transforms/images/polus-ftl-label-plugin/run-plugin.sh index 8265a5b04..77653d23b 100644 --- a/transforms/images/polus-ftl-label-plugin/run-plugin.sh +++ b/transforms/images/polus-ftl-label-plugin/run-plugin.sh @@ -24,4 +24,4 @@ docker run --mount type=bind,source="${data_path}",target=/data/ \ labshare/polus-ftl-label-plugin:"${version}" \ --inpDir ${inpDir} \ --connectivity ${connectivity} \ - --outDir ${outDir} \ No newline at end of file + --outDir ${outDir} diff --git a/transforms/images/polus-ftl-label-plugin/rust_setup.py b/transforms/images/polus-ftl-label-plugin/rust_setup.py index 5da317d3a..03553b491 100644 --- a/transforms/images/polus-ftl-label-plugin/rust_setup.py +++ b/transforms/images/polus-ftl-label-plugin/rust_setup.py @@ -1,7 +1,6 @@ from setuptools import setup from setuptools_rust import RustExtension - setup( name="ftl-rust", version="0.1.0", diff --git a/transforms/images/polus-ftl-label-plugin/src/bench_rust.py b/transforms/images/polus-ftl-label-plugin/src/bench_rust.py index 17f3d12aa..d8c6f4994 100644 --- a/transforms/images/polus-ftl-label-plugin/src/bench_rust.py +++ b/transforms/images/polus-ftl-label-plugin/src/bench_rust.py @@ -6,21 +6,22 @@ def bench_rust(): count = 2209 - infile = Path(f'../../data/input_array/test_infile_{count}.ome.tif').resolve() - outfile = Path(f'../../data/input_array/test_outfile_{count}.ome.tif').resolve() + infile = Path(f"../../data/input_array/test_infile_{count}.ome.tif").resolve() + outfile = Path(f"../../data/input_array/test_outfile_{count}.ome.tif").resolve() polygon_set = 
PolygonSet(connectivity=1) - + start = time.time() polygon_set.read_from(infile) end = time.time() - print(f'took {end - start:.3f} seconds to read and digest...') + print(f"took {end - start:.3f} seconds to read and digest...") - assert count == len(polygon_set), f'found {len(polygon_set)} objects instead of {count}.' + assert count == len( + polygon_set, + ), f"found {len(polygon_set)} objects instead of {count}." polygon_set.write_to(outfile) - print(f'took {time.time() - end:.3f} seconds to write...') - return + print(f"took {time.time() - end:.3f} seconds to write...") -if __name__ == '__main__': +if __name__ == "__main__": bench_rust() diff --git a/transforms/images/polus-ftl-label-plugin/src/ftl.pyx b/transforms/images/polus-ftl-label-plugin/src/ftl.pyx index 1b491d6fc..b019a302d 100644 --- a/transforms/images/polus-ftl-label-plugin/src/ftl.pyx +++ b/transforms/images/polus-ftl-label-plugin/src/ftl.pyx @@ -46,7 +46,7 @@ cdef extern from "x86intrin.h": __m128i _mm_shuffle_epi8(__m128i a,__m128i b) nogil __m256i _mm256_add_epi16(__m256i a, __m256i b) nogil - + __m128i _mm_set_epi8(char e15,char e14,char e13,char e12, char e11,char e10,char e9, char e8, char e7, char e6, char e5, char e4, @@ -60,7 +60,7 @@ cdef extern from "x86intrin.h": char e11,char e10,char e9, char e8, char e7, char e6, char e5, char e4, char e3, char e2, char e1, char e0) nogil - + __m256i _mm256_set_epi64x(long long e3,long long e2, long long e1,long long e0) nogil @@ -117,7 +117,7 @@ cdef np.ndarray run_length_encode_16(unsigned char [:] image,tuple shape): image (numpy.ndarray): An n-dimensional image reshaped to a linear array of pixels. 
shape (tuple): The shape of the image - + Outputs: numpy.ndarray: Indices of pixel objects """ @@ -125,7 +125,7 @@ cdef np.ndarray run_length_encode_16(unsigned char [:] image,tuple shape): ''' Set up AVX vectors ''' # Vector to hold pixel values cdef __m128i v - # Vector to hold + # Vector to hold cdef __m128i mask # Vector for pixel indices cdef __m256i edges @@ -160,7 +160,7 @@ cdef np.ndarray run_length_encode_16(unsigned char [:] image,tuple shape): cdef vector[np.uint16_t] temp temp.resize(16,0) cdef vector[np.uint32_t] output - + ''' Looping variables ''' cdef unsigned long p,i,j,n,r cdef unsigned long position = 0 @@ -182,9 +182,9 @@ cdef np.ndarray run_length_encode_16(unsigned char [:] image,tuple shape): strides[j] = strides[j]*np.uint32(shape[ndim - i]) cdef Py_ssize_t last_stride = shape[ndim-1] positions = strides[0]*shape[0]//last_stride - + ''' - + Loop through all points and find the start and stopping edges of consecutive nonzero values. The way this works is that pixels are laid out linearly in memory according to the last dimension of the matrix. So, in a 2-d matrix @@ -199,7 +199,7 @@ cdef np.ndarray run_length_encode_16(unsigned char [:] image,tuple shape): matrix is 3-d with dimensions (128x128x64), then there are 128x128 positions to evaluate. Each position is starting point for a new line of pixels along the last dimension. - + ''' for p in range(positions): @@ -217,15 +217,15 @@ cdef np.ndarray run_length_encode_16(unsigned char [:] image,tuple shape): for i in range(ndim-1): output.push_back(coords[i]) output.push_back(0) - + ''' - + The following loop is designed for speed. It analyzes 15 pixels at a time, and if all 15 pixels have the same value then it quickly escapes to the next iteration of the loop. It stops looping when it gets less than 15 pixels from the end of the line of pixels so that it doesn't run into the next line of pixels. 
- + ''' r = 0 # manually register the pixel index, can be optimized with modulo outside the loop for n in range(0,last_stride-15,15): @@ -272,7 +272,7 @@ cdef np.ndarray run_length_encode_16(unsigned char [:] image,tuple shape): else: on_obj=False output.push_back(temp[i]) - + # Advance the vector index i_vec = _mm256_add_epi16(i_vec,cnst15) @@ -304,7 +304,7 @@ cdef np.ndarray run_length_encode_16(unsigned char [:] image,tuple shape): # Turn the uint16 vector into a numpy.ndarray of appropriate size row_objects = np.asarray(output,dtype=np.uint32).reshape(-1,ndim+1) - + return row_objects @cython.boundscheck(False) @@ -325,7 +325,7 @@ cdef np.ndarray run_length_encode_32(unsigned char [:] image,tuple shape): image (numpy.ndarray): An n-dimensional image reshaped to a linear array of pixels. shape (tuple): The shape of the image - + Outputs: numpy.ndarray: Indices of pixel objects """ @@ -333,7 +333,7 @@ cdef np.ndarray run_length_encode_32(unsigned char [:] image,tuple shape): ''' Set up AVX vectors ''' # Vector to hold pixel values cdef __m128i v - # Vector to hold + # Vector to hold cdef __m128i mask # Vector for pixel indices cdef __m256i edges,edges_shf @@ -372,7 +372,7 @@ cdef np.ndarray run_length_encode_32(unsigned char [:] image,tuple shape): cdef vector[np.uint32_t] temp temp.resize(16,0) cdef vector[np.uint32_t] output - + ''' Looping variables ''' cdef unsigned long long p,i,j,n,r cdef unsigned long long position = 0 @@ -394,9 +394,9 @@ cdef np.ndarray run_length_encode_32(unsigned char [:] image,tuple shape): strides[j] = strides[j]*np.uint64(shape[ndim - i]) cdef Py_ssize_t last_stride = shape[ndim-1] positions = strides[0]*shape[0]//last_stride - + ''' - + Loop through all points and find the start and stopping edges of consecutive nonzero values. The way this works is that pixels are laid out linearly in memory according to the last dimension of the matrix. 
@@ -411,7 +411,7 @@ cdef np.ndarray run_length_encode_32(unsigned char [:] image,tuple shape): the matrix is 3-d with dimensions (128x128x64), then there are 128x128 positions to evaluate. Each position is starting point for a new line of pixels along the last dimension. - + ''' for p in range(positions): @@ -429,15 +429,15 @@ cdef np.ndarray run_length_encode_32(unsigned char [:] image,tuple shape): for i in range(ndim-1): output.push_back(coords[i]) output.push_back(0) - + ''' - + The following loop is designed for speed. It analyzes 15 pixels at a time, and if all 15 pixels have the same value then it quickly escapes to the next iteration of the loop. It stops loops when it gets less than 15 pixels from the end of the line of pixels so that it doesn't run into the next line of pixels. - + ''' r = 0 # manualy register the pixel index, can be optimized with modulo outside the loop for n in range(0,last_stride-15,15): @@ -491,7 +491,7 @@ cdef np.ndarray run_length_encode_32(unsigned char [:] image,tuple shape): on_obj=False output.push_back(temp[i]) - + # Advance the vector index i_vec = _mm256_add_epi32(i_vec,cnst15) @@ -523,7 +523,7 @@ cdef np.ndarray run_length_encode_32(unsigned char [:] image,tuple shape): # Turn the uint16 vector into a numpy.ndarray of appropriate size row_objects = np.asarray(output,dtype=np.uint32).reshape(-1,ndim+1) - + return row_objects @cython.boundscheck(False) @@ -543,28 +543,28 @@ cdef rle_index(tuple image_shape, shape (tuple): The shape of the image rle_objects (numpy.ndarray): An n-dimensional image reshaped to a linear array of pixels. 
- + Outputs: numpy.ndarray: Indices of pixel objects """ - + # Get indices of lower dimension transitions cdef Py_ssize_t shape0 = rle_objects.shape[0] cdef Py_ssize_t shape1 = rle_objects.shape[1] cdef np.ndarray ld_change = np.argwhere(np.any((rle_objects[1:,:shape1-2] - rle_objects[:shape0-1,:shape1-2]) != 0,axis=1)) + 1 cdef Py_ssize_t ld_shape0 = ld_change.shape[0] cdef Py_ssize_t ld_shape1 = ld_change.shape[1] - + ld_change = np.vstack((np.array(0,dtype=np), ld_change, np.array(rle_objects.shape[0]))).astype(int) - + # Initialize the index matrix shape = 2 for i in range(len(image_shape)-1): shape *= (image_shape[i] + 2) cdef np.ndarray rle_indices = np.full(shape,np.iinfo(np.uint64).max,dtype=np.uint64) - + # Assign values to the index matrix cdef np.ndarray rle_sparse = np.zeros(ld_change.shape[0]-1,dtype=np.uint32) for i in range(rle_objects.shape[1]-3): @@ -572,11 +572,11 @@ cdef rle_index(tuple image_shape, rle_sparse = rle_sparse * (image_shape[i+1] + 2) rle_sparse += rle_objects[ld_change[:ld_change.shape[0]-1],shape1-3].squeeze() + 1 rle_sparse *= 2 - + # Set the indices rle_indices[rle_sparse] = ld_change[:ld_change.shape[0]-1].squeeze() rle_indices[rle_sparse + 1] = ld_change[1:].squeeze() - + return rle_sparse,rle_indices @cython.boundscheck(False) @@ -599,13 +599,13 @@ cdef void compare_objects(unsigned long [:] range1, labels (np.ndarray): 1d array of labels for each object """ cdef unsigned long long current_row = range1[0] - + cdef unsigned long long next_row = range2[0] cdef unsigned long long ind_start = rle_objects.shape[1] - 2 cdef unsigned long long ind_end = rle_objects.shape[1] - 1 - + # Loop through all row objects in the current and next rows while current_row < range1[1] and next_row < range2[1]: # if the current objects do not overlap, move to the next one @@ -615,7 +615,7 @@ cdef void compare_objects(unsigned long [:] range1, elif rle_objects[current_row,ind_start] > rle_objects[next_row,ind_end]: next_row += 1 continue - + # 
relabel the overlapping object in the next row if labels[labels[labels[next_row]]] < labels[labels[labels[current_row]]]: labels[labels[labels[current_row]]] = labels[labels[labels[next_row]]] @@ -624,7 +624,7 @@ cdef void compare_objects(unsigned long [:] range1, labels[labels[labels[next_row]]] = labels[labels[labels[current_row]]] labels[next_row] = labels[labels[current_row]] next_row += 1 - + # relabel additional objects in the next row while next_row < range2[1] and rle_objects[current_row,ind_end] >= rle_objects[next_row,ind_start]: if labels[labels[labels[next_row]]] < labels[labels[labels[current_row]]]: @@ -634,7 +634,7 @@ cdef void compare_objects(unsigned long [:] range1, labels[labels[labels[next_row]]] = labels[labels[labels[current_row]]] labels[next_row] = labels[labels[labels[current_row]]] next_row += 1 - + # relabel collisions while current_row+1 < range1[1] and rle_objects[current_row+1,ind_start] <= rle_objects[next_row-1,ind_end]: current_row += 1 @@ -655,16 +655,16 @@ cdef void reconcile_labels(uint_ind labels) nogil: label of the two objects that overlap. While objects are being relabeled, only the root label is generally changed, reducing the number of the memory calls by not relabeling all - rle objects when a new label is assigned. At the end of a + rle objects when a new label is assigned. At the end of a comparison in a given dimension, the labels need to be re-assigned based on their root label, where the root label is the label that is an index to itself. 
Args: labels (np.ndarray): 1d array of labels for each object - + """ - + cdef Py_ssize_t i cdef Py_ssize_t size = labels.shape[0] @@ -690,9 +690,9 @@ cdef np.ndarray generate_output_8(unsigned int [:,:] rle_objects, Outputs: np.ndarray: 8-bit labeled image - + """ - + # Initialize iteration counter cdef long long i @@ -705,9 +705,9 @@ cdef np.ndarray generate_output_8(unsigned int [:,:] rle_objects, cdef long long obj_start = rle_objects.shape[1] - 2 cdef long long obj_end = rle_objects.shape[1] - 1 cdef np.ndarray start_ind = np.zeros(rle_objects.shape[0],dtype=np.uint64) - + for i in range(ndims): - start_ind += rle_objects[:,i] + start_ind += rle_objects[:,i] start_ind *= image_shape[i+1] start_ind += rle_objects[:,obj_start] @@ -716,7 +716,7 @@ cdef np.ndarray generate_output_8(unsigned int [:,:] rle_objects, fill_n(&linear_image[start_ind_memview[i]], rle_objects[i,obj_end] - rle_objects[i,obj_start], labels[i]) - + return label_image @cython.boundscheck(False) @@ -737,9 +737,9 @@ cdef np.ndarray generate_output_16(unsigned int [:,:] rle_objects, Outputs: np.ndarray: 16-bit labeled image - + """ - + # Initialize iteration counter cdef long i @@ -752,9 +752,9 @@ cdef np.ndarray generate_output_16(unsigned int [:,:] rle_objects, cdef long long obj_start = rle_objects.shape[1] - 2 cdef long long obj_end = rle_objects.shape[1] - 1 cdef np.ndarray start_ind = np.zeros(rle_objects.shape[0],dtype=np.uint64) - + for i in range(ndims): - start_ind += rle_objects[:,i] + start_ind += rle_objects[:,i] start_ind *= image_shape[i+1] start_ind += rle_objects[:,obj_start] @@ -763,7 +763,7 @@ cdef np.ndarray generate_output_16(unsigned int [:,:] rle_objects, fill_n(&linear_image[start_ind_memview[i]], rle_objects[i,obj_end] - rle_objects[i,obj_start], labels[i]) - + return label_image @cython.boundscheck(False) @@ -784,9 +784,9 @@ cdef np.ndarray generate_output_32(unsigned int [:,:] rle_objects, Outputs: np.ndarray: 32-bit labeled image - + """ - + # Initialize iteration 
counter cdef long long i @@ -799,9 +799,9 @@ cdef np.ndarray generate_output_32(unsigned int [:,:] rle_objects, cdef long long obj_start = rle_objects.shape[1] - 2 cdef long long obj_end = rle_objects.shape[1] - 1 cdef np.ndarray start_ind = np.zeros(rle_objects.shape[0],dtype=np.uint64) - + for i in range(ndims): - start_ind += rle_objects[:,i] + start_ind += rle_objects[:,i] start_ind *= image_shape[i+1] start_ind += rle_objects[:,obj_start] @@ -810,7 +810,7 @@ cdef np.ndarray generate_output_32(unsigned int [:,:] rle_objects, fill_n(&linear_image[start_ind_memview[i]], rle_objects[i,obj_end] - rle_objects[i,obj_start], labels[i]) - + return label_image @cython.boundscheck(False) @@ -830,7 +830,7 @@ cdef unsigned int human_labels(unsigned long [:] labels) nogil: Outputs: np.uint32: Number of labels - + """ cdef Py_ssize_t size = labels.shape[0] cdef unsigned int num = 0 @@ -842,7 +842,7 @@ cdef unsigned int human_labels(unsigned long [:] labels) nogil: labels[i] = num continue labels[i] = labels[labels[i]] - + return num @cython.boundscheck(False) @@ -860,7 +860,7 @@ cdef np.ndarray label(unsigned char [:] image, rle_objects = run_length_encode_16(image,shape) else: rle_objects = run_length_encode_32(image,shape) - + # Get evaluation coordinates ndims = rle_objects.shape[1] - 2 if ndims == 1: @@ -873,7 +873,7 @@ cdef np.ndarray label(unsigned char [:] image, ind_d = ind_d[1] offsets = np.argwhere(ind_mat>0) - 1 offsets = offsets[np.argwhere(np.sum(np.absolute(offsets),axis=1)<=connectivity).squeeze(),:] - + # Adjust pixel coordinates to account for connectivity rle_objects_less_one = rle_objects.copy() rle_objects_less_one[...,rle_objects.shape[1]-1] -= 1 @@ -884,27 +884,27 @@ cdef np.ndarray label(unsigned char [:] image, rle_objects_mats.append(rle_objects_less_one) else: rle_objects_mats.append(rle_objects) - + # Get indices of higher coordinate changes rle_sparse,rle_indices = rle_index(shape,rle_objects) cdef unsigned long [:] rle_indices_memview = 
rle_indices cdef unsigned int [:] rle_sparse_memview = rle_sparse num_points = rle_sparse.shape[0] - 1 - + # Initalize the output cdef unsigned long[:] labels = np.arange(rle_objects.shape[0],dtype=np.uint64) - + # null value cdef unsigned long null_val = np.iinfo(np.uint64).max cdef unsigned long [:] offset_index,current_index cdef unsigned int [:,:] rle_objects_mat - cdef unsigned int [:] rle_sparse_offset_memview + cdef unsigned int [:] rle_sparse_offset_memview # Loop over the dimensions compare_time = 0 reconcile_time = 0 for d in range(offsets.shape[0]): - + rle_sparse_offset = rle_sparse.copy() o = 0 for i in range(offsets.shape[1]-1): @@ -914,7 +914,7 @@ cdef np.ndarray label(unsigned char [:] image, rle_sparse_offset_memview = rle_sparse_offset rle_objects_mat = rle_objects_mats[d] - + # Loop over points for index in range(num_points): offset_index = rle_indices_memview[rle_sparse_offset_memview[index]:rle_sparse_offset_memview[index+1]] @@ -922,18 +922,18 @@ cdef np.ndarray label(unsigned char [:] image, if offset_index[0] == null_val: continue - + compare_objects(current_index, offset_index, rle_objects_mat, labels) - + # Reconcile object labels after each offset is analyzed reconcile_labels(labels) - + # Make labels for humans num_objects = human_labels(labels) - + # Generate the output with smallest data type if num_objects < 2**8-1: label_image = generate_output_8(rle_objects,labels,shape) @@ -941,14 +941,14 @@ cdef np.ndarray label(unsigned char [:] image, label_image = generate_output_16(rle_objects,labels,shape) else: label_image = generate_output_32(rle_objects,labels,shape) - + return label_image def label_nd(image,connectivity): if connectivity == None: connectivity = image.ndim - + # Error checking assert connectivity<=image.ndim,\ "connectivity must be less than or equal to the number of image dimensions" - return label(image.reshape(-1),image.shape,connectivity) \ No newline at end of file + return 
label(image.reshape(-1),image.shape,connectivity) diff --git a/transforms/images/polus-ftl-label-plugin/src/main.py b/transforms/images/polus-ftl-label-plugin/src/main.py index c24c8e587..5ac9e73d1 100644 --- a/transforms/images/polus-ftl-label-plugin/src/main.py +++ b/transforms/images/polus-ftl-label-plugin/src/main.py @@ -2,35 +2,35 @@ import logging import os from pathlib import Path -from typing import List, Tuple +import ftl import numpy from bfio import BioReader from bfio import BioWriter -from preadator import ProcessManager - -import ftl from ftl_rust import PolygonSet +from preadator import ProcessManager -POLUS_LOG = getattr(logging, os.environ.get('POLUS_LOG', 'INFO')) -POLUS_EXT = os.environ.get('POLUS_EXT', '.ome.tif') # TODO: Figure out how to use this +POLUS_LOG = getattr(logging, os.environ.get("POLUS_LOG", "INFO")) +POLUS_EXT = os.environ.get("POLUS_EXT", ".ome.tif") # TODO: Figure out how to use this # Initialize the logger logging.basicConfig( - format='%(asctime)s - %(name)-8s - %(levelname)-8s - %(message)s', - datefmt='%d-%b-%y %H:%M:%S', + format="%(asctime)s - %(name)-8s - %(levelname)-8s - %(message)s", + datefmt="%d-%b-%y %H:%M:%S", ) logger = logging.getLogger("main") logger.setLevel(POLUS_LOG) def get_output_name(filename: str) -> str: - name = filename.split('.ome')[0] - return f'{name}{POLUS_EXT}' + name = filename.split(".ome")[0] + return f"{name}{POLUS_EXT}" -def filter_by_size(file_paths: List[Path], size_threshold: int) -> Tuple[List[Path], List[Path]]: - """ Partitions the input files by the memory-footprint for the images. +def filter_by_size( + file_paths: list[Path], size_threshold: int, +) -> tuple[list[Path], list[Path]]: + """Partitions the input files by the memory-footprint for the images. Args: file_paths: The list of files to partition. @@ -40,7 +40,7 @@ def filter_by_size(file_paths: List[Path], size_threshold: int) -> Tuple[List[Pa A 2-tuple of lists of paths. 
The first list contains small images and the second list contains large images. """ - small_files, large_files = list(), list() + small_files, large_files = [], [] threshold: int = size_threshold * 1024 * 1024 for file_path in file_paths: @@ -64,7 +64,7 @@ def filter_by_size(file_paths: List[Path], size_threshold: int) -> Tuple[List[Pa def label_cython(input_path: Path, output_path: Path, connectivity: int): - """ Label the input image and writes labels back out. + """Label the input image and writes labels back out. Args: input_path: Path to input image. @@ -76,7 +76,6 @@ def label_cython(input_path: Path, output_path: Path, connectivity: int): input_path, max_workers=active_threads.count, ) as reader: - with BioWriter( output_path, max_workers=active_threads.count, @@ -88,15 +87,15 @@ def label_cython(input_path: Path, output_path: Path, connectivity: int): if not numpy.any(image): writer.dtype = numpy.uint8 writer[:] = numpy.zeros_like(image, dtype=numpy.uint8) - return + return None - image = (image > 0) + image = image > 0 if connectivity > image.ndim: ProcessManager.log( - f'{input_path.name}: Connectivity is not less than or equal to the number of image dimensions, ' - f'skipping this image. connectivity={connectivity}, ndim={image.ndim}' + f"{input_path.name}: Connectivity is not less than or equal to the number of image dimensions, " + f"skipping this image. 
connectivity={connectivity}, ndim={image.ndim}", ) - return + return None # Run the labeling algorithm labels = ftl.label_nd(image, connectivity) @@ -111,55 +110,66 @@ def label_cython(input_path: Path, output_path: Path, connectivity: int): # Setup the argument parsing logger.info("Parsing arguments...") parser = argparse.ArgumentParser( - prog='main', - description='Label objects in a 2d or 3d binary image.', + prog="main", + description="Label objects in a 2d or 3d binary image.", ) parser.add_argument( - '--inpDir', dest='inpDir', type=str, required=True, - help='Input image collection to be processed by this plugin', + "--inpDir", + dest="inpDir", + type=str, + required=True, + help="Input image collection to be processed by this plugin", ) parser.add_argument( - '--connectivity', dest='connectivity', type=str, required=True, - help='City block connectivity, must be less than or equal to the number of dimensions', + "--connectivity", + dest="connectivity", + type=str, + required=True, + help="City block connectivity, must be less than or equal to the number of dimensions", ) parser.add_argument( - '--outDir', dest='outDir', type=str, required=True, - help='Output collection', + "--outDir", + dest="outDir", + type=str, + required=True, + help="Output collection", ) # Parse the arguments args = parser.parse_args() _connectivity = int(args.connectivity) - logger.info(f'connectivity = {_connectivity}') + logger.info(f"connectivity = {_connectivity}") _input_dir = Path(args.inpDir).resolve() - assert _input_dir.exists(), f'{_input_dir } does not exist.' - if _input_dir.joinpath('images').is_dir(): - _input_dir = _input_dir.joinpath('images') - logger.info(f'inpDir = {_input_dir}') + assert _input_dir.exists(), f"{_input_dir } does not exist." 
+ if _input_dir.joinpath("images").is_dir(): + _input_dir = _input_dir.joinpath("images") + logger.info(f"inpDir = {_input_dir}") _output_dir = Path(args.outDir).resolve() - assert _output_dir.exists(), f'{_output_dir } does not exist.' - logger.info(f'outDir = {_output_dir}') + assert _output_dir.exists(), f"{_output_dir } does not exist." + logger.info(f"outDir = {_output_dir}") # We only need a thread manager since labeling and image reading/writing # release the gil ProcessManager.init_threads() # Get all file names in inpDir image collection - _files = list(filter( - lambda _file: _file.is_file() and _file.name.endswith('.ome.tif'), - _input_dir.iterdir() - )) + _files = list( + filter( + lambda _file: _file.is_file() and _file.name.endswith(".ome.tif"), + _input_dir.iterdir(), + ), + ) _small_files, _large_files = filter_by_size(_files, 500) - logger.info(f'processing {len(_files)} images in total...') - logger.info(f'processing {len(_small_files)} small images with cython...') - logger.info(f'processing {len(_large_files)} large images with rust') + logger.info(f"processing {len(_files)} images in total...") + logger.info(f"processing {len(_small_files)} small images with cython...") + logger.info(f"processing {len(_large_files)} large images with rust") if _small_files: for _infile in _small_files: diff --git a/transforms/images/polus-ftl-label-plugin/src/requirements.txt b/transforms/images/polus-ftl-label-plugin/src/requirements.txt index 17958cbd9..ee27563d6 100644 --- a/transforms/images/polus-ftl-label-plugin/src/requirements.txt +++ b/transforms/images/polus-ftl-label-plugin/src/requirements.txt @@ -3,4 +3,3 @@ preadator==0.2.0 numpy==1.21.4 bfio[all]==2.1.9 filepattern==1.4.7 - diff --git a/transforms/images/polus-ftl-label-plugin/src/setup.py b/transforms/images/polus-ftl-label-plugin/src/setup.py index 4ad66cc27..6685ea8a5 100644 --- a/transforms/images/polus-ftl-label-plugin/src/setup.py +++ b/transforms/images/polus-ftl-label-plugin/src/setup.py 
@@ -1,11 +1,15 @@ -from setuptools import setup -import numpy, os +import os + +import numpy from Cython.Build import cythonize from Cython.Compiler import Options +from setuptools import setup Options.annotate = True -os.environ['CFLAGS'] = '-march=haswell -O3' -os.environ['CXXFLAGS'] = '-march=haswell -O3' -setup(ext_modules=cythonize("ftl.pyx",compiler_directives={'language_level' : "3"}), - include_dirs=[numpy.get_include()]) +os.environ["CFLAGS"] = "-march=haswell -O3" +os.environ["CXXFLAGS"] = "-march=haswell -O3" +setup( + ext_modules=cythonize("ftl.pyx", compiler_directives={"language_level": "3"}), + include_dirs=[numpy.get_include()], +) diff --git a/transforms/images/polus-image-registration-plugin/Dockerfile b/transforms/images/polus-image-registration-plugin/Dockerfile index 3f94d8778..667cb2d96 100644 --- a/transforms/images/polus-image-registration-plugin/Dockerfile +++ b/transforms/images/polus-image-registration-plugin/Dockerfile @@ -1,6 +1,6 @@ FROM polusai/bfio:2.1.9 - + ARG EXEC_DIR="/opt/executables" ARG DATA_DIR="/data" @@ -18,4 +18,4 @@ RUN pip3 install -r ${EXEC_DIR}/requirements.txt --no-cache-dir COPY VERSION / -ENTRYPOINT ["python3", "/opt/executables/main.py"] \ No newline at end of file +ENTRYPOINT ["python3", "/opt/executables/main.py"] diff --git a/transforms/images/polus-image-registration-plugin/README.md b/transforms/images/polus-image-registration-plugin/README.md index 4aac366aa..14fa6710e 100644 --- a/transforms/images/polus-image-registration-plugin/README.md +++ b/transforms/images/polus-image-registration-plugin/README.md @@ -1,6 +1,6 @@ # Polus Projective Transformation Image Registration Plugin -WIPP Plugin Title : Image Registration Plugin +WIPP Plugin Title : Image Registration Plugin Contact [Gauhar Bains](mailto:gauhar.bains@labshare.org) for more information. @@ -8,53 +8,53 @@ For more information on WIPP, visit the [official WIPP page](https://isg.nist.go ## Description -This plugin registers an image collection. 
First it parses the image collection using `parser.py` into registration sets. Each registration set consists of: moving image, template image, similiar transformation images. The registration algorithm(explained in the next section, can be found in `image_registration.py`) registers the moving image with template image and stores the transformation required to do so. This stored transformation is used to transform each image in similar transformation list. +This plugin registers an image collection. First it parses the image collection using `parser.py` into registration sets. Each registration set consists of: moving image, template image, similiar transformation images. The registration algorithm(explained in the next section, can be found in `image_registration.py`) registers the moving image with template image and stores the transformation required to do so. This stored transformation is used to transform each image in similar transformation list. ## Algorithm ### To do -1. Find a better way to handle poorly correlated transforms. +1. Find a better way to handle poorly correlated transforms. 2. Find a more scalable approach to do rough transformation. The algorithm has been tested on images of size around 1.2 Gigapixel but a better approach may be needed for images significantly larger than these. -### Parsing -The parsing algorithm uses the functions from the `file_pattern utility`. It takes the following inputs : Filename pattern, registration variable, similar transformation variable. The registration variable helps determine the moving and the template images where as the similar transformation variable helps determine the similar transformation images. Note: The code produces the expected output when len(registration_variable)==len(similarity_variable)==1. The code will NOT spit out an error when the more than one variable is passed as registration or similarity variable, but additional testing needs to be done for this usecase. 
+### Parsing +The parsing algorithm uses the functions from the `file_pattern utility`. It takes the following inputs : Filename pattern, registration variable, similar transformation variable. The registration variable helps determine the moving and the template images where as the similar transformation variable helps determine the similar transformation images. Note: The code produces the expected output when len(registration_variable)==len(similarity_variable)==1. The code will NOT spit out an error when the more than one variable is passed as registration or similarity variable, but additional testing needs to be done for this usecase. -Some sample text files can be found in the examples folder. Short example shown below: +Some sample text files can be found in the examples folder. Short example shown below: -Parsing example : - -`Inputs:` -Filepattern : `x{xxx}_y{yyy}_z{zzz}_c{ccc}_t{ttt}.ome.tif` -Registration_variable : `t` -similar_transformation_variable : `c` -template_ : `x001_y001_z001_c001_t001.ome.tif` +Parsing example : -`Output set 1 :` -Template Image: x001_y001_z001_c001_t001.ome.tif -Moving Image: x001_y001_z001_c001_t002.ome.tif -Similar Transformation Images : [ x001_y001_z001_c002_t002.ome.tif , x001_y001_z001_c003_t002.ome.tif ] +`Inputs:` +Filepattern : `x{xxx}_y{yyy}_z{zzz}_c{ccc}_t{ttt}.ome.tif` +Registration_variable : `t` +similar_transformation_variable : `c` +template_ : `x001_y001_z001_c001_t001.ome.tif` -`Output set 2:` -Template Image: x001_y002_z001_c001_t001.ome.tif -Moving Image: x001_y002_z001_c001_t002.ome.tif -Similar Transformation Images : [ x001_y002_z001_c002_t002.ome.tif , x001_y002_z001_c003_t002.ome.tif ] +`Output set 1 :` +Template Image: x001_y001_z001_c001_t001.ome.tif +Moving Image: x001_y001_z001_c001_t002.ome.tif +Similar Transformation Images : [ x001_y001_z001_c002_t002.ome.tif , x001_y001_z001_c003_t002.ome.tif ] +`Output set 2:` +Template Image: x001_y002_z001_c001_t001.ome.tif +Moving Image: 
x001_y002_z001_c001_t002.ome.tif +Similar Transformation Images : [ x001_y002_z001_c002_t002.ome.tif , x001_y002_z001_c003_t002.ome.tif ] -### Registration -The registration algorithm is present in `image_registration.py`. It uses projective transformation(Homography matrix) to alter the moving image and align it with the reference image. Background information about homography can be found here: https://en.wikipedia.org/wiki/Homography . -The moving image undergoes 2 transformations: + +### Registration +The registration algorithm is present in `image_registration.py`. It uses projective transformation(Homography matrix) to alter the moving image and align it with the reference image. Background information about homography can be found here: https://en.wikipedia.org/wiki/Homography . +The moving image undergoes 2 transformations: 1. `Rough Transformation` : In this the whole moving image is transformed using the homography matrix calculated between the entire moving and template image. -2. `Fine Transformation` : To carry out fine transformation ,the homography matrix is found between the corresponding tiles of the roughly transformed moving image and the template image. Each image is divided into 4 tiles. +2. `Fine Transformation` : To carry out fine transformation ,the homography matrix is found between the corresponding tiles of the roughly transformed moving image and the template image. Each image is divided into 4 tiles. + +To find the homography matrix(for fine or rough tranformation), we need coordinates of atleast 4 matching points in the template and the moving image. To do this the ORB feature detector has been used. However, its computationally very expensive to run feature matching on large images(our test data consists of 1.3 gigapixel images). To overcome this, the homography matrix at every step of our algorithm has been calculated between scaled down versions( 16 * 16 times smaller) of the respective images. 
To use this homography matrix on actual sized images, the matrix is scaled up using a scale matrix. The proof for upscaling a homography matrix is shown below. -To find the homography matrix(for fine or rough tranformation), we need coordinates of atleast 4 matching points in the template and the moving image. To do this the ORB feature detector has been used. However, its computationally very expensive to run feature matching on large images(our test data consists of 1.3 gigapixel images). To overcome this, the homography matrix at every step of our algorithm has been calculated between scaled down versions( 16 * 16 times smaller) of the respective images. To use this homography matrix on actual sized images, the matrix is scaled up using a scale matrix. The proof for upscaling a homography matrix is shown below. +`Proof` : -`Proof` : +Credit for proof : https://stackoverflow.com/questions/21019338/how-to-change-the-homography-with-the-scale-of-the-image/56623249 + +![homography](https://user-images.githubusercontent.com/48079888/78402511-b04d8200-75c8-11ea-9d22-cee13f3912db.gif) -Credit for proof : https://stackoverflow.com/questions/21019338/how-to-change-the-homography-with-the-scale-of-the-image/56623249 - -![homography](https://user-images.githubusercontent.com/48079888/78402511-b04d8200-75c8-11ea-9d22-cee13f3912db.gif) - @@ -83,4 +83,3 @@ This plugin takes one input argument and one output argument: | `--template` | Template image to be used for image registration | Input | string | | `--TransformationVariable` | variable to help identify which images have similar transformation | Input | string | | `--outDir` | Output collection | Output | collection | - diff --git a/transforms/images/polus-image-registration-plugin/build-docker.sh b/transforms/images/polus-image-registration-plugin/build-docker.sh index 0809b7e34..43dd281d0 100755 --- a/transforms/images/polus-image-registration-plugin/build-docker.sh +++ 
b/transforms/images/polus-image-registration-plugin/build-docker.sh @@ -1,4 +1,4 @@ #!/bin/bash version=$(1 else 1 - -def get_scaled_down_images(image,scale_factor,get_max=False): """ - This function returns the scaled down version of an image. + TARGET_SIZE = 5000000 # 5 megapixels + scale_factor = np.sqrt((height * width) / TARGET_SIZE) + return int(scale_factor) if scale_factor > 1 else 1 + + +def get_scaled_down_images(image, scale_factor, get_max=False): + """This function returns the scaled down version of an image. Inputs: image : A BioReader object scale_factor : the factor by which the image needs to be scaled down Outputs: - rescaled_image: scaled down version of the input image + rescaled_image: scaled down version of the input image. """ - # Calculate scaling variables - stride = int(scale_factor * np.floor(4096/scale_factor)) - width = np.ceil(image.num_y()/scale_factor).astype(int) - height = np.ceil(image.num_x()/scale_factor).astype(int) - + stride = int(scale_factor * np.floor(4096 / scale_factor)) + width = np.ceil(image.num_y() / scale_factor).astype(int) + height = np.ceil(image.num_x() / scale_factor).astype(int) + # Initialize the output - rescaled_image = np.zeros((width,height),dtype=image._pix['type']) - + rescaled_image = np.zeros((width, height), dtype=image._pix["type"]) + # If max value is requested, initialize the variables if get_max: max_val = 0 min_val = np.inf - - def load_and_scale(X,Y,x,y,get_max=get_max,reader=image,scale_factor=scale_factor,rescaled_image=rescaled_image): - """load_and_scale Load a section of an image and downscale - + + def load_and_scale( + X, + Y, + x, + y, + get_max=get_max, + reader=image, + scale_factor=scale_factor, + rescaled_image=rescaled_image, + ): + """load_and_scale Load a section of an image and downscale. + This is a transient method, and only works within the get scaled_down_images method. It's used to thread out loading and downscaling of large images. 
- + """ - # Read an image tile - tile = reader.read_image(X=X,Y=Y,Z=[0,1],C=[0],T=[0]).squeeze() - + tile = reader.read_image(X=X, Y=Y, Z=[0, 1], C=[0], T=[0]).squeeze() + # Average the image for scaling - blurred_image = cv2.boxFilter(tile,-1,(scale_factor,scale_factor)) - + blurred_image = cv2.boxFilter(tile, -1, (scale_factor, scale_factor)) + # Collect pixels for downscaled image - rescaled_image[y[0]:y[1],x[0]:x[1]] = blurred_image[::scale_factor,::scale_factor] - + rescaled_image[y[0] : y[1], x[0] : x[1]] = blurred_image[ + ::scale_factor, ::scale_factor, + ] + if get_max: - return np.max(tile),np.min(tile) + return np.max(tile), np.min(tile) else: return None - + # Load and downscale the image threads = [] - with ThreadPoolExecutor(max([cpu_count()//2,1])) as executor: - for x in range(0,image.num_x(),stride): - x_max = np.min([x+stride,image.num_x()]) # max x to load - xi = int(x//scale_factor) # initial scaled x-index - xe = int(np.ceil(x_max/scale_factor)) # ending scaled x-index - for y in range(0,image.num_y(),stride): - y_max = np.min([y+stride,image.num_y()]) # max y to load - yi = int(y//scale_factor) # initial scaled y-index - ye = int(np.ceil(y_max/scale_factor)) # ending scaled y-index - - threads.append(executor.submit(load_and_scale,[x,x_max],[y,y_max],[xi,xe],[yi,ye])) - + with ThreadPoolExecutor(max([cpu_count() // 2, 1])) as executor: + for x in range(0, image.num_x(), stride): + x_max = np.min([x + stride, image.num_x()]) # max x to load + xi = int(x // scale_factor) # initial scaled x-index + xe = int(np.ceil(x_max / scale_factor)) # ending scaled x-index + for y in range(0, image.num_y(), stride): + y_max = np.min([y + stride, image.num_y()]) # max y to load + yi = int(y // scale_factor) # initial scaled y-index + ye = int(np.ceil(y_max / scale_factor)) # ending scaled y-index + + threads.append( + executor.submit( + load_and_scale, [x, x_max], [y, y_max], [xi, xe], [yi, ye], + ), + ) + # Return max and min values if requested if get_max: 
results = [thread.result() for thread in threads] max_val = max(result[0] for result in results) min_val = min(result[1] for result in results) - return rescaled_image,max_val,min_val + return rescaled_image, max_val, min_val else: return rescaled_image -def register_image(br_ref,br_mov,bw,Xt,Yt,Xm,Ym,x,y,X_crop,Y_crop,max_val,min_val,method): - """register_image Register one section of two images + +def register_image( + br_ref, br_mov, bw, Xt, Yt, Xm, Ym, x, y, X_crop, Y_crop, max_val, min_val, method, +): + """register_image Register one section of two images. This method is designed to be used within a thread. It registers one section of two different images, saves the output, and returns the homography matrix used to transform the image. """ - # Load a section of the reference and moving images - ref_tile = br_ref.read_image(X=[Xt[0],Xt[1]],Y=[Yt[0],Yt[1]],Z=[0,1],C=[0],T=[0]).squeeze() - mov_tile = br_mov.read_image(X=[Xm[0],Xm[1]],Y=[Ym[0],Ym[1]],Z=[0,1],C=[0],T=[0]).squeeze() - + ref_tile = br_ref.read_image( + X=[Xt[0], Xt[1]], Y=[Yt[0], Yt[1]], Z=[0, 1], C=[0], T=[0], + ).squeeze() + mov_tile = br_mov.read_image( + X=[Xm[0], Xm[1]], Y=[Ym[0], Ym[1]], Z=[0, 1], C=[0], T=[0], + ).squeeze() + # Get the transformation matrix - projective_transform = get_transform(mov_tile,ref_tile,max_val,min_val,method) - + projective_transform = get_transform(mov_tile, ref_tile, max_val, min_val, method) + # Use the rough transformation matrix if no matrix was returned is_rough = False - if not isinstance(projective_transform,np.ndarray): + if not isinstance(projective_transform, np.ndarray): is_rough = True projective_transform = Rough_Homography_Upscaled - + # Transform the moving image - if method=='Projective': - transformed_image = cv2.warpPerspective(mov_tile,projective_transform,(Xt[1]-Xt[0],Yt[1]-Yt[0])) + if method == "Projective": + transformed_image = cv2.warpPerspective( + mov_tile, projective_transform, (Xt[1] - Xt[0], Yt[1] - Yt[0]), + ) else: - 
transformed_image = cv2.warpAffine(mov_tile,projective_transform,(Xt[1]-Xt[0],Yt[1]-Yt[0])) - + transformed_image = cv2.warpAffine( + mov_tile, projective_transform, (Xt[1] - Xt[0], Yt[1] - Yt[0]), + ) + # Determine the correlation between the reference and transformed moving image - corr = corr2(ref_tile,transformed_image) - + corr = corr2(ref_tile, transformed_image) + # If the correlation is bad, try using the rough transform instead if corr < 0.4 and not is_rough: - if method=='Projective': - transformed_image = cv2.warpPerspective(mov_tile,Rough_Homography_Upscaled,(Xt[1]-Xt[0],Yt[1]-Yt[0])) + if method == "Projective": + transformed_image = cv2.warpPerspective( + mov_tile, Rough_Homography_Upscaled, (Xt[1] - Xt[0], Yt[1] - Yt[0]), + ) else: - transformed_image = cv2.warpAffine(mov_tile,Rough_Homography_Upscaled,(Xt[1]-Xt[0],Yt[1]-Yt[0])) - projective_transform = Rough_Homography_Upscaled - + transformed_image = cv2.warpAffine( + mov_tile, Rough_Homography_Upscaled, (Xt[1] - Xt[0], Yt[1] - Yt[0]), + ) + projective_transform = Rough_Homography_Upscaled + # Write the transformed moving image - bw.write_image(transformed_image[Y_crop[0]:Y_crop[1],X_crop[0]:X_crop[1],np.newaxis,np.newaxis,np.newaxis],X=[x],Y=[y]) - + bw.write_image( + transformed_image[ + Y_crop[0] : Y_crop[1], + X_crop[0] : X_crop[1], + np.newaxis, + np.newaxis, + np.newaxis, + ], + X=[x], + Y=[y], + ) + return projective_transform -def apply_transform(br_mov,bw,tiles,shape,transform,method): - """apply_transform Apply a transform to an image + +def apply_transform(br_mov, bw, tiles, shape, transform, method): + """apply_transform Apply a transform to an image. This method is designed to be used within a thread. It loads a section of an image, applies a transform, and saves the transformed image to file. 
""" - # Get the tile indices - Xm,Ym,Xt,Yt = tiles - + Xm, Ym, Xt, Yt = tiles + # Read the moving image tile - mov_tile = br_mov.read_image(X=[Xm[0],Xm[1]],Y=[Ym[0],Ym[1]],Z=[0,1],C=[0],T=[0]).squeeze() - + mov_tile = br_mov.read_image( + X=[Xm[0], Xm[1]], Y=[Ym[0], Ym[1]], Z=[0, 1], C=[0], T=[0], + ).squeeze() + # Get the image coordinates and shape - x,y,X_crop,Y_crop = shape - + x, y, X_crop, Y_crop = shape + # Transform the moving image - if method=='Projective': - transformed_image = cv2.warpPerspective(mov_tile,transform,(Xt[1]-Xt[0],Yt[1]-Yt[0])) + if method == "Projective": + transformed_image = cv2.warpPerspective( + mov_tile, transform, (Xt[1] - Xt[0], Yt[1] - Yt[0]), + ) else: - transformed_image = cv2.warpAffine(mov_tile,transform,(Xt[1]-Xt[0],Yt[1]-Yt[0])) - + transformed_image = cv2.warpAffine( + mov_tile, transform, (Xt[1] - Xt[0], Yt[1] - Yt[0]), + ) + # Write the transformed image to the output file - bw.write_image(transformed_image[Y_crop[0]:Y_crop[1],X_crop[0]:X_crop[1],np.newaxis,np.newaxis,np.newaxis],[x],[y]) + bw.write_image( + transformed_image[ + Y_crop[0] : Y_crop[1], + X_crop[0] : X_crop[1], + np.newaxis, + np.newaxis, + np.newaxis, + ], + [x], + [y], + ) -if __name__=="__main__": - + +if __name__ == "__main__": # Initialize the logger - logging.basicConfig(format='%(asctime)s - %(name)-8s - %(levelname)-8s - %(message)s', - datefmt='%d-%b-%y %H:%M:%S') + logging.basicConfig( + format="%(asctime)s - %(name)-8s - %(levelname)-8s - %(message)s", + datefmt="%d-%b-%y %H:%M:%S", + ) logger = logging.getLogger("image_registration.py") logger.setLevel(logging.INFO) # Setup the argument parsing logger.info("Parsing arguments...") - parser = argparse.ArgumentParser(prog='imageRegistration', description='This script registers an image collection') - parser.add_argument('--registrationString', dest='registration_string', type=str, required=True) - parser.add_argument('--similarTransformationString', dest='similar_transformation_string', type=str, 
required=True) - parser.add_argument('--outDir', dest='outDir', type=str, required=True) - parser.add_argument('--template', dest='template', type=str, required=True) - parser.add_argument('--method', dest='method', type=str, required=True) - - # parse the arguments + parser = argparse.ArgumentParser( + prog="imageRegistration", + description="This script registers an image collection", + ) + parser.add_argument( + "--registrationString", dest="registration_string", type=str, required=True, + ) + parser.add_argument( + "--similarTransformationString", + dest="similar_transformation_string", + type=str, + required=True, + ) + parser.add_argument("--outDir", dest="outDir", type=str, required=True) + parser.add_argument("--template", dest="template", type=str, required=True) + parser.add_argument("--method", dest="method", type=str, required=True) + + # parse the arguments args = parser.parse_args() registration_string = args.registration_string similar_transformation_string = args.similar_transformation_string - outDir = args.outDir + outDir = args.outDir template = args.template method = args.method - + # Set up the number of threads for each task - read_workers = max([cpu_count()//3,1]) - write_workers = max([cpu_count()-1,2]) - loop_workers = max([3*cpu_count()//4,2]) - + read_workers = max([cpu_count() // 3, 1]) + write_workers = max([cpu_count() - 1, 2]) + loop_workers = max([3 * cpu_count() // 4, 2]) + # extract filenames from registration_string and similar_transformation_string - registration_set=registration_string.split() - similar_transformation_set=similar_transformation_string.split() - - filename_len=len(template) - + registration_set = registration_string.split() + similar_transformation_set = similar_transformation_string.split() + + filename_len = len(template) + # seperate the filename of the moving image from the complete path - moving_image_name=registration_set[1][-1*filename_len:] - + moving_image_name = registration_set[1][-1 * filename_len :] 
+ # read and downscale reference image - br_ref = BioReader(registration_set[0],max_workers=write_workers) - scale_factor=get_scale_factor(br_ref.num_y(),br_ref.num_x()) - logger.info('Scale factor: {}'.format(scale_factor)) - + br_ref = BioReader(registration_set[0], max_workers=write_workers) + scale_factor = get_scale_factor(br_ref.num_y(), br_ref.num_x()) + logger.info(f"Scale factor: {scale_factor}") + # intialize the scale factor and scale matrix(to be used to upscale the transformation matrices) - if method == 'Projective': - scale_matrix = np.array([[1,1,scale_factor],[1,1,scale_factor],[1/scale_factor,1/scale_factor,1]]) + if method == "Projective": + scale_matrix = np.array( + [ + [1, 1, scale_factor], + [1, 1, scale_factor], + [1 / scale_factor, 1 / scale_factor, 1], + ], + ) else: - scale_matrix = np.array([[1/scale_factor,1/scale_factor,1],[1/scale_factor,1/scale_factor,1]]) - - logger.info('Reading and downscaling reference image: {}'.format(Path(registration_set[0]).name)) - reference_image_downscaled,max_val,min_val = get_scaled_down_images(br_ref,scale_factor,get_max=True) + scale_matrix = np.array( + [ + [1 / scale_factor, 1 / scale_factor, 1], + [1 / scale_factor, 1 / scale_factor, 1], + ], + ) + + logger.info( + "Reading and downscaling reference image: {}".format( + Path(registration_set[0]).name, + ), + ) + reference_image_downscaled, max_val, min_val = get_scaled_down_images( + br_ref, scale_factor, get_max=True, + ) br_ref.max_workers = read_workers - + # read moving image - logger.info('Reading and downscaling moving image: {}'.format(Path(registration_set[1]).name)) - br_mov = BioReader(registration_set[1],max_workers=write_workers) - moving_image_downscaled = get_scaled_down_images(br_mov,scale_factor) + logger.info( + "Reading and downscaling moving image: {}".format( + Path(registration_set[1]).name, + ), + ) + br_mov = BioReader(registration_set[1], max_workers=write_workers) + moving_image_downscaled = get_scaled_down_images(br_mov, 
scale_factor) br_mov.max_workers = read_workers - + # calculate rough transformation between scaled down reference and moving image logger.info("calculating rough homography...") - Rough_Homography_Downscaled = get_transform(moving_image_downscaled, - reference_image_downscaled, - max_val, - min_val, - method) - + Rough_Homography_Downscaled = get_transform( + moving_image_downscaled, reference_image_downscaled, max_val, min_val, method, + ) + # upscale the rough homography matrix logger.info("Inverting homography...") - if method=='Projective': - Rough_Homography_Upscaled=Rough_Homography_Downscaled*scale_matrix - homography_inverse=np.linalg.inv(Rough_Homography_Upscaled) + if method == "Projective": + Rough_Homography_Upscaled = Rough_Homography_Downscaled * scale_matrix + homography_inverse = np.linalg.inv(Rough_Homography_Upscaled) else: - Rough_Homography_Upscaled=Rough_Homography_Downscaled - homography_inverse=cv2.invertAffineTransform(Rough_Homography_Downscaled) - + Rough_Homography_Upscaled = Rough_Homography_Downscaled + homography_inverse = cv2.invertAffineTransform(Rough_Homography_Downscaled) + # Initialize the output file - bw = BioWriter(str(Path(outDir).joinpath(Path(registration_set[1]).name)),metadata=br_mov.read_metadata(),max_workers=write_workers) + bw = BioWriter( + str(Path(outDir).joinpath(Path(registration_set[1]).name)), + metadata=br_mov.read_metadata(), + max_workers=write_workers, + ) bw.num_x(br_ref.num_x()) bw.num_y(br_ref.num_y()) bw.num_z(1) bw.num_c(1) bw.num_t(1) - + # transformation variables reg_shape = [] reg_tiles = [] reg_homography = [] - + # Loop through image tiles and start threads logger.info("Starting threads...") threads = [] first_tile = True with ThreadPoolExecutor(max_workers=loop_workers) as executor: - for x in range(0,br_ref.num_x(),2048): - for y in range(0,br_ref.num_y(),2048): - + for x in range(0, br_ref.num_x(), 2048): + for y in range(0, br_ref.num_y(), 2048): # Get reference/template image coordinates - 
Xt = [np.max([0,x-1024]),np.min([br_ref.num_x(),x+2048+1024])] - Yt = [np.max([0,y-1024]),np.min([br_ref.num_y(),y+2048+1024])] - + Xt = [np.max([0, x - 1024]), np.min([br_ref.num_x(), x + 2048 + 1024])] + Yt = [np.max([0, y - 1024]), np.min([br_ref.num_y(), y + 2048 + 1024])] + # Use the rough homography to get coordinates in the moving image - coords = np.array([[Xt[0],Xt[0],Xt[1],Xt[1]], - [Yt[0],Yt[1],Yt[1],Yt[0]], - [1,1,1,1]], - dtype=np.float64) - - coords = np.matmul(homography_inverse,coords) - - mins = np.min(coords,axis=1) - maxs = np.max(coords,axis=1) - - Xm = [int(np.floor(np.max([mins[0],0]))), - int(np.ceil(np.min([maxs[0],br_mov.num_x()])))] - Ym = [int(np.floor(np.max([mins[1],0]))), - int(np.ceil(np.min([maxs[1],br_mov.num_y()])))] - - reg_tiles.append((Xm,Ym,Xt,Yt)) - + coords = np.array( + [ + [Xt[0], Xt[0], Xt[1], Xt[1]], + [Yt[0], Yt[1], Yt[1], Yt[0]], + [1, 1, 1, 1], + ], + dtype=np.float64, + ) + + coords = np.matmul(homography_inverse, coords) + + mins = np.min(coords, axis=1) + maxs = np.max(coords, axis=1) + + Xm = [ + int(np.floor(np.max([mins[0], 0]))), + int(np.ceil(np.min([maxs[0], br_mov.num_x()]))), + ] + Ym = [ + int(np.floor(np.max([mins[1], 0]))), + int(np.ceil(np.min([maxs[1], br_mov.num_y()]))), + ] + + reg_tiles.append((Xm, Ym, Xt, Yt)) + # Get cropping dimensions X_crop = [1024 if Xt[0] > 0 else 0] - X_crop.append(2048+X_crop[0] if Xt[1]-Xt[0] >= 3072 else Xt[1]-Xt[0]+X_crop[0]) + X_crop.append( + 2048 + X_crop[0] + if Xt[1] - Xt[0] >= 3072 + else Xt[1] - Xt[0] + X_crop[0], + ) Y_crop = [1024 if Yt[0] > 0 else 0] - Y_crop.append(2048+Y_crop[0] if Yt[1]-Yt[0] >= 3072 else Yt[1]-Yt[0]+Y_crop[0]) - reg_shape.append((x,y,X_crop,Y_crop)) - + Y_crop.append( + 2048 + Y_crop[0] + if Yt[1] - Yt[0] >= 3072 + else Yt[1] - Yt[0] + Y_crop[0], + ) + reg_shape.append((x, y, X_crop, Y_crop)) + # Start a thread to register the tiles - 
threads.append(executor.submit(register_image,br_ref,br_mov,bw,Xt,Yt,Xm,Ym,x,y,X_crop,Y_crop,max_val,min_val,method)) - + threads.append( + executor.submit( + register_image, + br_ref, + br_mov, + bw, + Xt, + Yt, + Xm, + Ym, + x, + y, + X_crop, + Y_crop, + max_val, + min_val, + method, + ), + ) + # Bioformats require the first tile be written before any other tile if first_tile: - logger.info('Waiting for first_tile to finish...') + logger.info("Waiting for first_tile to finish...") first_tile = False threads[0].result() - + # Wait for threads to finish, track progress for thread_num in range(len(threads)): if thread_num % 10 == 0: - logger.info('Registration progress: {:6.2f}%'.format(100*thread_num/len(threads))) + logger.info( + "Registration progress: {:6.2f}%".format( + 100 * thread_num / len(threads), + ), + ) reg_homography.append(threads[thread_num].result()) - + # Close the image bw.close_image() - logger.info('Registration progress: {:6.2f}%'.format(100.0)) - + logger.info(f"Registration progress: {100.0:6.2f}%") + # iterate across all images which have the similar transformation as the moving image above for moving_image_path in similar_transformation_set: - # seperate image name from the path to it - moving_image_name=moving_image_path[-1*filename_len:] - - logger.info('Applying registration to image: {}'.format(moving_image_name)) - - br_mov = BioReader(moving_image_path,max_workers=read_workers) - - bw = BioWriter(str(Path(outDir).joinpath(moving_image_name)), metadata=br_mov.read_metadata(),max_workers=write_workers) + moving_image_name = moving_image_path[-1 * filename_len :] + + logger.info(f"Applying registration to image: {moving_image_name}") + + br_mov = BioReader(moving_image_path, max_workers=read_workers) + + bw = BioWriter( + str(Path(outDir).joinpath(moving_image_name)), + metadata=br_mov.read_metadata(), + max_workers=write_workers, + ) bw.num_x(br_ref.num_x()) bw.num_y(br_ref.num_y()) bw.num_z(1) bw.num_c(1) bw.num_t(1) - + # Apply 
transformation to remaining images - logger.info('Transformation progress: {:5.2f}%'.format(0.0)) + logger.info(f"Transformation progress: {0.0:5.2f}%") threads = [] with ThreadPoolExecutor(loop_workers) as executor: first_tile = True - for tile,shape,transform in zip(reg_tiles,reg_shape,reg_homography): - + for tile, shape, transform in zip(reg_tiles, reg_shape, reg_homography): # Start transformation threads - threads.append(executor.submit(apply_transform,br_mov,bw,tile,shape,transform,method)) - + threads.append( + executor.submit( + apply_transform, br_mov, bw, tile, shape, transform, method, + ), + ) + # The first tile must be written before all other tiles if first_tile: first_tile = False threads[0].result() - # Wait for threads to finish and track progress + # Wait for threads to finish and track progress for thread_num in range(len(threads)): if thread_num % 10 == 0: - logger.info('Transformation progress: {:6.2f}%'.format(100*thread_num/len(threads))) + logger.info( + "Transformation progress: {:6.2f}%".format( + 100 * thread_num / len(threads), + ), + ) threads[thread_num].result() - logger.info('Transformation progress: {:6.2f}%'.format(100.0)) - + logger.info(f"Transformation progress: {100.0:6.2f}%") + bw.close_image() - \ No newline at end of file diff --git a/transforms/images/polus-image-registration-plugin/src/log4j.properties b/transforms/images/polus-image-registration-plugin/src/log4j.properties index b6682b3d8..bdcc5504b 100644 --- a/transforms/images/polus-image-registration-plugin/src/log4j.properties +++ b/transforms/images/polus-image-registration-plugin/src/log4j.properties @@ -9,4 +9,4 @@ log4j.appender.A1=org.apache.log4j.ConsoleAppender # A1 uses PatternLayout. 
log4j.appender.A1.layout=org.apache.log4j.PatternLayout -log4j.appender.A1.layout.ConversionPattern=%-4r [%t] %-5p %c %x - %m%n \ No newline at end of file +log4j.appender.A1.layout.ConversionPattern=%-4r [%t] %-5p %c %x - %m%n diff --git a/transforms/images/polus-image-registration-plugin/src/main.py b/transforms/images/polus-image-registration-plugin/src/main.py index 8c6e05a92..a3e972431 100644 --- a/transforms/images/polus-image-registration-plugin/src/main.py +++ b/transforms/images/polus-image-registration-plugin/src/main.py @@ -1,78 +1,135 @@ -import argparse, logging, subprocess -import numpy as np -from pathlib import Path -from parser import parse_collection +import argparse +import logging import shutil +import subprocess +from parser import parse_collection +from pathlib import Path - -if __name__=="__main__": +if __name__ == "__main__": # Initialize the logger - logging.basicConfig(format='%(asctime)s - %(name)-8s - %(levelname)-8s - %(message)s', - datefmt='%d-%b-%y %H:%M:%S') + logging.basicConfig( + format="%(asctime)s - %(name)-8s - %(levelname)-8s - %(message)s", + datefmt="%d-%b-%y %H:%M:%S", + ) logger = logging.getLogger("main") logger.setLevel(logging.INFO) # Setup the argument parsing logger.info("Parsing arguments...") - parser = argparse.ArgumentParser(prog='main', description='This plugin registers an image collection') - parser.add_argument('--filePattern', dest='filePattern', type=str, help='Filename pattern used to separate data', required=True) - parser.add_argument('--inpDir', dest='inpDir', type=str,help='Input image collection to be processed by this plugin', required=True) - parser.add_argument('--registrationVariable', dest='registrationVariable', type=str, help='variable to help identify which images need to be registered to each other', required=True) - parser.add_argument('--template', dest='template', type=str, help='Template image to be used for image registration', required=True) - 
parser.add_argument('--TransformationVariable', dest='TransformationVariable', type=str,help='variable to help identify which images have similar transformation', required=True) - parser.add_argument('--outDir', dest='outDir', type=str, help='Output collection', required=True) - parser.add_argument('--method', dest='method', type=str, help='projective, affine, or partialaffine', required=True) - - # Parse the arguments + parser = argparse.ArgumentParser( + prog="main", description="This plugin registers an image collection", + ) + parser.add_argument( + "--filePattern", + dest="filePattern", + type=str, + help="Filename pattern used to separate data", + required=True, + ) + parser.add_argument( + "--inpDir", + dest="inpDir", + type=str, + help="Input image collection to be processed by this plugin", + required=True, + ) + parser.add_argument( + "--registrationVariable", + dest="registrationVariable", + type=str, + help="variable to help identify which images need to be registered to each other", + required=True, + ) + parser.add_argument( + "--template", + dest="template", + type=str, + help="Template image to be used for image registration", + required=True, + ) + parser.add_argument( + "--TransformationVariable", + dest="TransformationVariable", + type=str, + help="variable to help identify which images have similar transformation", + required=True, + ) + parser.add_argument( + "--outDir", dest="outDir", type=str, help="Output collection", required=True, + ) + parser.add_argument( + "--method", + dest="method", + type=str, + help="projective, affine, or partialaffine", + required=True, + ) + + # Parse the arguments args = parser.parse_args() filePattern = args.filePattern - logger.info('filePattern = {}'.format(filePattern)) - + logger.info(f"filePattern = {filePattern}") + inpDir = args.inpDir # check if images folder is present in the input directory - if (Path.is_dir(Path(inpDir).joinpath('images'))): - inpDir= str(Path(inpDir).joinpath('images')) - - 
logger.info('inpDir = {}'.format(inpDir)) + if Path.is_dir(Path(inpDir).joinpath("images")): + inpDir = str(Path(inpDir).joinpath("images")) + + logger.info(f"inpDir = {inpDir}") registrationVariable = args.registrationVariable - logger.info('registrationVariable = {}'.format(registrationVariable)) + logger.info(f"registrationVariable = {registrationVariable}") template = args.template - logger.info('template = {}'.format(template)) + logger.info(f"template = {template}") TransformationVariable = args.TransformationVariable - logger.info('TransformationVariable = {}'.format(TransformationVariable)) + logger.info(f"TransformationVariable = {TransformationVariable}") outDir = args.outDir - logger.info('outDir = {}'.format(outDir)) + logger.info(f"outDir = {outDir}") method = args.method - logger.info('method = {}'.format(method)) - - # get template image path - template_image_path=str(Path(inpDir).joinpath(template).absolute()) - + logger.info(f"method = {method}") + + # get template image path + template_image_path = str(Path(inpDir).joinpath(template).absolute()) + # filename len - filename_len= len(template) - + filename_len = len(template) + # parse the input collection - logger.info('Parsing the input collection and getting registration_dictionary') - registration_dictionary=parse_collection(inpDir,filePattern,registrationVariable, TransformationVariable, template_image_path) - - logger.info('Iterating over registration_dictionary....') - for registration_set,similar_transformation_set in registration_dictionary.items(): - + logger.info("Parsing the input collection and getting registration_dictionary") + registration_dictionary = parse_collection( + inpDir, + filePattern, + registrationVariable, + TransformationVariable, + template_image_path, + ) + + logger.info("Iterating over registration_dictionary....") + for registration_set, similar_transformation_set in registration_dictionary.items(): # registration_dictionary consists of set of already registered 
images as well - if registration_set[0]==registration_set[1]: - similar_transformation_set=similar_transformation_set.tolist() + if registration_set[0] == registration_set[1]: + similar_transformation_set = similar_transformation_set.tolist() similar_transformation_set.append(registration_set[0]) for image_path in similar_transformation_set: - image_name=image_path[-1*filename_len:] - logger.info('Copying image {} to output directory'.format(image_name)) - shutil.copy2(image_path,str(Path(outDir).joinpath(image_name).absolute())) + image_name = image_path[-1 * filename_len :] + logger.info(f"Copying image {image_name} to output directory") + shutil.copy2( + image_path, str(Path(outDir).joinpath(image_name).absolute()), + ) continue - + # concatenate lists into a string to pass as an argument to argparse - registration_string=' '.join(registration_set) - similar_transformation_string=' '.join(similar_transformation_set) + registration_string = " ".join(registration_set) + similar_transformation_string = " ".join(similar_transformation_set) # open subprocess image_registration.py - registration = subprocess.Popen("python3 image_registration.py --registrationString '{}' --similarTransformationString '{}' --outDir '{}' --template '{}' --method '{}'".format(registration_string,similar_transformation_string,outDir,template,method), shell=True ) + registration = subprocess.Popen( + "python3 image_registration.py --registrationString '{}' --similarTransformationString '{}' --outDir '{}' --template '{}' --method '{}'".format( + registration_string, + similar_transformation_string, + outDir, + template, + method, + ), + shell=True, + ) registration.wait() - \ No newline at end of file diff --git a/transforms/images/polus-image-registration-plugin/src/parser.py b/transforms/images/polus-image-registration-plugin/src/parser.py index 5b7560fa5..514ddda23 100755 --- a/transforms/images/polus-image-registration-plugin/src/parser.py +++ 
b/transforms/images/polus-image-registration-plugin/src/parser.py @@ -1,117 +1,120 @@ -from filepattern import FilePattern, parse_directory,get_matching,parse_filename,get_regex -import os import itertools + import numpy as np +from filepattern import FilePattern +from filepattern import get_regex +from filepattern import parse_directory -def parse_collection(directory_path,file_pattern,registration_variable, similarity_variable, template_image): - - """ - This function parses the input directory and returns a dictionary. Each key in the dictionary is a tuple - consisting of a template and a moving image. The value corresponding to each key is a list of images that have - similar transformation as the moving image. +def parse_collection( + directory_path, + file_pattern, + registration_variable, + similarity_variable, + template_image, +): + """This function parses the input directory and returns a dictionary. Each key in the dictionary is a tuple + consisting of a template and a moving image. The value corresponding to each key is a list of images that have + similar transformation as the moving image. Note: The code produces the expected output when len(registration_variable)==len(similarity_variable)==1. The code will NOT spit out an error when the more than one variable is passed as registration or similarity variable, but additional testing needs to be done to validate the script for this usecase. 
- - inputs : + + inputs : directory_path: path to the input collection file_pattern: file name pattern of the input images registration_variable : variable to help determine the set of moving and template images similarity variable: variable to help determine the set of images having a similar transformation corresponding to each set of moving and template images template_image: name of a template image - - outputs : result_dic + + outputs : result_dic example of result_dic is shown below - + result_dic = { (template_img1 : moving_img1) : [set1_img1,set1_img2, set1_img3....], (template_img2 : moving_img2) : [set2_img1,set2_img2, set2_img3....], . . } - - + + """ - - # Predefined variables order - #var_order = 'rtczyx' - + # Predefined variables order + # get all variables in the file pattern - _,variables=get_regex(file_pattern) - + _, variables = get_regex(file_pattern) + # get variables except the registration and similarity variable - moving_variables=[var for var in variables if var not in registration_variable and var not in similarity_variable] - + moving_variables = [ + var + for var in variables + if var not in registration_variable and var not in similarity_variable + ] + # uvals is dictionary with all the possible variables as key - # corresponding to each key is a list of all values which that variable can take for the input collection - _, uvals=parse_directory(directory_path,file_pattern) - - parser_object=FilePattern(directory_path,file_pattern) - - image_set=[] - - # extract the index values from uvals for each variable in moving_variables - moving_variables_set=[uvals[var] for var in moving_variables] - + # corresponding to each key is a list of all values which that variable can take for the input collection + _, uvals = parse_directory(directory_path, file_pattern) + + parser_object = FilePattern(directory_path, file_pattern) + + image_set = [] + + # extract the index values from uvals for each variable in moving_variables + moving_variables_set = 
[uvals[var] for var in moving_variables] + # iterate over the similar transformation variables # Code produced expected output when len(registration_variable)==len(similarity_variable)==1 # refer to function description for char in similarity_variable: - # append the variable to the moving variable set - moving_variables.append(char) - + moving_variables.append(char) + # iterate over all possible index values of the similar transf. variable for ind in uvals[char]: - registration_set=[] - + registration_set = [] + # append the fixed value of the index to the moving variables set - moving_variables_set.append([ind]) - + moving_variables_set.append([ind]) + # get all the possible combinations of the index values in the moving variables set - registration_indices_combinations=list(itertools.product(*moving_variables_set)) - all_dicts=[] - - # iterate over all combinations and create a dictionary for each combination - # the dictionary is of the form {'C'=1, 'X'=2...etc} which can be used as an input + registration_indices_combinations = list( + itertools.product(*moving_variables_set), + ) + all_dicts = [] + + # iterate over all combinations and create a dictionary for each combination + # the dictionary is of the form {'C'=1, 'X'=2...etc} which can be used as an input # to the get_matching() function for index_comb in registration_indices_combinations: - inter_dict={} + inter_dict = {} for i in range(len(moving_variables)): - inter_dict.update({moving_variables[i].upper():index_comb[i]}) + inter_dict.update({moving_variables[i].upper(): index_comb[i]}) # store all dictionaries - all_dicts.append(inter_dict) - + all_dicts.append(inter_dict) + # iterate over all dictionaries for reg_dict in all_dicts: - intermediate_set=[] + intermediate_set = [] # use get_matching function to get all filenames with defined variable values in the dictionary - files=parser_object.get_matching(**reg_dict) - - # files is a list of dictionaries + files = 
parser_object.get_matching(**reg_dict) + + # files is a list of dictionaries for file_dict in files: - intermediate_set.append(file_dict['file']) - registration_set.append(intermediate_set) - + intermediate_set.append(file_dict["file"]) + registration_set.append(intermediate_set) + # delete the fixed index value of the similar transf. variable to prepare for the next iteration moving_variables_set.pop(-1) image_set.append(registration_set) - - - # parse image set to form the result dictionary - result_dic={} - old_set=np.array(image_set) + + # parse image set to form the result dictionary + result_dic = {} + old_set = np.array(image_set) for j in range(old_set.shape[1]): - inter=old_set[:,j,:] + inter = old_set[:, j, :] for k in range(inter.shape[1]): - ky=(inter[0,0],inter[0,k]) - items=inter[1:,k] - result_dic.update({ky:items}) - - return result_dic - + ky = (inter[0, 0], inter[0, k]) + items = inter[1:, k] + result_dic.update({ky: items}) - - - \ No newline at end of file + return result_dic diff --git a/transforms/images/polus-intensity-projection-plugin/README.md b/transforms/images/polus-intensity-projection-plugin/README.md index f212eabda..d8bf2e5f2 100644 --- a/transforms/images/polus-intensity-projection-plugin/README.md +++ b/transforms/images/polus-intensity-projection-plugin/README.md @@ -2,16 +2,16 @@ This WIPP plugin calculates the volumetric intensity projection of a 3d image along the z-direction(depth). The following types of intensity projections have -been implemented: +been implemented: -1. Maximum: -2. Minimum -3. Mean +1. Maximum: +2. Minimum +3. Mean ``` Example: Consider an input image of size: (x,y,z). If the user chooses the option `max`, the code will calculate the value of the maximum intensity value along the z-direction for every x,y position. The output will be a 2d image of -size (x,y). +size (x,y). ``` Contact [Gauhar Bains](mailto:gauhar.bains@labshare.org) for more information. 
@@ -37,4 +37,3 @@ This plugin takes one input argument and one output argument: | `--inpDir` | Input image collection to be processed | Input | collection | | `--projectionType` | Type of volumetric intensity projection | Input | string | | `--outDir` | Output collection | Output | collection | - diff --git a/transforms/images/polus-intensity-projection-plugin/build-docker.sh b/transforms/images/polus-intensity-projection-plugin/build-docker.sh index b6e5a9e8e..b018e0e09 100755 --- a/transforms/images/polus-intensity-projection-plugin/build-docker.sh +++ b/transforms/images/polus-intensity-projection-plugin/build-docker.sh @@ -1,4 +1,4 @@ #!/bin/bash version=$( None: - """ Main execution function + """Main execution function. Args: input_dir: path to directory containing the input images. @@ -29,85 +30,89 @@ def main( light_background: whether the image has a light or dark background. output_dir: path to directory where to store the output images. """ - for in_path in input_dir.iterdir(): in_path = Path(in_path) out_path = Path(output_dir).joinpath(in_path.name) # Load the input image with BioReader(in_path) as reader: - logger.info(f'Working on {in_path.name} with shape {reader.shape}') + logger.info(f"Working on {in_path.name} with shape {reader.shape}") # Initialize the output image - with BioWriter(out_path, metadata=reader.metadata, max_workers=cpu_count()) as writer: + with BioWriter( + out_path, metadata=reader.metadata, max_workers=cpu_count(), + ) as writer: rolling_ball( reader=reader, writer=writer, ball_radius=ball_radius, light_background=light_background, ) - return if __name__ == "__main__": - """ Argument parsing """ + """Argument parsing""" logger.info("Parsing arguments...") - parser = argparse.ArgumentParser(prog='main', description='A WIPP plugin to perform background subtraction using the rolling-ball algorithm.') - + parser = argparse.ArgumentParser( + prog="main", + description="A WIPP plugin to perform background subtraction using the 
rolling-ball algorithm.", + ) + # Input arguments parser.add_argument( - '--inputDir', - dest='input_dir', + "--inputDir", + dest="input_dir", type=str, - help='Input image collection to be processed by this plugin.', + help="Input image collection to be processed by this plugin.", required=True, ) parser.add_argument( - '--ballRadius', - dest='ball_radius', + "--ballRadius", + dest="ball_radius", type=str, - default='25', - help='Radius of the ball used to perform background subtraction.', + default="25", + help="Radius of the ball used to perform background subtraction.", required=False, ) parser.add_argument( - '--lightBackground', - dest='light_background', + "--lightBackground", + dest="light_background", type=str, - default='false', - help='Whether the image has a light or dark background.', + default="false", + help="Whether the image has a light or dark background.", required=False, ) # Output arguments parser.add_argument( - '--outputDir', - dest='output_dir', + "--outputDir", + dest="output_dir", type=str, - help='Output collection', + help="Output collection", required=True, ) - + # Parse the arguments args = parser.parse_args() _input_dir = Path(args.input_dir).resolve() - if _input_dir.joinpath('images').is_dir(): + if _input_dir.joinpath("images").is_dir(): # switch to images folder if present - _input_dir = _input_dir.joinpath('images').resolve() - logger.info(f'inputDir = {_input_dir}') + _input_dir = _input_dir.joinpath("images").resolve() + logger.info(f"inputDir = {_input_dir}") _ball_radius = int(args.ball_radius) - logger.info(f'ballRadius = {_ball_radius}') + logger.info(f"ballRadius = {_ball_radius}") _light_background = args.light_background - if _light_background in {'true', 'false'}: - _light_background = (_light_background == 'true') + if _light_background in {"true", "false"}: + _light_background = _light_background == "true" else: - raise ValueError(f'lightBackground must be either \'true\' or \'false\'') - logger.info(f'lightBackground 
= {_light_background}') + msg = "lightBackground must be either 'true' or 'false'" + raise ValueError(msg) + logger.info(f"lightBackground = {_light_background}") _output_dir = args.output_dir - logger.info(f'outputDir = {_output_dir}') + logger.info(f"outputDir = {_output_dir}") main( input_dir=_input_dir, diff --git a/transforms/images/polus-rolling-ball-plugin/src/rolling_ball.py b/transforms/images/polus-rolling-ball-plugin/src/rolling_ball.py index e1789d38b..7bdd4c73b 100644 --- a/transforms/images/polus-rolling-ball-plugin/src/rolling_ball.py +++ b/transforms/images/polus-rolling-ball-plugin/src/rolling_ball.py @@ -9,7 +9,7 @@ def _rolling_ball(tile, ball_radius: int, light_background: bool): - """ Applies the rolling-ball algorithm to a single tile. + """Applies the rolling-ball algorithm to a single tile. Args: tile: A tile, usually from an ome.tif file. @@ -36,12 +36,11 @@ def _rolling_ball(tile, ball_radius: int, light_background: bool): # if the image had a light backend, invert the result. result = util.invert(tile) if light_background else tile - result = numpy.reshape(result, shape) - return result + return numpy.reshape(result, shape) def _bounds(x, x_max, ball_radius): - """ Calculates the indices for handling the edges of tiles. + """Calculates the indices for handling the edges of tiles. We pad each tile with 'ball_radius' pixels from the full image along the top, bottom, left, and right edges of each tile. @@ -56,12 +55,12 @@ def _bounds(x, x_max, ball_radius): def rolling_ball( - reader: BioReader, - writer: BioWriter, - ball_radius: int, - light_background: bool, + reader: BioReader, + writer: BioWriter, + ball_radius: int, + light_background: bool, ): - """ Applies the rolling-ball algorithm from skimage to perform background subtraction. + """Applies the rolling-ball algorithm from skimage to perform background subtraction. This function processes the image in tiles and, therefore, scales to images of any size. 
It writes the resulting image to the given BioWriter object. @@ -75,14 +74,18 @@ def rolling_ball( """ for z in range(reader.Z): - for y in range(0, reader.Y, TILE_SIZE): - y_max, pad_top, pad_bottom, tile_top, tile_bottom = _bounds(y, reader.Y, ball_radius) + y_max, pad_top, pad_bottom, tile_top, tile_bottom = _bounds( + y, reader.Y, ball_radius, + ) for x in range(0, reader.X, TILE_SIZE): - x_max, pad_left, pad_right, tile_left, tile_right = _bounds(x, reader.X, ball_radius) + x_max, pad_left, pad_right, tile_left, tile_right = _bounds( + x, reader.X, ball_radius, + ) - tile = reader[pad_top:pad_bottom, pad_left:pad_right, z:z + 1, 0, 0] + tile = reader[pad_top:pad_bottom, pad_left:pad_right, z : z + 1, 0, 0] result = _rolling_ball(tile, ball_radius, light_background) - writer[y:y_max, x:x_max, z:z + 1, 0, 0] = result[tile_top:tile_bottom, tile_left:tile_right] - return + writer[y:y_max, x:x_max, z : z + 1, 0, 0] = result[ + tile_top:tile_bottom, tile_left:tile_right, + ] diff --git a/transforms/images/polus-rolling-ball-plugin/tests/__init__.py b/transforms/images/polus-rolling-ball-plugin/tests/__init__.py index 5b5284915..9313f2b78 100644 --- a/transforms/images/polus-rolling-ball-plugin/tests/__init__.py +++ b/transforms/images/polus-rolling-ball-plugin/tests/__init__.py @@ -1,6 +1,7 @@ from unittest import TestSuite -from .version_test import VersionTest + from .correctness_test import CorrectnessTest +from .version_test import VersionTest test_cases = ( VersionTest, diff --git a/transforms/images/polus-rolling-ball-plugin/tests/correctness_test.py b/transforms/images/polus-rolling-ball-plugin/tests/correctness_test.py index 337151d3f..e35fc11a1 100644 --- a/transforms/images/polus-rolling-ball-plugin/tests/correctness_test.py +++ b/transforms/images/polus-rolling-ball-plugin/tests/correctness_test.py @@ -5,7 +5,6 @@ from bfio import BioReader from bfio import BioWriter from skimage import restoration - from src.rolling_ball import rolling_ball @@ -24,21 
+23,19 @@ class CorrectnessTest(unittest.TestCase): @classmethod def setUpClass(cls) -> None: - cls.infile = tempfile.NamedTemporaryFile(suffix='.ome.tif') - cls.outfile = tempfile.NamedTemporaryFile(suffix='.ome.tif') + cls.infile = tempfile.NamedTemporaryFile(suffix=".ome.tif") + cls.outfile = tempfile.NamedTemporaryFile(suffix=".ome.tif") with BioWriter(cls.infile.name) as writer: writer.X = cls.image_shape[0] writer.Y = cls.image_shape[1] writer[:] = cls.random_image[:] - return @classmethod def tearDownClass(cls) -> None: cls.infile.close() cls.outfile.close() - return def test_correctness(self): # calculate the result with the plugin code @@ -56,9 +53,10 @@ def test_correctness(self): plugin_result = reader[:] # calculate the true result - background = restoration.rolling_ball(self.random_image, radius=self.ball_radius) + background = restoration.rolling_ball( + self.random_image, radius=self.ball_radius, + ) true_result = self.random_image - background # assert correctness - self.assertTrue(numpy.all(numpy.equal(true_result, plugin_result)), f'The plugin resulted in a different image') - return + assert numpy.all(numpy.equal(true_result, plugin_result)), "The plugin resulted in a different image" diff --git a/transforms/images/polus-rolling-ball-plugin/tests/version_test.py b/transforms/images/polus-rolling-ball-plugin/tests/version_test.py index b742dbb65..71d6c7d6d 100644 --- a/transforms/images/polus-rolling-ball-plugin/tests/version_test.py +++ b/transforms/images/polus-rolling-ball-plugin/tests/version_test.py @@ -4,22 +4,20 @@ class VersionTest(unittest.TestCase): - version_path = Path(__file__).parent.parent.joinpath("VERSION") json_path = Path(__file__).parent.parent.joinpath("plugin.json") - + def test_plugin_manifest(self): # Get the plugin version - with open(self.version_path, 'r') as file: + with open(self.version_path) as file: version = file.readline() - + # Load the plugin manifest - with open(self.json_path, 'r') as file: + with 
open(self.json_path) as file: plugin_json = json.load(file) - - self.assertEqual(plugin_json['version'], version) - self.assertTrue(plugin_json['containerId'].endswith(version)) - return + + assert plugin_json["version"] == version + assert plugin_json["containerId"].endswith(version) if __name__ == "__main__": diff --git a/transforms/images/polus-stack-z-slice-plugin/Dockerfile b/transforms/images/polus-stack-z-slice-plugin/Dockerfile index b5b077a2e..d71b80a3c 100644 --- a/transforms/images/polus-stack-z-slice-plugin/Dockerfile +++ b/transforms/images/polus-stack-z-slice-plugin/Dockerfile @@ -2,7 +2,7 @@ FROM polusai/bfio:2.1.9 COPY VERSION / - + ARG EXEC_DIR="/opt/executables" ARG DATA_DIR="/data" @@ -19,4 +19,4 @@ RUN pip3 install -r ${EXEC_DIR}/requirements.txt --no-cache-dir WORKDIR ${EXEC_DIR} # Default command. Additional arguments are provided through the command line -ENTRYPOINT ["python3", "/opt/executables/main.py"] \ No newline at end of file +ENTRYPOINT ["python3", "/opt/executables/main.py"] diff --git a/transforms/images/polus-stack-z-slice-plugin/README.md b/transforms/images/polus-stack-z-slice-plugin/README.md index 8cde5e459..2a6f36407 100644 --- a/transforms/images/polus-stack-z-slice-plugin/README.md +++ b/transforms/images/polus-stack-z-slice-plugin/README.md @@ -18,7 +18,7 @@ To build the Docker image for the conversion plugin, run ## Input Filename Pattern -This plugin uses the +This plugin uses the [filepattern](https://github.com/LabShare/polus-plugins/tree/master/utils/polus-filepattern-util) utility to indicate which files to stack. 
In particular, defining a filename variable is surrounded by `{}`, and the variable name and number of spaces diff --git a/transforms/images/polus-stack-z-slice-plugin/VERSION b/transforms/images/polus-stack-z-slice-plugin/VERSION index b966e81a4..e8ea05db8 100644 --- a/transforms/images/polus-stack-z-slice-plugin/VERSION +++ b/transforms/images/polus-stack-z-slice-plugin/VERSION @@ -1 +1 @@ -1.2.4 \ No newline at end of file +1.2.4 diff --git a/transforms/images/polus-stack-z-slice-plugin/build-docker.sh b/transforms/images/polus-stack-z-slice-plugin/build-docker.sh index 4638bde0a..58f6abaa7 100755 --- a/transforms/images/polus-stack-z-slice-plugin/build-docker.sh +++ b/transforms/images/polus-stack-z-slice-plugin/build-docker.sh @@ -1,4 +1,4 @@ #!/bin/bash version=$( None: - + + +def main(input_dir: pathlib.Path, file_pattern: str, output_dir: pathlib.Path) -> None: # create the filepattern object - fp = filepattern.FilePattern(input_dir,file_pattern) - - for files in fp(group_by='z'): + fp = filepattern.FilePattern(input_dir, file_pattern) + for files in fp(group_by="z"): output_name = fp.output_name(files) output_file = output_dir.joinpath(output_name) - ProcessManager.submit_process(_merge_layers,files,output_file) - + ProcessManager.submit_process(_merge_layers, files, output_file) + ProcessManager.join_processes() + if __name__ == "__main__": # Initialize the main thread logger - logger = logging.getLogger('main') + logger = logging.getLogger("main") logger.setLevel(logging.INFO) # Setup the Argument parsing - logger.info('Parsing arguments...') - parser = argparse.ArgumentParser(prog='main', description='Compile individual tiled tiff images into a single volumetric tiled tiff.') - - parser.add_argument('--inpDir', dest='input_dir', type=str, - help='Path to folder with tiled tiff files', required=True) - parser.add_argument('--outDir', dest='output_dir', type=str, - help='The output directory for ome.tif files', required=True) - 
parser.add_argument('--filePattern', dest='file_pattern', type=str, - help='A filename pattern specifying variables in filenames.', required=True) + logger.info("Parsing arguments...") + parser = argparse.ArgumentParser( + prog="main", + description="Compile individual tiled tiff images into a single volumetric tiled tiff.", + ) + + parser.add_argument( + "--inpDir", + dest="input_dir", + type=str, + help="Path to folder with tiled tiff files", + required=True, + ) + parser.add_argument( + "--outDir", + dest="output_dir", + type=str, + help="The output directory for ome.tif files", + required=True, + ) + parser.add_argument( + "--filePattern", + dest="file_pattern", + type=str, + help="A filename pattern specifying variables in filenames.", + required=True, + ) args = parser.parse_args() input_dir = pathlib.Path(args.input_dir) @@ -126,13 +153,11 @@ def main(input_dir: pathlib.Path, input_dir = input_dir.joinpath("images") output_dir = pathlib.Path(args.output_dir) file_pattern = args.file_pattern - logger.info(f'input_dir = {input_dir}') - logger.info(f'output_dir = {output_dir}') - logger.info(f'file_pattern = {file_pattern}') - logger.info(f'max_threads: {ProcessManager.num_processes()}') - - ProcessManager.init_processes('main','stack') - - main(input_dir, - file_pattern, - output_dir) \ No newline at end of file + logger.info(f"input_dir = {input_dir}") + logger.info(f"output_dir = {output_dir}") + logger.info(f"file_pattern = {file_pattern}") + logger.info(f"max_threads: {ProcessManager.num_processes()}") + + ProcessManager.init_processes("main", "stack") + + main(input_dir, file_pattern, output_dir) diff --git a/transforms/images/remove-border-objects-plugin/Dockerfile b/transforms/images/remove-border-objects-plugin/Dockerfile index 477454ea8..790e4dd42 100644 --- a/transforms/images/remove-border-objects-plugin/Dockerfile +++ b/transforms/images/remove-border-objects-plugin/Dockerfile @@ -5,4 +5,4 @@ COPY VERSION ${EXEC_DIR} COPY src ${EXEC_DIR}/ RUN pip3 
install -r ${EXEC_DIR}/requirements.txt --no-cache-dir && \ pip3 install "bfio[all]" -ENTRYPOINT ["python3", "main.py"] \ No newline at end of file +ENTRYPOINT ["python3", "main.py"] diff --git a/transforms/images/remove-border-objects-plugin/README.md b/transforms/images/remove-border-objects-plugin/README.md index 97e4d88fe..2d5dd9e43 100644 --- a/transforms/images/remove-border-objects-plugin/README.md +++ b/transforms/images/remove-border-objects-plugin/README.md @@ -16,7 +16,7 @@ At the moment this plugin supports label images with two dimensions only. We wil -**a -** Original image contains 67 unique label objects +**a -** Original image contains 67 unique label objects **b -** Image with 16 detected border objects **c -** Removing Border objects and sequential relabelling @@ -41,7 +41,3 @@ This plugin takes two input arguments and | `--inpDir` | Input image directory | Input | collection | | `--pattern` | Filepattern to parse image files | Input | string | | `--outDir` | Output collection | Output | collection | - - - - diff --git a/transforms/images/remove-border-objects-plugin/VERSION b/transforms/images/remove-border-objects-plugin/VERSION index 6da28dde7..17e51c385 100644 --- a/transforms/images/remove-border-objects-plugin/VERSION +++ b/transforms/images/remove-border-objects-plugin/VERSION @@ -1 +1 @@ -0.1.1 \ No newline at end of file +0.1.1 diff --git a/transforms/images/remove-border-objects-plugin/build-docker.sh b/transforms/images/remove-border-objects-plugin/build-docker.sh index 00f49fd0a..70ca7937e 100755 --- a/transforms/images/remove-border-objects-plugin/build-docker.sh +++ b/transforms/images/remove-border-objects-plugin/build-docker.sh @@ -1,4 +1,4 @@ #!/bin/bash version=$( None: self.inpDir = inpDir - self.outDir= outDir + self.outDir = outDir self.filename = filename self.imagepath = os.path.join(self.inpDir, self.filename) self.br_image = BioReader(self.imagepath) self.label_img = self.br_image.read().squeeze() def 
discard_borderobjects(self): - """ This functions identifies which label pixels touches image borders and - setting the values of those label pixels to background pixels values which is 0 + """This functions identifies which label pixels touches image borders and + setting the values of those label pixels to background pixels values which is 0. """ borderobj = list(self.label_img[0, :]) borderobj.extend(self.label_img[:, 0]) - borderobj.extend(self.label_img[- 1, :]) - borderobj.extend(self.label_img[:, - 1]) + borderobj.extend(self.label_img[-1, :]) + borderobj.extend(self.label_img[:, -1]) borderobj = np.unique(borderobj).tolist() for obj in borderobj: @@ -40,21 +43,19 @@ def discard_borderobjects(self): return self.label_img def relabel_sequential(self): - """ Sequential relabelling of objects in a label image - """ - relabel_img, _, inverse_map = relabel_sequential(self.label_img) + """Sequential relabelling of objects in a label image.""" + relabel_img, _, inverse_map = relabel_sequential(self.label_img) return relabel_img, inverse_map - def save_relabel_image(self, x): - """ Writing images with relabelled and cleared border touching objects - """ - with BioWriter(file_path = Path(self.outDir, self.filename), - backend='python', - metadata = self.br_image.metadata, - X=self.label_img.shape[0], - Y=self.label_img.shape[0], - dtype=self.label_img.dtype) as bw: + """Writing images with relabelled and cleared border touching objects.""" + with BioWriter( + file_path=Path(self.outDir, self.filename), + backend="python", + metadata=self.br_image.metadata, + X=self.label_img.shape[0], + Y=self.label_img.shape[0], + dtype=self.label_img.dtype, + ) as bw: bw[:] = x - bw.close() - return \ No newline at end of file + bw.close() diff --git a/transforms/images/remove-border-objects-plugin/src/main.py b/transforms/images/remove-border-objects-plugin/src/main.py index e713ade19..d7cafc160 100644 --- a/transforms/images/remove-border-objects-plugin/src/main.py +++ 
b/transforms/images/remove-border-objects-plugin/src/main.py @@ -1,89 +1,92 @@ -import argparse, logging, os, time, filepattern +import argparse +import logging +import os +import time from pathlib import Path -from functions import * +import filepattern +from functions import * -#Import environment variables -POLUS_LOG = getattr(logging,os.environ.get('POLUS_LOG','INFO')) -POLUS_EXT = os.environ.get('POLUS_EXT','.ome.tif') +# Import environment variables +POLUS_LOG = getattr(logging, os.environ.get("POLUS_LOG", "INFO")) +POLUS_EXT = os.environ.get("POLUS_EXT", ".ome.tif") # Initialize the logger -logging.basicConfig(format='%(asctime)s - %(name)-8s - %(levelname)-8s - %(message)s', - datefmt='%d-%b-%y %H:%M:%S') +logging.basicConfig( + format="%(asctime)s - %(name)-8s - %(levelname)-8s - %(message)s", + datefmt="%d-%b-%y %H:%M:%S", +) logger = logging.getLogger("main") logger.setLevel(POLUS_LOG) -def main(inpDir:Path, - pattern:str, - outDir:Path, - ): - starttime= time.time() - if pattern is None: - logger.info( - "No filepattern was provided so filepattern uses all input files" - ) +def main( + inpDir: Path, + pattern: str, + outDir: Path, +): + starttime = time.time() + if pattern is None: + logger.info("No filepattern was provided so filepattern uses all input files") - assert inpDir.exists(), logger.info("Input directory does not exist") - count=0 - fp = filepattern.FilePattern(inpDir,pattern) - imagelist = len([f for f in fp]) + assert inpDir.exists(), logger.info("Input directory does not exist") + count = 0 + fp = filepattern.FilePattern(inpDir, pattern) + imagelist = len(list(fp)) - for f in fp(): - count += 1 - file = f[0]['file'].name - logger.info(f'Label image: {file}') - db = Discard_borderobjects(inpDir, outDir, file) - db.discard_borderobjects() - relabel_img, _ = db.relabel_sequential() - db.save_relabel_image(relabel_img) - logger.info(f'Saving {count}/{imagelist} Relabelled image with discarded objects: {file}') - logger.info('Finished all 
processes') - endtime = (time.time() - starttime)/60 - logger.info(f'Total time taken to process all images: {endtime}') + for f in fp(): + count += 1 + file = f[0]["file"].name + logger.info(f"Label image: {file}") + db = Discard_borderobjects(inpDir, outDir, file) + db.discard_borderobjects() + relabel_img, _ = db.relabel_sequential() + db.save_relabel_image(relabel_img) + logger.info( + f"Saving {count}/{imagelist} Relabelled image with discarded objects: {file}", + ) + logger.info("Finished all processes") + endtime = (time.time() - starttime) / 60 + logger.info(f"Total time taken to process all images: {endtime}") # ''' Argument parsing ''' logger.info("Parsing arguments...") -parser = argparse.ArgumentParser(prog='main', description='Discard Border Objects Plugin') +parser = argparse.ArgumentParser( + prog="main", description="Discard Border Objects Plugin", +) # # Input arguments parser.add_argument( - "--inpDir", - dest="inpDir", - type=str, - help="Input image collection to be processed by this plugin", - required=True - ) + "--inpDir", + dest="inpDir", + type=str, + help="Input image collection to be processed by this plugin", + required=True, +) parser.add_argument( - "--pattern", - dest="pattern", - type=str, - default=".+", - help="Filepattern regex used to parse image files", - required=False - ) -# # Output arguments -parser.add_argument('--outDir', - dest='outDir', + "--pattern", + dest="pattern", type=str, - help='Output directory', - required=True - ) + default=".+", + help="Filepattern regex used to parse image files", + required=False, +) +# # Output arguments +parser.add_argument( + "--outDir", dest="outDir", type=str, help="Output directory", required=True, +) # # Parse the arguments args = parser.parse_args() inpDir = Path(args.inpDir) -if (inpDir.joinpath('images').is_dir()): - inputDir = inpDir.joinpath('images').absolute() -logger.info('inpDir = {}'.format(inpDir)) +if inpDir.joinpath("images").is_dir(): + inputDir = 
inpDir.joinpath("images").absolute() +logger.info(f"inpDir = {inpDir}") pattern = args.pattern -logger.info("pattern = {}".format(pattern)) +logger.info(f"pattern = {pattern}") outDir = Path(args.outDir) -logger.info('outDir = {}'.format(outDir)) +logger.info(f"outDir = {outDir}") -if __name__=="__main__": - main(inpDir=inpDir, - pattern=pattern, - outDir=outDir - ) \ No newline at end of file +if __name__ == "__main__": + main(inpDir=inpDir, pattern=pattern, outDir=outDir) diff --git a/transforms/images/remove-border-objects-plugin/src/requirements.txt b/transforms/images/remove-border-objects-plugin/src/requirements.txt index 41e2d5ea5..280ee3cfe 100644 --- a/transforms/images/remove-border-objects-plugin/src/requirements.txt +++ b/transforms/images/remove-border-objects-plugin/src/requirements.txt @@ -1,2 +1,2 @@ filepattern==1.4.7 -scikit-image>=0.17.2 \ No newline at end of file +scikit-image>=0.17.2 diff --git a/transforms/images/remove-border-objects-plugin/tests/test_main.py b/transforms/images/remove-border-objects-plugin/tests/test_main.py index 7d844c963..e98b12ba8 100644 --- a/transforms/images/remove-border-objects-plugin/tests/test_main.py +++ b/transforms/images/remove-border-objects-plugin/tests/test_main.py @@ -1,48 +1,48 @@ - from pathlib import Path import numpy as np import os, sys, unittest from bfio import BioReader + dirpath = os.path.dirname(os.path.abspath(__file__)) -sys.path.append(os.path.join(dirpath, '../')) -from src.functions import Discard_borderobjects +sys.path.append(os.path.join(dirpath, "../")) +from src.functions import Discard_borderobjects -inpDir = Path(dirpath).parent.joinpath('images') -outDir = Path(dirpath).parent.joinpath('out') +inpDir = Path(dirpath).parent.joinpath("images") +outDir = Path(dirpath).parent.joinpath("out") -class Test_Discard_borderobjects(unittest.TestCase): +class Test_Discard_borderobjects(unittest.TestCase): def setUp(self) -> None: - self.inpDir = inpDir - self.outDir=outDir + self.outDir = 
outDir self.flist = os.listdir(self.inpDir) def test_discard_borderobjects(self): - for f in self.flist: - if f.endswith('.ome.tif'): - br = BioReader(Path(self.inpDir, f)) - image = br.read().squeeze() - dc = Discard_borderobjects(self.inpDir, self.outDir, f) - dc_image = dc.discard_borderobjects() - self.assertTrue(np.unique(image) != np.unique(dc_image)) - self.assertFalse(len(np.unique(image)) < len(np.unique(dc_image))) + for f in self.flist: + if f.endswith(".ome.tif"): + br = BioReader(Path(self.inpDir, f)) + image = br.read().squeeze() + dc = Discard_borderobjects(self.inpDir, self.outDir, f) + dc_image = dc.discard_borderobjects() + self.assertTrue(np.unique(image) != np.unique(dc_image)) + self.assertFalse(len(np.unique(image)) < len(np.unique(dc_image))) - def boundary_labels(x:np.ndarray): - borderobj = list(x[0, :]) - borderobj.extend(x[:, 0]) - borderobj.extend(x[x.shape[0] - 1, :]) - borderobj.extend(x[:, x.shape[1] - 1]) - borderobj = np.unique(borderobj) - return borderobj - boundary_obj = boundary_labels(image) - dc_labels = np.unique(dc_image)[1:] - self.assertTrue(np.isin(dc_labels, boundary_obj)[0] ==False) + def boundary_labels(x: np.ndarray): + borderobj = list(x[0, :]) + borderobj.extend(x[:, 0]) + borderobj.extend(x[x.shape[0] - 1, :]) + borderobj.extend(x[:, x.shape[1] - 1]) + borderobj = np.unique(borderobj) + return borderobj + + boundary_obj = boundary_labels(image) + dc_labels = np.unique(dc_image)[1:] + self.assertTrue(np.isin(dc_labels, boundary_obj)[0] == False) def test_relabel_sequential(self): for f in self.flist: - if f.endswith('.ome.tif'): + if f.endswith(".ome.tif"): br = BioReader(Path(self.inpDir, f)) image = br.read().squeeze() dc = Discard_borderobjects(self.inpDir, self.outDir, f) @@ -53,17 +53,18 @@ def test_relabel_sequential(self): def test_save_relabel_image(self): for f in self.flist: - if f.endswith('.ome.tif'): + if f.endswith(".ome.tif"): br = BioReader(Path(self.inpDir, f)) image = br.read().squeeze() dc = 
Discard_borderobjects(self.inpDir, self.outDir, f) dc_image = dc.discard_borderobjects() relabel_img, _ = dc.relabel_sequential() dc.save_relabel_image(relabel_img) - imagelist = [f for f in os.listdir(self.inpDir) if f.endswith('.ome.tif')] - relabel_list = [f for f in os.listdir(self.outDir) if f.endswith('.ome.tif')] + imagelist = [f for f in os.listdir(self.inpDir) if f.endswith(".ome.tif")] + relabel_list = [f for f in os.listdir(self.outDir) if f.endswith(".ome.tif")] self.assertTrue(len(imagelist) == len(relabel_list)) self.assertFalse(len(relabel_list) == 0) - -if __name__=="__main__": + + +if __name__ == "__main__": unittest.main() diff --git a/transforms/images/remove-border-objects-plugin/tests/version_test.py b/transforms/images/remove-border-objects-plugin/tests/version_test.py index c9d2c1c91..bcdc415de 100644 --- a/transforms/images/remove-border-objects-plugin/tests/version_test.py +++ b/transforms/images/remove-border-objects-plugin/tests/version_test.py @@ -1,43 +1,44 @@ -import unittest, json +import json +import unittest from pathlib import Path -import urllib.request as request +from urllib import request + class VersionTest(unittest.TestCase): - """ Verify VERSION is correct """ - + """Verify VERSION is correct.""" + version_path = Path(__file__).parent.parent.joinpath("VERSION") json_path = Path(__file__).parent.parent.joinpath("plugin.json") - url = 'https://hub.docker.com/repository/docker/polusai/discard-border-objects-plugin/tags?page=1&ordering=last_updated' - + url = "https://hub.docker.com/repository/docker/polusai/discard-border-objects-plugin/tags?page=1&ordering=last_updated" + def test_plugin_manifest(self): - """ Tests VERSION matches the version in the plugin manifest """ - + """Tests VERSION matches the version in the plugin manifest.""" # Get the plugin version - with open(self.version_path,'r') as file: + with open(self.version_path) as file: version = file.readline() - + # Load the plugin manifest - with 
open(self.json_path,'r') as file: + with open(self.json_path) as file: plugin_json = json.load(file) - - self.assertEqual(plugin_json['version'],version) - self.assertTrue(plugin_json['containerId'].endswith(version)) + + assert plugin_json["version"] == version + assert plugin_json["containerId"].endswith(version) def test_docker_hub(self): - """ Tests VERSION matches the latest docker container tag """ - + """Tests VERSION matches the latest docker container tag.""" # Get the plugin version - with open(self.version_path,'r') as file: + with open(self.version_path) as file: version = file.readline() - + response = json.load(request.urlopen(self.url)) - if len(response['results']) == 0: - self.fail('Could not find repository or no containers are in the repository.') - latest_tag = json.load(response)['results'][0]['name'] - - self.assertEqual(latest_tag,version) - -if __name__=="__main__": - + if len(response["results"]) == 0: + self.fail( + "Could not find repository or no containers are in the repository.", + ) + latest_tag = json.load(response)["results"][0]["name"] + + assert latest_tag == version + + +if __name__ == "__main__": unittest.main() - \ No newline at end of file diff --git a/transforms/polus-recycle-vector-plugin/Dockerfile b/transforms/polus-recycle-vector-plugin/Dockerfile index d3be2c268..ef5f38bc2 100644 --- a/transforms/polus-recycle-vector-plugin/Dockerfile +++ b/transforms/polus-recycle-vector-plugin/Dockerfile @@ -15,10 +15,10 @@ RUN mkdir -p ${EXEC_DIR} \ #Copy executable COPY src ${EXEC_DIR}/ - + RUN pip3 install -r ${EXEC_DIR}/requirements.txt --no-cache-dir WORKDIR ${EXEC_DIR} # Default command. 
Additional arguments are provided through the command line -ENTRYPOINT ["python3", "/opt/executables/main.py"] \ No newline at end of file +ENTRYPOINT ["python3", "/opt/executables/main.py"] diff --git a/transforms/polus-recycle-vector-plugin/README.md b/transforms/polus-recycle-vector-plugin/README.md index a4f053d4f..3b946c73f 100644 --- a/transforms/polus-recycle-vector-plugin/README.md +++ b/transforms/polus-recycle-vector-plugin/README.md @@ -11,7 +11,7 @@ multi-channel image and apply the same stitching vector to the other channels. This plugin creates a new stitching vector that will apply an existing single stitching vector to a different set of images. -This plugin uses +This plugin uses [filepatterns](https://filepattern.readthedocs.io/en/latest/), which is a variant of regular expressions similar to what [MIST](https://github.com/usnistgov/MIST) @@ -25,7 +25,7 @@ variable name and number of spaces dedicated to the variable are denoted by repeated characters for the variable. For example, if all filenames follow the structure `filename_TTT.ome.tif`, where TTT indicates the timepoint the image was captured at, then the filename pattern would be `filename_{ttt}.ome.tif` or -`filename_{t+}.ome.tif`. For more information on `filepattern`, see the +`filename_{t+}.ome.tif`. For more information on `filepattern`, see the [documentation](https://filepattern.readthedocs.io/en/latest/). 
## Build the plugin @@ -48,4 +48,3 @@ This plugin takes 3 input arguments and 1 output argument: | `--collectionDir` | Image collection | Input | collection | | `--filepattern` | `filepattern` for filenames in image collection | Input | String | | `--outDir` | Output stitching vector | Output | stitchingVector | - diff --git a/transforms/polus-recycle-vector-plugin/VERSION b/transforms/polus-recycle-vector-plugin/VERSION index 3e1ad720b..bc80560fa 100644 --- a/transforms/polus-recycle-vector-plugin/VERSION +++ b/transforms/polus-recycle-vector-plugin/VERSION @@ -1 +1 @@ -1.5.0 \ No newline at end of file +1.5.0 diff --git a/transforms/polus-recycle-vector-plugin/build-docker.sh b/transforms/polus-recycle-vector-plugin/build-docker.sh index 893aa978c..6fc711918 100755 --- a/transforms/polus-recycle-vector-plugin/build-docker.sh +++ b/transforms/polus-recycle-vector-plugin/build-docker.sh @@ -1,3 +1,3 @@ #!/bin/bash version=$( Tuple[str, int]: - +) -> tuple[str, int]: """This function produces the best combination of variables for a given chunksize Args: inpDir (Path): Path to Image files @@ -19,9 +23,8 @@ def get_grouping( groupBy (str, optional): Specify variable to group image filenames chunk_size (str, optional): Number of images to generate collective filepattern Returns: - variables for grouping image filenames, count + variables for grouping image filenames, count. 
""" - fp = filepattern.FilePattern(inpDir, pattern) # Get the number of unique values for each variable @@ -68,13 +71,13 @@ def get_grouping( return best_group, best_count -def save_generator_outputs(x: Dict[str, int], outDir: Path): +def save_generator_outputs(x: dict[str, int], outDir: Path): """Convert dictionary of filepatterns and number of image files which can be parsed with each filepattern to json file Args: x (Dict): A dictionary of filepatterns and number of image files which can be parsed with each filepattern outDir (Path): Path to save the outputs Returns: - json file with array of file patterns + json file with array of file patterns. """ data = json.loads('{"filePatterns": []}') with open(os.path.join(outDir, "file_patterns.json"), "w") as cwlout: @@ -82,7 +85,6 @@ def save_generator_outputs(x: Dict[str, int], outDir: Path): data["filePatterns"].append(key) json.dump(data, cwlout) - return def main( @@ -92,7 +94,6 @@ def main( groupBy: str, outDir: Path, ): - starttime = time.time() # If the pattern isn't given, try to infer one @@ -102,7 +103,7 @@ def main( except ValueError: logger.error( "Could not infer a filepattern from the input files, " - + "and no filepattern was provided." 
+ + "and no filepattern was provided.", ) raise @@ -119,7 +120,7 @@ def main( fp_temp = filepattern.FilePattern(inpDir, fps[-1]) counts.append(sum(len(f) for f in fp_temp)) - assert sum(counts) == len([f for f in fp]) + assert sum(counts) == len(list(fp)) save_generator_outputs(dict(zip(fps, counts)), outDir) @@ -128,7 +129,6 @@ def main( if __name__ == "__main__": - # Import environment variables POLUS_LOG = getattr(logging, os.environ.get("POLUS_LOG", "INFO")) @@ -143,7 +143,7 @@ def main( # Argument parsing logger.info("Parsing arguments...") parser = argparse.ArgumentParser( - prog="main", description="Filepattern generator Plugin" + prog="main", description="Filepattern generator Plugin", ) # Input arguments parser.add_argument( @@ -176,7 +176,7 @@ def main( required=False, ) parser.add_argument( - "--outDir", dest="outDir", type=str, help="Output collection", required=True + "--outDir", dest="outDir", type=str, help="Output collection", required=True, ) # Parse the arguments @@ -185,15 +185,15 @@ def main( if inpDir.joinpath("images").is_dir(): inpDir = inpDir.joinpath("images").absolute() - logger.info("inputDir = {}".format(inpDir)) + logger.info(f"inputDir = {inpDir}") outDir = Path(args.outDir) - logger.info("outDir = {}".format(outDir)) + logger.info(f"outDir = {outDir}") pattern = args.pattern - logger.info("pattern = {}".format(pattern)) + logger.info(f"pattern = {pattern}") chunkSize = args.chunkSize - logger.info("chunkSize = {}".format(chunkSize)) + logger.info(f"chunkSize = {chunkSize}") groupBy = args.groupBy - logger.info("groupBy = {}".format(groupBy)) + logger.info(f"groupBy = {groupBy}") main( inpDir=inpDir, diff --git a/utils/filepattern-generator-plugin/tests/test_main.py b/utils/filepattern-generator-plugin/tests/test_main.py index caeae1f0d..04f56fbcb 100644 --- a/utils/filepattern-generator-plugin/tests/test_main.py +++ b/utils/filepattern-generator-plugin/tests/test_main.py @@ -21,7 +21,6 @@ class 
Test_Filepattern_Generator(unittest.TestCase): def setUp(self) -> None: - self.inpDir = inpDir self.pattern = pattern self.chunkSize = chunkSize diff --git a/utils/filepattern-generator-plugin/tests/version_test.py b/utils/filepattern-generator-plugin/tests/version_test.py index 25f0ea2e3..d1efb2554 100644 --- a/utils/filepattern-generator-plugin/tests/version_test.py +++ b/utils/filepattern-generator-plugin/tests/version_test.py @@ -1,46 +1,44 @@ -import unittest, json +import json +import unittest from pathlib import Path -import urllib.request as request +from urllib import request class VersionTest(unittest.TestCase): - """Verify VERSION is correct""" + """Verify VERSION is correct.""" version_path = Path(__file__).parent.parent.joinpath("VERSION") json_path = Path(__file__).parent.parent.joinpath("plugin.json") url = "https://hub.docker.com/repository/docker/polusai/filepattern-generator-plugin/tags?page=1&ordering=last_updated" def test_plugin_manifest(self): - """Tests VERSION matches the version in the plugin manifest""" - + """Tests VERSION matches the version in the plugin manifest.""" # Get the plugin version - with open(self.version_path, "r") as file: + with open(self.version_path) as file: version = file.readline() # Load the plugin manifest - with open(self.json_path, "r") as file: + with open(self.json_path) as file: plugin_json = json.load(file) - self.assertEqual(plugin_json["version"], version) - self.assertTrue(plugin_json["containerId"].endswith(version)) + assert plugin_json["version"] == version + assert plugin_json["containerId"].endswith(version) def test_docker_hub(self): - """Tests VERSION matches the latest docker container tag""" - + """Tests VERSION matches the latest docker container tag.""" # Get the plugin version - with open(self.version_path, "r") as file: + with open(self.version_path) as file: version = file.readline() response = json.load(request.urlopen(self.url)) if len(response["results"]) == 0: self.fail( - "Could not find 
repository or no containers are in the repository." + "Could not find repository or no containers are in the repository.", ) latest_tag = json.load(response)["results"][0]["name"] - self.assertEqual(latest_tag, version) + assert latest_tag == version if __name__ == "__main__": - unittest.main() diff --git a/utils/midrc-download-tool/src/polus/images/utils/midrc_download/__init__.py b/utils/midrc-download-tool/src/polus/images/utils/midrc_download/__init__.py index 3ea450b4a..8bdfc0eca 100644 --- a/utils/midrc-download-tool/src/polus/images/utils/midrc_download/__init__.py +++ b/utils/midrc-download-tool/src/polus/images/utils/midrc_download/__init__.py @@ -1,5 +1,3 @@ """midrc_download.""" __version__ = "0.1.0" - - diff --git a/utils/polus-generic-to-image-collection-plugin/Dockerfile b/utils/polus-generic-to-image-collection-plugin/Dockerfile index 5a64d22d4..bae699ff7 100644 --- a/utils/polus-generic-to-image-collection-plugin/Dockerfile +++ b/utils/polus-generic-to-image-collection-plugin/Dockerfile @@ -11,4 +11,4 @@ COPY src ${EXEC_DIR}/ RUN pip3 install -r ${EXEC_DIR}/requirements.txt --no-cache-dir -ENTRYPOINT ["python3", "/opt/executables/main.py"] \ No newline at end of file +ENTRYPOINT ["python3", "/opt/executables/main.py"] diff --git a/utils/polus-generic-to-image-collection-plugin/VERSION b/utils/polus-generic-to-image-collection-plugin/VERSION index 6da28dde7..17e51c385 100644 --- a/utils/polus-generic-to-image-collection-plugin/VERSION +++ b/utils/polus-generic-to-image-collection-plugin/VERSION @@ -1 +1 @@ -0.1.1 \ No newline at end of file +0.1.1 diff --git a/utils/polus-generic-to-image-collection-plugin/build-docker.sh b/utils/polus-generic-to-image-collection-plugin/build-docker.sh index 927d8b280..df84b574d 100755 --- a/utils/polus-generic-to-image-collection-plugin/build-docker.sh +++ b/utils/polus-generic-to-image-collection-plugin/build-docker.sh @@ -1,2 +1,2 @@ version=$( None: - + +def validate_and_copy( + file: Path, + outDir: Path, +) -> 
None: # Enter context manager to verify the file is a tiled tiff - with BioReader(file['file'],backend='python') as br: - - shutil.copy2(file['file'],outDir.joinpath(file['file'].name)) - -def main(inpDir: Path, - outDir: Path, - ) -> None: - + with BioReader(file["file"], backend="python"): + shutil.copy2(file["file"], outDir.joinpath(file["file"].name)) + + +def main( + inpDir: Path, + outDir: Path, +) -> None: pattern = ".*.ome.tif" - fp = filepattern.FilePattern(inpDir,pattern) - + fp = filepattern.FilePattern(inpDir, pattern) + files = [f[0] for f in fp] - + threads = [] - + with ThreadPoolExecutor(cpu_count()) as executor: - - for ind,file in enumerate(files): - - threads.append(executor.submit(validate_and_copy,file,outDir)) - - done, not_done = wait(threads,timeout=0) - - logger.info('Copy progress: {:6.2f}%'.format(100*len(done)/len(threads))) - + for ind, file in enumerate(files): + threads.append(executor.submit(validate_and_copy, file, outDir)) + + done, not_done = wait(threads, timeout=0) + + logger.info("Copy progress: {:6.2f}%".format(100 * len(done) / len(threads))) + while len(not_done) > 0: - - done, not_done = wait(threads,timeout=5) - - logger.info('Copy progress: {:6.2f}%'.format(100*len(done)/len(threads))) + done, not_done = wait(threads, timeout=5) -if __name__=="__main__": + logger.info( + "Copy progress: {:6.2f}%".format(100 * len(done) / len(threads)), + ) - ''' Argument parsing ''' + +if __name__ == "__main__": + """Argument parsing""" logger.info("Parsing arguments...") - parser = argparse.ArgumentParser(prog='main', description='Copies .ome.tif files with proper tile format from a generic data type to an image collection.') - + parser = argparse.ArgumentParser( + prog="main", + description="Copies .ome.tif files with proper tile format from a generic data type to an image collection.", + ) + # Input arguments - parser.add_argument('--inpDir', dest='inpDir', type=str, - help='Input image collection to be processed by this plugin', 
required=True) + parser.add_argument( + "--inpDir", + dest="inpDir", + type=str, + help="Input image collection to be processed by this plugin", + required=True, + ) # Output arguments - parser.add_argument('--outDir', dest='outDir', type=str, - help='Output collection', required=True) - + parser.add_argument( + "--outDir", dest="outDir", type=str, help="Output collection", required=True, + ) + # Parse the arguments args = parser.parse_args() inpDir = args.inpDir fpath = Path(args.inpDir) - if (fpath.joinpath('images').is_dir()): + if fpath.joinpath("images").is_dir(): # switch to images folder if present - fpath = str(Path(args.inpDir).joinpath('images').absolute()) - logger.info('inpDir = {}'.format(inpDir)) + fpath = str(Path(args.inpDir).joinpath("images").absolute()) + logger.info(f"inpDir = {inpDir}") outDir = Path(args.outDir) - logger.info('outDir = {}'.format(outDir)) - - main(inpDir=inpDir, - outDir=outDir) \ No newline at end of file + logger.info(f"outDir = {outDir}") + + main(inpDir=inpDir, outDir=outDir) diff --git a/utils/polus-generic-to-image-collection-plugin/src/requirements.txt b/utils/polus-generic-to-image-collection-plugin/src/requirements.txt index 6dd96c62d..aae7cb2da 100644 --- a/utils/polus-generic-to-image-collection-plugin/src/requirements.txt +++ b/utils/polus-generic-to-image-collection-plugin/src/requirements.txt @@ -1 +1 @@ -filepattern==1.4.7 \ No newline at end of file +filepattern==1.4.7 diff --git a/utils/polus-generic-to-image-collection-plugin/tests/__init__.py b/utils/polus-generic-to-image-collection-plugin/tests/__init__.py index ce1651fca..e33d76590 100644 --- a/utils/polus-generic-to-image-collection-plugin/tests/__init__.py +++ b/utils/polus-generic-to-image-collection-plugin/tests/__init__.py @@ -1,11 +1,13 @@ from unittest import TestSuite + from .version_test import VersionTest test_cases = (VersionTest,) + def load_tests(loader, tests, pattern): suite = TestSuite() for test_class in test_cases: tests = 
loader.loadTestsFromTestCase(test_class) suite.addTests(tests) - return suite \ No newline at end of file + return suite diff --git a/utils/polus-generic-to-image-collection-plugin/tests/version_test.py b/utils/polus-generic-to-image-collection-plugin/tests/version_test.py index 43a019625..71d6c7d6d 100644 --- a/utils/polus-generic-to-image-collection-plugin/tests/version_test.py +++ b/utils/polus-generic-to-image-collection-plugin/tests/version_test.py @@ -1,24 +1,24 @@ -import unittest, json +import json +import unittest from pathlib import Path + class VersionTest(unittest.TestCase): - version_path = Path(__file__).parent.parent.joinpath("VERSION") json_path = Path(__file__).parent.parent.joinpath("plugin.json") - + def test_plugin_manifest(self): - # Get the plugin version - with open(self.version_path,'r') as file: + with open(self.version_path) as file: version = file.readline() - + # Load the plugin manifest - with open(self.json_path,'r') as file: + with open(self.json_path) as file: plugin_json = json.load(file) - - self.assertEqual(plugin_json['version'],version) - self.assertTrue(plugin_json['containerId'].endswith(version)) - -if __name__=="__main__": - - unittest.main() \ No newline at end of file + + assert plugin_json["version"] == version + assert plugin_json["containerId"].endswith(version) + + +if __name__ == "__main__": + unittest.main() diff --git a/utils/polus-imagej-macro-plugin/Dockerfile b/utils/polus-imagej-macro-plugin/Dockerfile index 16b6812a5..9d74dc0e5 100644 --- a/utils/polus-imagej-macro-plugin/Dockerfile +++ b/utils/polus-imagej-macro-plugin/Dockerfile @@ -18,4 +18,4 @@ RUN pip3 install -r ${EXEC_DIR}/requirements.txt --no-cache-dir RUN mkdir /.jgo && chmod 777 /.jgo # Default command. 
Additional arguments are provided through the command line -ENTRYPOINT ["python3", "/opt/executables/main.py"] \ No newline at end of file +ENTRYPOINT ["python3", "/opt/executables/main.py"] diff --git a/utils/polus-imagej-macro-plugin/README.md b/utils/polus-imagej-macro-plugin/README.md index 11a10b31f..6bd21e119 100644 --- a/utils/polus-imagej-macro-plugin/README.md +++ b/utils/polus-imagej-macro-plugin/README.md @@ -1,15 +1,15 @@ # ImageJ Macro Plugin The plugin implements the ImageJ macro. Any macro can be run on an image as long -as the macro is performed on some input image. Note that running ImageJ macros -in headless mode is not yet fully supported by -[pyimagej](https://github.com/imagej/pyimagej); therefore, there are several +as the macro is performed on some input image. Note that running ImageJ macros +in headless mode is not yet fully supported by +[pyimagej](https://github.com/imagej/pyimagej); therefore, there are several items which are important to note when scripting macros. 1. All macros must have this line at the top `setBatchMode(true);`. If this is not present the plugin will fail to find the input image. -2. After the macro has been run on the input image it will retrieve the image +2. After the macro has been run on the input image it will retrieve the image with the same title as the input image + '-output'. An example of how to achieve this in the macro script is below. @@ -23,12 +23,12 @@ run("Gaussian Blur...", "sigma=10"); rename(original + "-output"); ``` -3. To ensure the macro was performed on the correct image the output image must +3. To ensure the macro was performed on the correct image the output image must be a different version of the original input image or the plugin will fail. The optional `--maxIterations` argument (defaults to 10) can be used to specify how many times a macro should be attempted before terminating the plugin. 
-For more information on what this plugin does, contact the author, Benjamin +For more information on what this plugin does, contact the author, Benjamin Houghton (benjamin.houghton@axleinfo.com). For more information on WIPP, visit the [official WIPP page](https://isg.nist.gov/deepzoomweb/software/wipp). diff --git a/utils/polus-imagej-macro-plugin/VERSION b/utils/polus-imagej-macro-plugin/VERSION index 7693c96bf..b1e80bb24 100644 --- a/utils/polus-imagej-macro-plugin/VERSION +++ b/utils/polus-imagej-macro-plugin/VERSION @@ -1 +1 @@ -0.1.3 \ No newline at end of file +0.1.3 diff --git a/utils/polus-imagej-macro-plugin/build-docker.sh b/utils/polus-imagej-macro-plugin/build-docker.sh index bdddc8bd8..db530d7c1 100644 --- a/utils/polus-imagej-macro-plugin/build-docker.sh +++ b/utils/polus-imagej-macro-plugin/build-docker.sh @@ -1,4 +1,4 @@ #!/bin/bash version=$( None: - """ - Attempts to close an open image window via the macro command. - """ - + +def close_image(image_title: str, ij: object) -> None: + """Attempts to close an open image window via the macro command.""" available_images = list(ij.WindowManager.getImageTitles()) - + if image_title in available_images: - - logger.debug('Closing {}'.format(image_title)) - logger.debug('Current availalbe images:') + logger.debug(f"Closing {image_title}") + logger.debug("Current availalbe images:") logger.debug(available_images) - + # Attempt to close the image via macro command - ij.py.run_macro(jpype.JString('close("{}");'.format(image_title))) - + ij.py.run_macro(jpype.JString(f'close("{image_title}");')) + else: - logger.debug('Cannot close {} not in available images'.format(image_title)) - + logger.debug(f"Cannot close {image_title} not in available images") + + # Define method to repeatedly run macro def run_macro( - numpy_input: np.array, - image_title: str, - script: str, - ij: object, - maxIterations: int - ) -> np.array: - """ - Runs macro on input image until the output received is modified version of + 
numpy_input: np.array, image_title: str, script: str, ij: object, maxIterations: int, +) -> np.array: + """Runs macro on input image until the output received is modified version of the input or max iterations have been reached. """ - # Set input as copy of input for first while loop numpy_output = np.copy(numpy_input) - + # Set iteration counter i = 0 - + # Run macro until input image does not equal output while i < maxIterations: - # Increment iteration counter i += 1 - logger.debug('Running macro attempt {}'.format(i)) - + logger.debug(f"Running macro attempt {i}") + try: - # Make sure the input image is closed before opening or re-opening close_image(image_title, ij) - # HACK: Work around ImagePlus#show() failure if no ImagePlus objects + # HACK: Work around ImagePlus#show() failure if no ImagePlus objects # are already registered. if ij.WindowManager.getIDList() is None: - logger.debug('Creating dummy image...') + logger.debug("Creating dummy image...") ij.py.run_macro('newImage("dummy", "8-bit", 1, 1, 1);') - - logger.debug('Converting to ImagePlus Object...') + logger.debug("Converting to ImagePlus Object...") # Convert to ImagePlus object java_input = ij.py.to_imageplus(numpy_input) - - logger.debug('Changing the title of the image') + + logger.debug("Changing the title of the image") # Chanage the input image title java_input.setTitle(image_title) - - logger.debug('Registering as active image...') + + logger.debug("Registering as active image...") # HACK sets the ImagePlus object as the active image java_input.show() - + # Use window manager to close all windows ij.WindowManager.closeAllWindows() - + # Get the available images available_images = list(ij.WindowManager.getImageTitles()) - logger.debug('Available Images:') + logger.debug("Available Images:") logger.debug(available_images) - + # Close all images except the input image for title in available_images: if title != image_title: close_image(title, ij) - + # Check if the intended image is the active 
assert image_title == ij.py.active_imageplus(sync=False).getTitle() - + # Run the macro on the active image ij.py.run_macro(jpype.JString(script)) - - logger.debug('Getting available images from WindowManager after macro...') + + logger.debug("Getting available images from WindowManager after macro...") for imp_id in ij.WindowManager.getIDList(): logger.debug(ij.WindowManager.getImage(imp_id)) - - logger.debug('Getting the output image...') - + + logger.debug("Getting the output image...") + # Get the active image after running the macro - java_output = ij.WindowManager.getImage(jpype.JString(image_title + '-output')) - - logger.debug('Duplicating the output image') + java_output = ij.WindowManager.getImage( + jpype.JString(image_title + "-output"), + ) + + logger.debug("Duplicating the output image") # HACK ensures the modifications to the image are sent to python java_duplicate = java_output.duplicate() - java_duplicate.setTitle(jpype.JString(image_title + '-duplicate')) - - logger.debug('Sending image to python...') + java_duplicate.setTitle(jpype.JString(image_title + "-duplicate")) + + logger.debug("Sending image to python...") # Send the macro output to python xarr_output = ij.py.from_java(java_duplicate) - logger.debug('Creating numpy output array...') + logger.debug("Creating numpy output array...") # Convert the xarray to numpy array numpy_output = xarr_output.to_numpy() - + # Log no changes were made to the image and attempt failed if np.array_equal(numpy_input, numpy_output): - logger.info('Macro attempt {} failed'.format(i)) + logger.info(f"Macro attempt {i} failed") continue - + # If changes were not made break the while loop else: break - + except: # Log the attempt failed on exception - logger.info('Macro attempt {} failed'.format(i)) + logger.info(f"Macro attempt {i} failed") finally: # Use window manager to close all windows - clears up memory ij.WindowManager.closeAllWindows() - + # Terminate the plugin after max attempts to run macro on same 
image if i >= maxIterations: - raise Exception('Failed to run macro on image {} after {} attempts'.format(image_title, i)) + msg = f"Failed to run macro on image {image_title} after {i} attempts" + raise Exception( + msg, + ) return numpy_output - + + def main(inpDir, macroDir, outDir, maxIterations): - # Define the macro path - macro = [m for m in macroDir.iterdir() if m.suffix == '.txt'][0] + macro = [m for m in macroDir.iterdir() if m.suffix == ".txt"][0] # Load and define the macro script script = "" @@ -165,72 +162,72 @@ def main(inpDir, macroDir, outDir, maxIterations): for line in fhand: script += line - - logger.debug('Macro script:\n' + script) - logger.info('macro = {}'.format(macro)) + logger.debug("Macro script:\n" + script) + logger.info(f"macro = {macro}") # Infer the file pattern of the collection pattern_guess = filepattern.infer_pattern(inpDir.iterdir()) - + # Instantiate the filepatter object fp = filepattern.FilePattern(inpDir, pattern_guess) # Get the collection's image paths - image_paths = [f[0]['file'] for f in fp() if f[0]['file'].is_file()] - + image_paths = [f[0]["file"] for f in fp() if f[0]["file"].is_file()] + # Disable the loci debug logs def disable_loci_logs(): DebugTools = scyjava.jimport("loci.common.DebugTools") DebugTools.setRootLevel("WARN") - + scyjava.when_jvm_starts(disable_loci_logs) - - logger.debug('Is JVM running: {}'.format(jpype.isJVMStarted())) - logger.info('Starting JVM...') - + + logger.debug(f"Is JVM running: {jpype.isJVMStarted()}") + logger.info("Starting JVM...") + # Instantiate imagej instance and launch JVM - ij = imagej.init('sc.fiji:fiji:2.5.0', add_legacy=True) + ij = imagej.init("sc.fiji:fiji:2.5.0", add_legacy=True) jpype.config.destroy_jvm = False - + # Define the ImageJ apps and versions - apps = ['ImageJ1', 'ImageJ2', 'Fiji'] + apps = ["ImageJ1", "ImageJ2", "Fiji"] versions = {app: None for app in apps} - + # Get the loaded apps and versions for app in apps: if ij.app().getApp(app) is not None: 
versions[app] = ij.app().getApp(app).getVersion() - logger.info('Loaded {} version {}'.format(app, versions[app])) - + logger.info(f"Loaded {app} version {versions[app]}") + # Iterate over the collection for path in image_paths: - - logger.info("Processing image: {}".format(path)) - + logger.info(f"Processing image: {path}") + # Load the current image - with BioReader(path, backend='python') as br: + with BioReader(path, backend="python") as br: numpy_input = np.squeeze(br[:, :, 0:1, 0, 0]) metadata = br.metadata br.close() - + # Define the image stem - path_stem = path.stem.split('.')[0] - logger.debug('Path stem is {}'.format(path_stem)) - + path_stem = path.stem.split(".")[0] + logger.debug(f"Path stem is {path_stem}") + # Run the macro until correct output is returned form java numpy_output = run_macro( numpy_input=numpy_input, image_title=path_stem, script=script, ij=ij, - maxIterations=maxIterations + maxIterations=maxIterations, ) - + # Make sure the input and output are not the same iamge - assert not np.array_equal(numpy_input, numpy_output), 'The input and output images are identical' - - logger.info('Saving Image...') - + assert not np.array_equal( + numpy_input, numpy_output, + ), "The input and output images are identical" + + logger.info("Saving Image...") + # Save the numpy output with BioWriter(outDir.joinpath(path.name), metadata=metadata) as bw: bw.Y = numpy_output.shape[0] @@ -239,77 +236,61 @@ def disable_loci_logs(): bw.dtype = numpy_output.dtype bw[:] = numpy_output bw.close() - - logger.info('Complete!') + + logger.info("Complete!") -if __name__ == '__main__': - +if __name__ == "__main__": # Setup Command Line Arguments logger.info("Parsing arguments...") - + # Instantiate argparser object - parser = argparse.ArgumentParser( - prog="main", description="ImageJ Macro Plugin" - ) + parser = argparse.ArgumentParser(prog="main", description="ImageJ Macro Plugin") # Add the plugin arguments parser.add_argument( - "--inpDir", - dest="inpDir", - 
type=str, - help="Collection to be processed by this plugin", - required=True - ) - + "--inpDir", + dest="inpDir", + type=str, + help="Collection to be processed by this plugin", + required=True, + ) + parser.add_argument( - "--macroDir", - dest="macroDir", - type=str, - help="The macro to apply to the collection", - required=True + "--macroDir", + dest="macroDir", + type=str, + help="The macro to apply to the collection", + required=True, ) - + parser.add_argument( - "--outDir", - dest="outDir", - type=str, - help="Output collection", - required=True + "--outDir", dest="outDir", type=str, help="Output collection", required=True, ) parser.add_argument( - "--maxIterations", - dest="maxIterations", - type=int, - help="The maximum number of macro attempts", - required=False + "--maxIterations", + dest="maxIterations", + type=int, + help="The maximum number of macro attempts", + required=False, ) - + # Parse and log the arguments args = parser.parse_args() - + _inpDir = Path(args.inpDir) - logger.info('inpDir = {}'.format(_inpDir)) - + logger.info(f"inpDir = {_inpDir}") + _macroDir = Path(args.macroDir) - logger.info('macroDir = {}'.format(_macroDir)) - + logger.info(f"macroDir = {_macroDir}") + _outDir = Path(args.outDir) - logger.info('outDir = {}'.format(_outDir)) - - - if args.maxIterations is not None: - _maxIterations = args.maxIterations - - else: - _maxIterations = 10 - logger.info('maxIterations = {}'.format(_maxIterations)) - - + logger.info(f"outDir = {_outDir}") + + _maxIterations = args.maxIterations if args.maxIterations is not None else 10 + logger.info(f"maxIterations = {_maxIterations}") + main( - inpDir=_inpDir, - macroDir=_macroDir, - outDir=_outDir, - maxIterations=_maxIterations - ) + inpDir=_inpDir, macroDir=_macroDir, outDir=_outDir, maxIterations=_maxIterations, + ) diff --git a/utils/polus-imagej-util/.gitignore b/utils/polus-imagej-util/.gitignore index 2db53aa74..f0a62d9ff 100644 --- a/utils/polus-imagej-util/.gitignore +++ 
b/utils/polus-imagej-util/.gitignore @@ -1,4 +1,4 @@ cookietin utils/polus-imagej-util/imagej-testing.py full.log -.ipynb_checkpoints \ No newline at end of file +.ipynb_checkpoints diff --git a/utils/polus-imagej-util/README.md b/utils/polus-imagej-util/README.md index f180f0638..f5f4bc647 100644 --- a/utils/polus-imagej-util/README.md +++ b/utils/polus-imagej-util/README.md @@ -3,11 +3,11 @@ The Imagej util pipeline is used to create WIPP plugins from the [Imagej image processing operations (ops)](https://github.com/imagej/tutorials/tree/master/notebooks/1-Using-ImageJ). As of now this pipeline had the ability to generate approximately 92 different plugins with a total of 253 overloading methods. Since the Imagej ops were written in Java each op has a number of different overloading methods for different data type inputs. When an op is called the appropriate overloading method is used based upon the input data types. However, it should be noted that this pipeline is still under development and about 40% currently pass the automatically generated unit tests. In order for an op to be generated at least one of its overloading methods must be currently supported. Below are the criteria for op overloading method generation. -- All data types of the required inputs must map from a WIPP data type to an Imagej data type. Not all data type conversions are currently supported in this version. +- All data types of the required inputs must map from a WIPP data type to an Imagej data type. Not all data type conversions are currently supported in this version. - The output data type must map from an Imagej data type to a WIPP data type. - At least one of the required inputs must map to a `collection` data type. -In addition to the above criteria, at this time only the required inputs of the Imagej ops can be used when the op is called. The optional inputs are documented in the log files but not used when generating plugins. 
+In addition to the above criteria, at this time only the required inputs of the Imagej ops can be used when the op is called. The optional inputs are documented in the log files but not used when generating plugins. ## How to use @@ -93,7 +93,7 @@ This file should contain a list of the packages (including versions) that are us ### tests/unit_test.py -This file is automatically generated during plugin generation, it is not intended to be run directly. Instead, it is run using a shell command, see Imagej Testing in the next section. +This file is automatically generated during plugin generation, it is not intended to be run directly. Instead, it is run using a shell command, see Imagej Testing in the next section. ## Imagej Testing diff --git a/utils/polus-imagej-util/bfio.cfg b/utils/polus-imagej-util/bfio.cfg index 20119b031..8ea94c314 100644 --- a/utils/polus-imagej-util/bfio.cfg +++ b/utils/polus-imagej-util/bfio.cfg @@ -9,4 +9,4 @@ replace = labshare/polus-bfio-util:{new_version} [bumpversion:file:{{cookiecutter.project_slug}}/src/requirements.txt] search = bfio=={current_version} -replace = bfio=={new_version} \ No newline at end of file +replace = bfio=={new_version} diff --git a/utils/polus-imagej-util/classes/logtemplates/mainlog.txt b/utils/polus-imagej-util/classes/logtemplates/mainlog.txt index b6ffdb836..7d97e1f95 100644 --- a/utils/polus-imagej-util/classes/logtemplates/mainlog.txt +++ b/utils/polus-imagej-util/classes/logtemplates/mainlog.txt @@ -3,26 +3,26 @@ Log is specified in the following format: Op Number: The count of imagej ops parsed -Op Name: The namespace.op of the op +Op Name: The namespace.op of the op e.g. morphology.erode - -Op Method: The imagej op overload method + +Op Method: The imagej op overload method e.g. 'ListErode' -Full Path: Full path of the overloading method namespace.op.method +Full Path: Full path of the overloading method namespace.op.method e.g. 
'net.imagej.ops.morphology.erode.ListErode' -Inputs: A list of imagej and WIPP input data types +Inputs: A list of imagej and WIPP input data types e.g. [ - ((imagej data type of var1, imagej title of var1), WIPP data type of var1), + ((imagej data type of var1, imagej title of var1), WIPP data type of var1), ((imagej data type of var2, imagej title of var2), WIPP data type var2)... ] -Outputs: The output data type +Outputs: The output data type e.g. [(imagej data type, WIPP data type)] -Supported: True if supported, if not a boolean list indicating why op not supported +Supported: True if supported, if not a boolean list indicating why op not supported e.g. [ - True/False, # Indicates if input/output can be mapped to WIPP data types + True/False, # Indicates if input/output can be mapped to WIPP data types True/False # Indicates if input and output both contain collection data types ] diff --git a/utils/polus-imagej-util/classes/populate.py b/utils/polus-imagej-util/classes/populate.py index 17abe0151..9405b1c9e 100644 --- a/utils/polus-imagej-util/classes/populate.py +++ b/utils/polus-imagej-util/classes/populate.py @@ -1,1107 +1,1096 @@ -import re import json import logging -import scyjava -import imagej +import re from pathlib import Path +import imagej +import scyjava + """ -This file provides classes to parse the imagej ops help and create cookiecutter -json templates. This file is should be run before generating ops with -generate.py or using the ImageJ UI. +This file provides classes to parse the imagej ops help and create cookiecutter +json templates. This file is should be run before generating ops with +generate.py or using the ImageJ UI. 
""" + # Disable warning message def disable_loci_logs(): - DebugTools = scyjava.jimport('loci.common.DebugTools') - DebugTools.setRootLevel('WARN') + DebugTools = scyjava.jimport("loci.common.DebugTools") + DebugTools.setRootLevel("WARN") + + scyjava.when_jvm_starts(disable_loci_logs) class Op: - - """A class to represent each Imagej overload method with corresponding + """A class to represent each Imagej overload method with corresponding inputs and outputs. - - The Op class is intended to be used in conjunction with the Plugin and - Populator classes. Altogether the three classes parse and store the imagej - ops help and finally construct the json template files used to construct the - main program and unit testing. Each Op represents a single imagej - overloading method. The attributes of the op store the various input and - output titles and their corresponding WIPP and imagej data types. The class - also stores the required and optional inputs as indicated by a '?' directly + + The Op class is intended to be used in conjunction with the Plugin and + Populator classes. Altogether the three classes parse and store the imagej + ops help and finally construct the json template files used to construct the + main program and unit testing. Each Op represents a single imagej + overloading method. The attributes of the op store the various input and + output titles and their corresponding WIPP and imagej data types. The class + also stores the required and optional inputs as indicated by a '?' directly following an input title in the imagej ops help (e.g. ?optional_input1). - + Attributes: name: A string representing the imagej name of the overloading method - plugin: A Plugin class member representing the imagej op of which + plugin: A Plugin class member representing the imagej op of which the overloading method belongs. 
- _inputs: A list of tuples containing the input title, imagej data type + _inputs: A list of tuples containing the input title, imagej data type and WIPP data type (see full.log for structure of _inputs list). - _output: A list containing a single tuple of the output title, imagej - data type and imagej data type (see full.log for structure of + _output: A list containing a single tuple of the output title, imagej + data type and imagej data type (see full.log for structure of _output list). - _required_inputs: A list of tuples containing input title, imagej data + _required_inputs: A list of tuples containing input title, imagej data type and WIPP data type of the required inputs of the method. - _optional_inputs: A list of tuples containing input title, imagej data + _optional_inputs: A list of tuples containing input title, imagej data type and WIPP data type of the optional inputs of the method. - full_support: A boolean indicating if the overloading method is - supported using required and optional inputs. At this time no - optional inputs are supported. - partial_support: A boolean indicating if the overloading method is - supported using the required inputs. Additionally a method must take - a collection as input and output a collection to be partially + full_support: A boolean indicating if the overloading method is + supported using required and optional inputs. At this time no + optional inputs are supported. + partial_support: A boolean indicating if the overloading method is + supported using the required inputs. Additionally a method must take + a collection as input and output a collection to be partially supported. support_msg: A list of booleans indicating why an op method is or is not - supported. The first value indicates if all required inputs and - output can be mapped to a WIPP data type and the second value - indcates if both the required inputs and output contain a collection + supported. 
The first value indicates if all required inputs and + output can be mapped to a WIPP data type and the second value + indcates if both the required inputs and output contain a collection data type. - imagej_input_data_types: A list of strings representing the imagej data + imagej_input_data_types: A list of strings representing the imagej data types of the method's inputs. - imagej_input_titles: A list of strings representing the imagej input + imagej_input_titles: A list of strings representing the imagej input titles of the method. - wipp_type_inputs: A list of strings representing the WIPP data types of + wipp_type_inputs: A list of strings representing the WIPP data types of the method's inputs. - wipp_type_output: A string representing the WIPP data type of the + wipp_type_output: A string representing the WIPP data type of the method's output. - imagej_type_output: A string representing the imagej data type of the + imagej_type_output: A string representing the imagej data type of the method's output. - imagej_title_output: A string representing the imagej output title of + imagej_title_output: A string representing the imagej output title of the method. - wipp_type_required_inputs: A list of strings representing the WIPP data + wipp_type_required_inputs: A list of strings representing the WIPP data type of the required inputs. - imagej_type_required_inputs: A list of strings representing the imagej + imagej_type_required_inputs: A list of strings representing the imagej data type of the required inputs. - imagej_title_required_inputs A list of strings representing the imagej + imagej_title_required_inputs A list of strings representing the imagej input titles the required inputs. 
- + """ - - def __init__(self, - plugin: 'Plugin', - name: str, - full_path: str, - inputs: list, - output: tuple): - - """A method to instantiate an Op class member - + + def __init__( + self, plugin: "Plugin", name: str, full_path: str, inputs: list, output: tuple, + ) -> None: + """A method to instantiate an Op class member. + Args: - plugin: The Plugin object representing the imagej op that the + plugin: The Plugin object representing the imagej op that the overloading method belongs. Plugin instance. name: A string of representing the overloading method name. - full_path: A string representing the full Java call for the op and + full_path: A string representing the full Java call for the op and its overloading method. - inputs: A list of tuples containing the imagej input titles and + inputs: A list of tuples containing the imagej input titles and imagej data types. - output: A tuple containing the imagej output title and imagej data + output: A tuple containing the imagej output title and imagej data type. - + Raises: TypeError: Raises if inputs is not a list. 
""" - if not isinstance(inputs, list): - raise TypeError('inputs must be an instance of a list') - + msg = "inputs must be an instance of a list" + raise TypeError(msg) + # Define class attributes self.plugin = plugin self.name = name self.full_path = full_path self._inputs = [] self._output = [] - + # Check and update input titles that will interfere with other variable # names and/or python reserved words for input_index, input in enumerate(inputs): # Check if input title will interfere with reserved python keyword - if input[1] == 'in': - # Change the input name from "in" to "in1" - inputs[input_index] = (input[0], 'in1') - - # Change the name of the out as input argument so it does not + if input[1] == "in": + # Change the input name from "in" to "in1" + inputs[input_index] = (input[0], "in1") + + # Change the name of the out as input argument so it does not # interfere with output from op - elif input[1] == 'out': + elif input[1] == "out": # Change name from "out" to "out_input" - inputs[input_index] = (input[0], 'out_input') + inputs[input_index] = (input[0], "out_input") - elif input[1] == 'out?': + elif input[1] == "out?": # Change name from "out" to "out_input" - inputs[input_index] = (input[0], 'out_input?') - - # Check if the output is not titled 'out' and change to 'out' if + inputs[input_index] = (input[0], "out_input?") + + # Check if the output is not titled 'out' and change to 'out' if # neccessary - if output[1] != 'out': + if output[1] != "out": output = list(output) - output[1] = 'out' + output[1] = "out" output = tuple(output) - + # Map the inputs and output from ImageJ data type to WIPP data type - #self.__dataMap(inputs, output) - self._inputs.extend([(_input,Op.imagej_to_Wipp_map.get(_input[0],'unknown')) for _input in inputs]) - self._output.extend([(output,Op.imagej_to_Wipp_map.get(output[0],'unknown'))]) - - # Define required and optional inputs by testing last character in each + self._inputs.extend( + [ + (_input, 
Op.imagej_to_Wipp_map.get(_input[0], "unknown")) + for _input in inputs + ], + ) + self._output.extend([(output, Op.imagej_to_Wipp_map.get(output[0], "unknown"))]) + + # Define required and optional inputs by testing last character in each # input title self._required_inputs = [ - _input for _input in self._inputs if _input[0][1][-1] != '?' + _input for _input in self._inputs if _input[0][1][-1] != "?" ] self._optional_inputs = [ - _input for _input in self._inputs if _input[0][1][-1] == '?' + _input for _input in self._inputs if _input[0][1][-1] == "?" ] - - # Determine if the op is currently supported and define member + + # Determine if the op is currently supported and define member # attributes for partial and full support self.__support() - + @property def imagej_input_data_types(self): return [var[0][0] for var in self._inputs] - + @property def imagej_input_titles(self): return [var[0][1] for var in self._inputs] - + @property def wipp_type_inputs(self): return [var[1] for var in self._inputs] - + @property def wipp_type_output(self): return self._output[0][1] - + @property def imagej_type_output(self): return self._output[0][0][0] - + @property def imagej_title_output(self): - return self._output[0][0][1].replace('?', '') - + return self._output[0][0][1].replace("?", "") + @property def wipp_type_required_inputs(self): return [var[1] for var in self._required_inputs] - + @property def imagej_type_required_inputs(self): return [var[0][0] for var in self._required_inputs] - - @ property + + @property def imagej_title_required_inputs(self): return [var[0][1] for var in self._required_inputs] - - + # Define the imagej data types that map to collection COLLECTION_TYPES = [ - 'Iterable', - 'Interval', - 'IterableInterval', + "Iterable", + "Interval", + "IterableInterval", # 'IterableRegion', - 'RandomAccessibleInterval', - 'ImgPlus', - 'PlanarImg', + "RandomAccessibleInterval", + "ImgPlus", + "PlanarImg", # 'ImgFactory', # 'ImgLabeling', - 'ArrayImg', - 'Img' + 
"ArrayImg", + "Img", ] # Define the imagej data types that map to number NUMBER_TYPES = [ - 'RealType', - 'NumericType', - 'byte', 'ByteType', 'UnsignedByteType', - 'short','ShortType','UnsignedShortType', - 'int','Integer','IntegerType', - 'long', 'Long', 'LongType', 'UnsignedLongType', - 'float','FloatType', - 'double','Double','DoubleType' + "RealType", + "NumericType", + "byte", + "ByteType", + "UnsignedByteType", + "short", + "ShortType", + "UnsignedShortType", + "int", + "Integer", + "IntegerType", + "long", + "Long", + "LongType", + "UnsignedLongType", + "float", + "FloatType", + "double", + "Double", + "DoubleType", ] # Define the imagej data types that map to boolean - BOOLEAN_TYPES = [ - 'boolean','Boolean','BooleanType' - ] + BOOLEAN_TYPES = ["boolean", "Boolean", "BooleanType"] # Define the imagej data types that map to array ARRAY_TYPES = [ - # 'double[][]', - 'List', - 'double[]', - 'long[]', - 'ArrayList', - # 'Object[]', - 'int[]' + "List", + "double[]", + "long[]", + "ArrayList", + "int[]", ] # Define the imagej data types that map to string - STRING_TYPES = [ - 'RealLocalizable', - 'String' - ] + STRING_TYPES = ["RealLocalizable", "String"] # Save all imagej data types as key and corresponding WIPP data type as value in dictionary - imagej_to_Wipp_map = {imagej_data_type: 'collection' for imagej_data_type in COLLECTION_TYPES} - imagej_to_Wipp_map.update({imagej_data_type: 'number' for imagej_data_type in NUMBER_TYPES}) - imagej_to_Wipp_map.update({imagej_data_type: 'boolean' for imagej_data_type in BOOLEAN_TYPES}) - imagej_to_Wipp_map.update({imagej_data_type: 'array' for imagej_data_type in ARRAY_TYPES}) - imagej_to_Wipp_map.update({imagej_data_type: 'string' for imagej_data_type in STRING_TYPES}) - - - def __dataMap( - self, - inputs: list, - output: tuple - ) -> None: - - """This method is DEPRACTED - A method to map each imagej input data + imagej_to_Wipp_map = { + imagej_data_type: "collection" for imagej_data_type in COLLECTION_TYPES + } + 
imagej_to_Wipp_map.update( + {imagej_data_type: "number" for imagej_data_type in NUMBER_TYPES}, + ) + imagej_to_Wipp_map.update( + {imagej_data_type: "boolean" for imagej_data_type in BOOLEAN_TYPES}, + ) + imagej_to_Wipp_map.update( + {imagej_data_type: "array" for imagej_data_type in ARRAY_TYPES}, + ) + imagej_to_Wipp_map.update( + {imagej_data_type: "string" for imagej_data_type in STRING_TYPES}, + ) + + def __dataMap(self, inputs: list, output: tuple) -> None: + """This method is DEPRACTED - A method to map each imagej input data type to a WIPP data type. - - This method is called when parsing the imagej ops help and is not - intended to be called directly. The method attempts to map all inputs - and the output from an imagej data type to a WIPP data type. Note that - the method does not create a WIPP data object, the data type is only - stored as a string in the input and output attributes of each member - method. If a data type conversion is not currently supported the method + + This method is called when parsing the imagej ops help and is not + intended to be called directly. The method attempts to map all inputs + and the output from an imagej data type to a WIPP data type. Note that + the method does not create a WIPP data object, the data type is only + stored as a string in the input and output attributes of each member + method. If a data type conversion is not currently supported the method will store 'unknown' for the data type. - - + + Args: - inputs: A list of tuples containing the imagej input titles and data + inputs: A list of tuples containing the imagej input titles and data types. output: A tuple containing the imagej output title and data type. 
- + Returns: None - + Raises: None """ - # Create empty lists to store input and output data types self._inputs = [] self._output = [] - + # Iterate over all inputs for _input in inputs: - # Try to map from imagej data type to WIPP data type try: self._inputs.append((_input, Op.imagej_to_Wipp_map[_input[0]])) - + # Place WIPP data type as unknown if not currently supported except: - self._inputs.append((_input, 'unknown')) - + self._inputs.append((_input, "unknown")) + # Try to map output imagej data type to WIPP data type try: self._output.append((output, Op.imagej_to_Wipp_map[output[0]])) - + # Place WIPP data type as unknown if not currently supported except: - self._output.append((output, 'unknown')) - + self._output.append((output, "unknown")) - def __support(self): - - """A method to determine if the imagej op is currently supported by the + """A method to determine if the imagej op is currently supported by the op generation pipeline. - - This method uses the input and output data types to determine if an op - is currently supported. For an op to be supported is must have - collection as one of the required inputs and the output must also be a - collection. Additionally, all the required inputs and the output - must be able to map from imagej to WIPP for partial support. For full - support all of the inputs and output must be able to map from imagej to - WIPP. If the data type conversion is not supported 'unknown' will be - stored as the WIPP type. At this time, this pipeline only supports - required inputs. Therefore, full support is arbitrary for the purposes - of plugin generation, this feature was only added for future + + This method uses the input and output data types to determine if an op + is currently supported. For an op to be supported is must have + collection as one of the required inputs and the output must also be a + collection. 
Additionally, all the required inputs and the output + must be able to map from imagej to WIPP for partial support. For full + support all of the inputs and output must be able to map from imagej to + WIPP. If the data type conversion is not supported 'unknown' will be + stored as the WIPP type. At this time, this pipeline only supports + required inputs. Therefore, full support is arbitrary for the purposes + of plugin generation, this feature was only added for future development. - + Args: None - + Returns: None - + Raises: None """ # Create initial support message for partial - self.support_msg = [ - True, - True - ] - + self.support_msg = [True, True] + """Check for full support""" - + # Initially set full support True self.full_support = True - + # If inputs or output cannot be mapped to WIPP data type - if 'unknown' in self.wipp_type_inputs + [self.wipp_type_output]: + if "unknown" in [*self.wipp_type_inputs, self.wipp_type_output]: self.full_support = False - + # Check if the input and output both contain collection data types - elif 'collection' not in self.wipp_type_inputs or 'collection' not in self.wipp_type_output: + elif ( + "collection" not in self.wipp_type_inputs + or "collection" not in self.wipp_type_output + ): self.full_support = False - + """Check for partial support""" - + # Initially set partial support to True self.partial_support = True - + # Check that required inputs and ouput can be mapped to WIPP data type - if 'unknown' in self.wipp_type_required_inputs + [self.wipp_type_output]: + if "unknown" in [*self.wipp_type_required_inputs, self.wipp_type_output]: self.partial_support = False self.support_msg[0] = False - + # Check if the input and output both contain collection data types - if 'collection' not in self.wipp_type_required_inputs or 'collection' not in self.wipp_type_output: + if ( + "collection" not in self.wipp_type_required_inputs + or "collection" not in self.wipp_type_output + ): self.partial_support = False 
self.support_msg[1] = False - - - - + + class Plugin: - """A class to represent imagej ops and plugins. - - The Plugin class is used to store all the information about each plugin, - which is later used to build the plugin directory and files. Each Plugin - can be thought of as a single imagej op. Each op in turn has a number of - overloading methods for different data types. The attributes of a Plugin - object store the relevant information about the op and its child overloading - methods. The Populate class also uses to build the cookiecutter json files + + The Plugin class is used to store all the information about each plugin, + which is later used to build the plugin directory and files. Each Plugin + can be thought of as a single imagej op. Each op in turn has a number of + overloading methods for different data types. The attributes of a Plugin + object store the relevant information about the op and its child overloading + methods. The Populate class also uses to build the cookiecutter json files for plugin generation. - + Attributes: _name: A string representing the imagej op - _ops: A dictionary containing the overloading methods of the op as keys + _ops: A dictionary containing the overloading methods of the op as keys and class Op objects as values. - _all_required_inputs: A dictionary containing information about the + _all_required_inputs: A dictionary containing information about the required inputs of all overloading methods. - _all_outputs: A dictionary containing information about the outputs of + _all_outputs: A dictionary containing information about the outputs of all overloading methods. - supported_ops: A dictionary containing the supported overloading methods + supported_ops: A dictionary containing the supported overloading methods as keys and the corresponding class Op objects as values. """ - - def __init__(self, - name: str): - + + def __init__(self, name: str) -> None: """A method to instantiate a Plugin object. 
- + Args: name: A string representing imagej op name. - + Raises: None """ - self._name = name self._ops = {} self._all_required_inputs = {} self._all_outputs = {} self.supported_ops = {} - - def add_op(self, - op: 'Op') -> None: - - """A method to store information about an overloading method in the + + def add_op(self, op: "Op") -> None: + """A method to store information about an overloading method in the class member's attributes. - - This method's function is to store information about an imagej op and - its overloading methods. As overloading methods are parsed from the - imagej ops help, class Ops objects are instantiated and referenced in - the _ops attribute. The method also stores information about the op - which is used to build cookiecutter json template files. - + + This method's function is to store information about an imagej op and + its overloading methods. As overloading methods are parsed from the + imagej ops help, class Ops objects are instantiated and referenced in + the _ops attribute. The method also stores information about the op + which is used to build cookiecutter json template files. + Args: - op: An object of class Op, representing one of the ops imagej - overloading methods. - + op: An object of class Op, representing one of the ops imagej + overloading methods. 
+ Returns: None - + Raises: None """ - # Add the op to the _ops dicitonary attribute self._ops[op.name] = op - + # Check if the op is currently supported if op.partial_support: - # Add op to list of supported ops self.supported_ops[op.name] = op - + # Add each var to plugin's input dictionary - for title, dtype, wippType in zip(op.imagej_title_required_inputs, - op.imagej_type_required_inputs, - op.wipp_type_required_inputs): - + for title, dtype, wippType in zip( + op.imagej_title_required_inputs, + op.imagej_type_required_inputs, + op.wipp_type_required_inputs, + ): # Check if variable exists in input dictionary if title not in self._all_required_inputs: self._all_required_inputs[title] = { - 'type':wippType, - 'title':title, - 'description':title, - 'required':False, - 'call_types':{op.name:dtype}, - 'wipp_type':{op.name:wippType} - } - + "type": wippType, + "title": title, + "description": title, + "required": False, + "call_types": {op.name: dtype}, + "wipp_type": {op.name: wippType}, + } + # If variable key exists update it else: - self._all_required_inputs[title]['wipp_type'].update({op.name:wippType}) - self._all_required_inputs[title]['call_types'].update({op.name:dtype}) - if self._all_required_inputs[title]['type'] != wippType: - #raise Exception - #print('The', self._name, 'plugin has multiple input data types for the same input title across different op overloading calls') + self._all_required_inputs[title]["wipp_type"].update( + {op.name: wippType}, + ) + self._all_required_inputs[title]["call_types"].update( + {op.name: dtype}, + ) + if self._all_required_inputs[title]["type"] != wippType: + # raise Exception pass - + # Check if the output dictionary is empty if self._all_outputs == {}: - # Add the output to Library's output dictionary self._all_outputs = { - op.imagej_title_output:{ - 'type': op.wipp_type_output, - 'title': op.imagej_title_output, - 'description':'out', - 'call_types': { - op.name:op.imagej_type_output - } - } - } - + 
op.imagej_title_output: { + "type": op.wipp_type_output, + "title": op.imagej_title_output, + "description": "out", + "call_types": {op.name: op.imagej_type_output}, + }, + } + # Check if the output title is not in dictionary elif op.imagej_title_output not in self._all_outputs: - self._all_outputs.update({ - op.imagej_title_output:{ - 'type': op.wipp_type_output, - 'title': op.imagej_title_output, - 'description':'out', - 'call_types': { - op.name:op.imagej_type_output - } - } - }) - + self._all_outputs.update( + { + op.imagej_title_output: { + "type": op.wipp_type_output, + "title": op.imagej_title_output, + "description": "out", + "call_types": {op.name: op.imagej_type_output}, + }, + }, + ) + else: - self._all_outputs[op.imagej_title_output]['call_types'][op.name] = op.imagej_type_output + self._all_outputs[op.imagej_title_output]["call_types"][ + op.name + ] = op.imagej_type_output + class Populate: - """A class to parse imagej ops information and build json templates for + """A class to parse imagej ops information and build json templates for plugin generation. - - The Populate class has several methods that utilize the Op and Plugin - classes to parse store, and finally build cookiecutter json templates from - the imagej ops help. The attributes of a class Populate member store the - information about all imagej ops and their overloading methods. Note that - this class is not intended to be called directly; instead, a class member is + + The Populate class has several methods that utilize the Op and Plugin + classes to parse store, and finally build cookiecutter json templates from + the imagej ops help. The attributes of a class Populate member store the + information about all imagej ops and their overloading methods. Note that + this class is not intended to be called directly; instead, a class member is instantiated with Generate.py. The populate class also instantiates an ImageJ instance for op parsing. 
- + Attributes: - _ij: A net.imagej.Imagej instance from which to parse the imagej ops + _ij: A net.imagej.Imagej instance from which to parse the imagej ops help. log_file: A str representing the path to the log file. - log_template: A str representing the path to a txt file which is used as - the log header. This file should explain the format of the final log + log_template: A str representing the path to a txt file which is used as + the log header. This file should explain the format of the final log file. - _logger: A logging.Logger object which logs information about all imagej + _logger: A logging.Logger object which logs information about all imagej ops and methods. - _log_formatter: A logging.Formatter object to set to format of the log + _log_formatter: A logging.Formatter object to set to format of the log file. _file_handler: A logging.FileHandler object to handle to log file. - _plugins: A dic with op names as keys and class Plugin objects as - values. This dic contains the information about all imagej ops and + _plugins: A dic with op names as keys and class Plugin objects as + values. This dic contains the information about all imagej ops and their overloading methods. - json_dic: A dictionary with op names as keys and the cookiecutter json + json_dic: A dictionary with op names as keys and the cookiecutter json dictionaries to be used for plugin generation. - scale (dict): Plugins represented as keys and scale type/class + scale (dict): Plugins represented as keys and scale type/class represented as values. - + """ - + def __init__( self, - log_file='./utils/polus-imagej-util/full.log', - log_template='./utils/polus-imagej-util/classes/logtemplates/mainlog.txt' - ): - - """A method to instantiate a class Populate object - + log_file="./utils/polus-imagej-util/full.log", + log_template="./utils/polus-imagej-util/classes/logtemplates/mainlog.txt", + ) -> None: + """A method to instantiate a class Populate object. 
+ Args: log_file: A str representing the path to the log file. - log_template: A str representing the path to a txt file which is - used as the log header. This file should explain the format of + log_template: A str representing the path to a txt file which is + used as the log header. This file should explain the format of the final log file. - + Raises: None - + """ - # Instantiate the imagej instance - self._ij = imagej.init('sc.fiji:fiji:2.1.1+net.imagej:imagej-legacy:0.37.4', headless=True) - + self._ij = imagej.init( + "sc.fiji:fiji:2.1.1+net.imagej:imagej-legacy:0.37.4", headless=True, + ) + # Store the log output file and log template file path self.log_file = log_file self.log_template = log_template - + # Load the scalability configuration file - with open(Path(__file__).parents[1].joinpath('scale.json'), 'r') as f: + with open(Path(__file__).parents[1].joinpath("scale.json")) as f: self.scale = json.load(f) - + # Create dictionary to store all plugins self._plugins = {} - + # Create logger for class member self.__logger(self.log_file, self.log_template) - + # Create imagej plug in by calling the parser member method self._parser() - + def _parser(self) -> None: - """"A method to parse imagej ops help and extract imagej op information. - - This method utilizes the python re module to parse the imagej instance - ops help. The method then instantiates class Op and class Plugin - objects to store information about the ops and methods. Finally relevant + """ "A method to parse imagej ops help and extract imagej op information. + + This method utilizes the python re module to parse the imagej instance + ops help. The method then instantiates class Op and class Plugin + objects to store information about the ops and methods. Finally relevant information about the ops and methods is written to the log file. 
- + Args: None - + Returns: None - + Raises: None """ - # Get list of all available ops to be converted to plugins plugins = scyjava.to_python(self._ij.op().ops().iterator()) - - # Complile the regular expression search pattern for op overloading + + # Complile the regular expression search pattern for op overloading # methods - re_path = re.compile(r'\t(?P.*\.)(?P.*)(?=\()') - - # Coompile the regular expression search pattern for the input data + re_path = re.compile(r"\t(?P.*\.)(?P.*)(?=\()") + + # Coompile the regular expression search pattern for the input data # types and title - re_inputs = re.compile(r'(?<=\t\t)(.*?)\s(.*)(?=,|\))') - + re_inputs = re.compile(r"(?<=\t\t)(.*?)\s(.*)(?=,|\))") + # Complile the regular expression search pattern for the outputs - re_output = re.compile(r'^\((.*?)\s(.*)\)') - + re_output = re.compile(r"^\((.*?)\s(.*)\)") + # Create a counter for number of ops parsed ops_count = 0 - + # Iterate over all ops for plugin in plugins: - # Add the plugin to the dictionary self._plugins[plugin] = Plugin(plugin) - + # Get the help info about the plugin/op op_docs = scyjava.to_python(self._ij.op().help(plugin)) - + # Split the help string into seperate ops - split_ops = re.split(r'\t(?=\()', op_docs) - + split_ops = re.split(r"\t(?=\()", op_docs) + # Iterate over all op methods in the plugin/op for op_doc in split_ops[1:]: - # Increment the ops parsed count ops_count += 1 - + # Search for op path and name op_path = re_path.search(op_doc).groupdict() - + # Save op name and full path - name = op_path['name'] - full_path = op_path['path'] + name - + name = op_path["name"] + full_path = op_path["path"] + name + # Find all inputs inputs = re_inputs.findall(op_doc) - + # Search for output output = re_output.findall(op_doc)[0] - + # Create an Op object to store the op data op = Op(plugin, name, full_path, inputs, output) # Check if the op is supported - if op.partial_support: - support_msg = True - else: - support_msg = op.support_msg - - + 
support_msg = True if op.partial_support else op.support_msg + # Log the plugin info to the main log self._logger.info( - self._msg.format( - counter = ops_count, - plugin = plugin, - name = name, - full_path = full_path, - inputs = op._inputs, - output = op._output, - support = support_msg - ) + self._msg.format( + counter=ops_count, + plugin=plugin, + name=name, + full_path=full_path, + inputs=op._inputs, + output=op._output, + support=support_msg, + ), ) - + # Add the overlaoding method to the plugin/op self._plugins[plugin].add_op(op) - - def __logger(self, - log_file: str, - log_template: str) -> None: - - """A method to initialize a logger and log information about the imagej + + def __logger(self, log_file: str, log_template: str) -> None: + """A method to initialize a logger and log information about the imagej ops and overloading methods. - - The logger makes use of python's built-in logger module to log relevant - information about each op and its overloading methods as they are parsed + + The logger makes use of python's built-in logger module to log relevant + information about each op and its overloading methods as they are parsed from the imagej ops help. - + Args: log_file: A str representing the path to the log file. - log_template: A str representing the path to a txt file which is - used as the log header. This file should explain the format of + log_template: A str representing the path to a txt file which is + used as the log header. This file should explain the format of the final log file. 
- + Returns: None - + Raises: None """ - # Check if excluded log exists if Path(log_file).exists(): # Unlink excluded log Path(log_file).unlink() - + # Create a logger object with name of module self._logger = logging.getLogger(__name__) - + # Set the logger level self._logger.setLevel(logging.INFO) - + # Create a log formatter - self._log_formatter = logging.Formatter('%(message)s') - + self._log_formatter = logging.Formatter("%(message)s") + # Create handler with log file name self._file_handler = logging.FileHandler(log_file) - + # Set format of logs self._file_handler.setFormatter(self._log_formatter) - + # Add the handler to the class logger self._logger.addHandler(self._file_handler) - + # Create header info for the main log - loginfo = '' - + loginfo = "" + # Open the main log info template with open(log_template) as fhand: for line in fhand: loginfo += line - + # Close the file connection fhand.close() - + # Set the header info self._logger.info(loginfo) - # Create default message for logger - self._msg = \ - 'Op Number: {counter}\n' +\ - 'Op Name: {plugin}\n' +\ - 'Op Method: {name}\n' +\ - 'Full Path: {full_path}\n' +\ - 'Inputs: {inputs}\n' +\ - 'Output: {output}\n' +\ - 'Supported: {support}\n\n' - - - def build_json(self, - author: str, - email: str, - github_username: str, - version: str, - cookietin_path: str) -> None: - - """A method to create cookiecutter json dictionaries for plugin + self._msg = ( + "Op Number: {counter}\n" + + "Op Name: {plugin}\n" + + "Op Method: {name}\n" + + "Full Path: {full_path}\n" + + "Inputs: {inputs}\n" + + "Output: {output}\n" + + "Supported: {support}\n\n" + ) + + def build_json( + self, + author: str, + email: str, + github_username: str, + version: str, + cookietin_path: str, + ) -> None: + """A method to create cookiecutter json dictionaries for plugin generation. 
- - This method uses the information stored in each class Op object and - class Plugin object to create the final cookiecutter json + + This method uses the information stored in each class Op object and + class Plugin object to create the final cookiecutter json dictionaries to be used for plugin directories and files. Upon creation - of the json dictionary the method utilizes the json module to write - ( json.dump() ) the dictionary contents of each op into a json file in + of the json dictionary the method utilizes the json module to write + ( json.dump() ) the dictionary contents of each op into a json file in the cookietin directory. - + Args: author: A string representing the author of the plugin. email: A string representing the email of the author of the plugin. - github_username: A string representing the GitHub username of the - author of the plugin. - version: A string representing the version number of the plugin. - cookietin_path: A str representing the path to the cookietin + github_username: A string representing the GitHub username of the + author of the plugin. + version: A string representing the version number of the plugin. + cookietin_path: A str representing the path to the cookietin directory. 
- + Returns: None - + Raises: None """ - - - # Instantiate empty dictionary to store the dictionary to be converted + # Instantiate empty dictionary to store the dictionary to be converted # to json self.json_dic = {} - + # Create dic of characters to be replaced in overloading method call - # (plugin_namespace) - char_to_replace = { - '[':'(', - ']':')', - "'":'', - ' ':'' - } - + char_to_replace = {"[": "(", "]": ")", "'": "", " ": ""} + # Iterate over all imagej libraries that were parsed - for name, plugin, in self._plugins.items(): - + for ( + name, + plugin, + ) in self._plugins.items(): # Check if any ops are suppported if len(plugin.supported_ops) > 0: - - # Add the json "template" for the library to the dictionary + # Add the json "template" for the library to the dictionary # containing all library "templates" self.json_dic[name] = { - 'author': author, - 'email': email, - 'github_username': github_username, - 'version': version, - 'project_name': 'ImageJ ' + name.replace('.', ' '), - 'project_short_description': - str([op for op in plugin.supported_ops.keys()]).replace("'", '')[1:-1], - 'citation': '', - 'plugin_namespace':{ - op.name: - 'out = ij.op().' + \ - op.plugin.replace('.', '().') + \ - re.sub(r"[\s'\[\]]", - lambda x: char_to_replace[x.group(0)], - str(op.imagej_title_required_inputs)) - for op in plugin.supported_ops.values() - }, - '_inputs':{ - 'opName':{ - 'title': 'opName', - 'type': 'enum', - 'options':[ + "author": author, + "email": email, + "github_username": github_username, + "version": version, + "project_name": "ImageJ " + name.replace(".", " "), + "project_short_description": str( + list(plugin.supported_ops.keys()), + ).replace("'", "")[1:-1], + "citation": "", + "plugin_namespace": { + op.name: "out = ij.op()." 
+ + op.plugin.replace(".", "().") + + re.sub( + r"[\s'\[\]]", + lambda x: char_to_replace[x.group(0)], + str(op.imagej_title_required_inputs), + ) + for op in plugin.supported_ops.values() + }, + "_inputs": { + "opName": { + "title": "opName", + "type": "enum", + "options": [ op.name for op in plugin.supported_ops.values() - ], - 'description': 'Operation to perform', - 'required': 'False' - } + ], + "description": "Operation to perform", + "required": "False", }, - '_outputs': - plugin._all_outputs, - 'project_slug': "polus-{{ cookiecutter.project_name|lower|replace(' ', '-') }}-plugin", - 'docker_repo' : "{{ cookiecutter.project_name|lower|replace(' ', '-') }}-plugin", - 'scalability': self.scale.get(name.replace('.', '-'), "independent") - } - - + }, + "_outputs": plugin._all_outputs, + "project_slug": "polus-{{ cookiecutter.project_name|lower|replace(' ', '-') }}-plugin", + "docker_repo": "{{ cookiecutter.project_name|lower|replace(' ', '-') }}-plugin", + "scalability": self.scale.get( + name.replace(".", "-"), "independent", + ), + } + # If threhsolding op add compute threhsold call to template - if self.json_dic[name]['scalability'] == 'threshold': - self.json_dic[name]['compute_threshold'] = 'threshold = ij.op().' \ - + name.replace('.', '().') + '(histogram)' - - # Update the _inputs section dictionary with the inputs + if self.json_dic[name]["scalability"] == "threshold": + self.json_dic[name]["compute_threshold"] = ( + "threshold = ij.op()." 
+ + name.replace(".", "().") + + "(histogram)" + ) + + # Update the _inputs section dictionary with the inputs # dictionary stored in the Library attribute - self.json_dic[name]['_inputs'].update(plugin._all_required_inputs) - - # Create Path object with directory path to store + self.json_dic[name]["_inputs"].update(plugin._all_required_inputs) + + # Create Path object with directory path to store # cookiecutter.json file for each plugin - file_path = Path(cookietin_path).with_name('cookietin').joinpath(plugin._name.replace('.','-')) - + file_path = ( + Path(cookietin_path) + .with_name("cookietin") + .joinpath(plugin._name.replace(".", "-")) + ) + # Create the directory - file_path.mkdir(exist_ok=True,parents=True) - + file_path.mkdir(exist_ok=True, parents=True) + # Open the directory and place json file in directory - with open(file_path.joinpath('cookiecutter.json'),'w') as fw: - json.dump(self.json_dic[name], fw,indent=4) + with open(file_path.joinpath("cookiecutter.json"), "w") as fw: + json.dump(self.json_dic[name], fw, indent=4) class GeneratedParser: """A class to update cookiecutter templates with previously generated ImageJ plugin manifests. - + The advantage to using this class is it allows the user to update cookiecutter template files with plugin descriptions, input names and input descriptions which have to be determined through manual research about the - ImageJ op. Before using this class's update method all ImageJ plugins which + ImageJ op. Before using this class's update method all ImageJ plugins which have erroneous manifests should be stashed or removed from polus-plugins directory to avoid overwriting clean cookiecutter templates. Alternatively, - pass a list for which plugins to specifically target when updating - templates. - + pass a list for which plugins to specifically target when updating + templates. + Attributes: polus_plugins (pathlib.Path): The sysytem file path to the polus-plugins directory. 
- cookietin (pathlib.Path): The system file path to the cookiecutter + cookietin (pathlib.Path): The system file path to the cookiecutter template directory. manifests (dict): The previously generated ImageJ plugin manifests with short plugin names as keys (i.e. "filter-convolve"). cookiecutter (dict): The cookiecutter templates with short plugin names as keys (i.e. "threshold-maxLikelihood"). - + Methods: - update_templates(self, ops): A method to update the cookiecutter - template files with the previously generated ImageJ plugin + update_templates(self, ops): A method to update the cookiecutter + template files with the previously generated ImageJ plugin manifests. """ - - def __init__( - self, polus_plugins_directory=None, - cookiecutter_directory=None - ): + + def __init__(self, polus_plugins_directory=None, cookiecutter_directory=None) -> None: """Instantiates a member of the GeneratedParser class and parses all of the cookiecutter templates and plugin manifests from their respective - directories. - + directories. + Arguments: - polus_plugins_directory (str): The sysytem file path to the - polus-plugins directory. If passed as None it will automatically + polus_plugins_directory (str): The sysytem file path to the + polus-plugins directory. If passed as None it will automatically be defined assuming that populate.py is in standard location. (polus-plugins/utils/polus-imagej-util/classes/populate.py) - cookiecutter_directory (str): The system file path to the - cookiecutter template directory. If passed as None it will + cookiecutter_directory (str): The system file path to the + cookiecutter template directory. If passed as None it will automatically be defined assuming that cookietin and populate.py are in standard locations. (polus-plugins/utils/polus-imagej-util/cookietin/). 
- + Raises: None """ - # Check if polus-plugins path passed by caller if polus_plugins_directory is None: - # Define the polus-plugins directory as member attribute self.polus_plugins = Path(__file__).parents[3] - + else: # Define the polus-plugins directory as member attribute self.polus_plugins = Path(polus_plugins_directory) - + # Check if cookiecutter directory passed by caller if cookiecutter_directory is None: - # Define the cookiecutter template directory as member attribute - self.cookietin = Path(__file__).parents[1].joinpath('cookietin') - + self.cookietin = Path(__file__).parents[1].joinpath("cookietin") + # Get all ImageJ plugin manifests in polus-plugins - manifest_paths = [p for p in self.polus_plugins.rglob('polus-imagej*/plugin.json')] - + manifest_paths = list(self.polus_plugins.rglob("polus-imagej*/plugin.json")) + # Instantiate dictionary to store the plugin manifests self.manifests = {} - + # Iterate over all the plugin manifest paths for p in manifest_paths: - # Define plugin name as would be defined in cookiecutter template - plugin_name = str(p.parent.name).replace('polus-imagej-', '').replace('-plugin', '') - + plugin_name = ( + str(p.parent.name).replace("polus-imagej-", "").replace("-plugin", "") + ) + # Load the plugins manifest and save in the member attribute dict self.manifests[plugin_name] = json.load(open(p)) - - + # Define path to all cookiecutter template files - cookietin_paths = [p for p in self.cookietin.rglob('cookiecutter.json')] - + cookietin_paths = list(self.cookietin.rglob("cookiecutter.json")) + # Instantiate dictionary to store the cookiecutter templates self.cookiecutter = {} - + # iterate over all cookiecutter paths for p in cookietin_paths: - # Define plugin name as would be defined in cookiecutter template plugin_name = str(p.parent.name) - + # Load the cookiecutter template files self.cookiecutter[plugin_name] = json.load(open(p)) - - - def update_templates(self, ops='all', skip=[]): - """A method to update the 
cookiecutter template files with the + + def update_templates(self, ops="all", skip=[]): + """A method to update the cookiecutter template files with the previously generated ImageJ plugin manifests. - + This method will use the plugin manifests found in the polus-plugins - directory to update the cookiecutter template and update all of the + directory to update the cookiecutter template and update all of the fields which are typically manually entered when gerenating a plugin. This method can be used to update all templates or just a selected list. Indicates ops = 'all' to update all templates for which a manifest exists. - + Arguments: ops (list): A list of strings representing the plugin cookiecutter templates to update in shortened form (i.e. filter-gauss) skip (list): A list of plugins to not update, can be used to skip plugins with known varying number of inputs due to manual updates to the plugin. - + Returns: None - + Raises: AssertionError: Raised if the number of inputs in the cookiecutter template file do no match the number of inputs in the plugin - manifest file. This accounts for ops which have a out input for - memory allocation which does not appear in both files. + manifest file. This accounts for ops which have a out input for + memory allocation which does not appear in both files. 
""" - # Iterate over all cookiecutter template files for plugin, template in self.cookiecutter.items(): - # Check if the current op should be updated - if plugin.lower() not in ops and ops != 'all': + if plugin.lower() not in ops and ops != "all": continue - + # Check if caller wants to skip plugin elif plugin.lower() in skip: continue - + # Check if a plugin manifest exists for the current plugin elif plugin.lower() not in self.manifests.keys(): - print('{} has no manifest'.format(plugin)) + print(f"{plugin} has no manifest") continue - + else: - print('Updating {}'.format(plugin)) - + print(f"Updating {plugin}") + # Define the manifest to be used to update cookiecutter template manifest = self.manifests[plugin.lower()] - + # Determine if out_input is a cookiecutter template input - if 'out_input' in template['_inputs'].keys(): - # Check if the input lengths match - if len(template['_inputs']) - 1 != len(manifest['inputs']): - raise AssertionError('The {} template and manifest have a varying number of inputs'.format(plugin)) - + if "out_input" in template["_inputs"]: + # Check if the input lengths match + if len(template["_inputs"]) - 1 != len(manifest["inputs"]): + msg = f"The {plugin} template and manifest have a varying number of inputs" + raise AssertionError( + msg, + ) + else: # Check the input lengths for other case - if len(template['_inputs']) != len(manifest['inputs']): - raise AssertionError('The {} template and manifest have a varying number of inputs'.format(plugin)) - - + if len(template["_inputs"]) != len(manifest["inputs"]): + msg = f"The {plugin} template and manifest have a varying number of inputs" + raise AssertionError( + msg, + ) + # Update the plugins header information - template['author'] = manifest['author'] - template['project_short_description'] = manifest['description'] - template['citation'] = manifest['citation'] - + template["author"] = manifest["author"] + template["project_short_description"] = manifest["description"] + 
template["citation"] = manifest["citation"] + # Start a manifest input index i = 0 - + # Iterate over the cookiecutter template inputs and thier data - for cinp, d in template['_inputs'].items(): - + for cinp, d in template["_inputs"].items(): # Skip the memory allocation input "out_input" - if cinp == 'out_input': + if cinp == "out_input": continue - + # Update the inputs data/description - d['title'] = manifest['inputs'][i]['name'] - d['description'] = manifest['inputs'][i]['description'] - + d["title"] = manifest["inputs"][i]["name"] + d["description"] = manifest["inputs"][i]["description"] + # Increment the manifest input index i += 1 - + # Start a manifest output index i = 0 - + # Iterate over the cookiecutter template outputs and thier data - for _, d in template['_outputs'].items(): - + for _, d in template["_outputs"].items(): # Update the inputs data/description - d['title'] = manifest['outputs'][i]['name'] - d['description'] = manifest['outputs'][i]['description'] - + d["title"] = manifest["outputs"][i]["name"] + d["description"] = manifest["outputs"][i]["description"] + # Increment the manifest input index i += 1 - + # Create the plugin's cookiecutter template directory self.cookietin.joinpath(plugin).mkdir(exist_ok=True, parents=True) - + # Define path to plugin's cookiecutter template file - cookiecutter_path = self.cookietin.joinpath(plugin).joinpath('cookiecutter.json') - + cookiecutter_path = self.cookietin.joinpath(plugin).joinpath( + "cookiecutter.json", + ) + # Write the cookiecutter template file - with open(cookiecutter_path,'w') as fw: - json.dump(template, fw,indent=4) - + with open(cookiecutter_path, "w") as fw: + json.dump(template, fw, indent=4) + """This section of uses the above classes to generate cookiecutter templates""" -if __name__ == '__main__': - - import jpype +if __name__ == "__main__": import os - + + import jpype + try: - print('Starting JVM and parsing ops help\n') - + print("Starting JVM and parsing ops help\n") + # Populate 
ops by parsing the imagej operations help populater = Populate() - - print('Building json templates\n') - + + print("Building json templates\n") + # Get the current working directory cwd = Path(os.getcwd()) - + # Save a directory for the cookietin json files - cookietin_path = cwd.joinpath('utils/polus-imagej-util/cookietin') - + cookietin_path = cwd.joinpath("utils/polus-imagej-util/cookietin") + # Get the pipeline VERSION - version_path = Path(__file__).parents[1].joinpath('VERSION') - with open(version_path, 'r') as fhand: + version_path = Path(__file__).parents[1].joinpath("VERSION") + with open(version_path) as fhand: version = next(fhand) - - # Build the json dictionary to be passed to the cookiecutter module + + # Build the json dictionary to be passed to the cookiecutter module populater.build_json( - 'Benjamin Houghton', - 'benjamin.houghton@axleinfo.com', - 'bthoughton', - version, - cookietin_path) - + "Benjamin Houghton", + "benjamin.houghton@axleinfo.com", + "bthoughton", + version, + cookietin_path, + ) + finally: - - print('Shutting down JVM\n') - + print("Shutting down JVM\n") + # Remove the imagej instance del populater._ij - + # Shut down JVM jpype.shutdownJVM() - - print('Updating templates with previously genreated ops') - + + print("Updating templates with previously genreated ops") + # Instantiate the generated ops parser and update templates with manifests parser = GeneratedParser() - parser.update_templates('all', ['threshold-apply']) + parser.update_templates("all", ["threshold-apply"]) diff --git a/utils/polus-imagej-util/cookiecutter.json b/utils/polus-imagej-util/cookiecutter.json index f42da4191..cf66d48d5 100644 --- a/utils/polus-imagej-util/cookiecutter.json +++ b/utils/polus-imagej-util/cookiecutter.json @@ -1,81 +1,81 @@ { - "author": "Benjamin Houghton", - "email": "benjamin.houghton@axleinfo.com", - "github_username": "bthoughton", - "version": "0.2.0", - "project_name": "ImageJ deconvolve richardsonLucyCorrection", - 
"project_short_description": "RichardsonLucyCorrection", - "plugin_namespace": { - "RichardsonLucyCorrection": "out = ij.op().deconvolve().richardsonLucyCorrection(in1,in2,fftBuffer,fftKernel)" + "author": "Benjamin Houghton", + "email": "benjamin.houghton@axleinfo.com", + "github_username": "bthoughton", + "version": "0.2.0", + "project_name": "ImageJ deconvolve richardsonLucyCorrection", + "project_short_description": "RichardsonLucyCorrection", + "plugin_namespace": { + "RichardsonLucyCorrection": "out = ij.op().deconvolve().richardsonLucyCorrection(in1,in2,fftBuffer,fftKernel)" + }, + "_inputs": { + "opName": { + "title": "Operation", + "type": "enum", + "options": [ + "RichardsonLucyCorrection" + ], + "description": "Operation to peform", + "required": "False" }, - "_inputs": { - "opName": { - "title": "Operation", - "type": "enum", - "options": [ - "RichardsonLucyCorrection" - ], - "description": "Operation to peform", - "required": "False" - }, - "in1": { - "type": "collection", - "title": "in1", - "description": "in1", - "required": false, - "call_types": { - "RichardsonLucyCorrection": "RandomAccessibleInterval" - }, - "wipp_type": { - "RichardsonLucyCorrection": "collection" - } - }, - "in2": { - "type": "collection", - "title": "in2", - "description": "in2", - "required": false, - "call_types": { - "RichardsonLucyCorrection": "RandomAccessibleInterval" - }, - "wipp_type": { - "RichardsonLucyCorrection": "collection" - } - }, - "fftBuffer": { - "type": "collection", - "title": "fftBuffer", - "description": "fftBuffer", - "required": false, - "call_types": { - "RichardsonLucyCorrection": "RandomAccessibleInterval" - }, - "wipp_type": { - "RichardsonLucyCorrection": "collection" - } - }, - "fftKernel": { - "type": "collection", - "title": "fftKernel", - "description": "fftKernel", - "required": false, - "call_types": { - "RichardsonLucyCorrection": "RandomAccessibleInterval" - }, - "wipp_type": { - "RichardsonLucyCorrection": "collection" - } - } + "in1": { 
+ "type": "collection", + "title": "in1", + "description": "in1", + "required": false, + "call_types": { + "RichardsonLucyCorrection": "RandomAccessibleInterval" + }, + "wipp_type": { + "RichardsonLucyCorrection": "collection" + } }, - "_outputs": { - "out": { - "type": "collection", - "title": "out", - "description": "out", - "call_types": { - "RichardsonLucyCorrection": "RandomAccessibleInterval" - } - } + "in2": { + "type": "collection", + "title": "in2", + "description": "in2", + "required": false, + "call_types": { + "RichardsonLucyCorrection": "RandomAccessibleInterval" + }, + "wipp_type": { + "RichardsonLucyCorrection": "collection" + } }, - "project_slug": "polus-{{ cookiecutter.project_name|lower|replace(' ', '-') }}-plugin" -} \ No newline at end of file + "fftBuffer": { + "type": "collection", + "title": "fftBuffer", + "description": "fftBuffer", + "required": false, + "call_types": { + "RichardsonLucyCorrection": "RandomAccessibleInterval" + }, + "wipp_type": { + "RichardsonLucyCorrection": "collection" + } + }, + "fftKernel": { + "type": "collection", + "title": "fftKernel", + "description": "fftKernel", + "required": false, + "call_types": { + "RichardsonLucyCorrection": "RandomAccessibleInterval" + }, + "wipp_type": { + "RichardsonLucyCorrection": "collection" + } + } + }, + "_outputs": { + "out": { + "type": "collection", + "title": "out", + "description": "out", + "call_types": { + "RichardsonLucyCorrection": "RandomAccessibleInterval" + } + } + }, + "project_slug": "polus-{{ cookiecutter.project_name|lower|replace(' ', '-') }}-plugin" +} diff --git a/utils/polus-imagej-util/filepattern.cfg b/utils/polus-imagej-util/filepattern.cfg index e33877483..5e5072cf1 100644 --- a/utils/polus-imagej-util/filepattern.cfg +++ b/utils/polus-imagej-util/filepattern.cfg @@ -5,4 +5,4 @@ tag = False [bumpversion:file:{{cookiecutter.project_slug}}/src/requirements.txt] search = filepattern=={current_version} -replace = filepattern=={new_version} \ No newline at end 
of file +replace = filepattern=={new_version} diff --git a/utils/polus-imagej-util/generate.py b/utils/polus-imagej-util/generate.py index c373124ac..25fb670df 100644 --- a/utils/polus-imagej-util/generate.py +++ b/utils/polus-imagej-util/generate.py @@ -1,79 +1,76 @@ -import os -import json -import shutil import argparse +import json import logging +import os +import shutil from pathlib import Path - -"""This file uses the classes in populate.py and cookiecutter to automatically +"""This file uses the classes in populate.py and cookiecutter to automatically parse the ImageJ ops help and create plugins""" -if __name__ == '__main__': - - +if __name__ == "__main__": # Define the logger logger = logging.getLogger(__name__) - + # Set log level logger.setLevel(logging.DEBUG) - + # Define the logger format formatter = logging.Formatter( - fmt = '%(asctime)s - %(levelname)s - %(name)s - %(message)s', - datefmt = '%d-%b-%y %H:%M:%S' - ) - + fmt="%(asctime)s - %(levelname)s - %(name)s - %(message)s", + datefmt="%d-%b-%y %H:%M:%S", + ) + # Define the logger file handler - file_handler = logging.FileHandler('generate.log') - + file_handler = logging.FileHandler("generate.log") + # Set the filehandler format file_handler.setFormatter(formatter) - + # Add the file handler logger.addHandler(file_handler) - + # Define the parser - parser = argparse.ArgumentParser(prog='main', description='Generate Plugins') + parser = argparse.ArgumentParser(prog="main", description="Generate Plugins") # Add command-line argument for plugin name, docker repo and version parser.add_argument( - '--plugins', - dest='plugins_to_generate', - type=str, - nargs="+", + "--plugins", + dest="plugins_to_generate", + type=str, + nargs="+", default=["a", "b"], - help='Plugins which will be generated', - required=True + help="Plugins which will be generated", + required=True, ) # Parse the arguments args = parser.parse_args() plugins_to_generate = args.plugins_to_generate - + # Add plugins to generate to 
logger - logger.debug('Plugins to Generate: {}'.format(plugins_to_generate)) + logger.debug(f"Plugins to Generate: {plugins_to_generate}") # Get the polus plugins directory polus_plugins_dir = Path(__file__).parents[2] # Add logger message - logger.debug('Generating plugins with cookiecutter') + logger.debug("Generating plugins with cookiecutter") # Get the generic.py file path so that it can find the cookietin directory base_path = Path(__file__).parent # Get path to cookietin dicrectory - cookietin_path = base_path.joinpath('cookietin') + cookietin_path = base_path.joinpath("cookietin") # Get path to cookiecutter.json which is passed to cookiecutter - cookiecutter_path = base_path.joinpath('cookiecutter.json') + cookiecutter_path = base_path.joinpath("cookiecutter.json") - # Get list of all plugin directories in the cookietin directory + # Get list of all plugin directories in the cookietin directory plugins = list(cookietin_path.iterdir()) # Create path to imagej testing directory - test_dir = Path(polus_plugins_dir.joinpath('imagej-testing')) + test_dir = Path(polus_plugins_dir.joinpath("imagej-testing")) # Check if the imagej testing directory already exists if test_dir.exists(): @@ -84,16 +81,16 @@ os.mkdir(test_dir) # Create path to the imagej shell script testing file - shell_test_path = test_dir.joinpath('shell_test.py') + shell_test_path = test_dir.joinpath("shell_test.py") # Create test summary file - test_summary_path = shell_test_path.with_name('test-summary.log') + test_summary_path = shell_test_path.with_name("test-summary.log") # Create testing shell script file - with open(shell_test_path, 'w') as fhand: + with open(shell_test_path, "w") as fhand: fhand.write( - 'import os, sys\nfrom pathlib import Path\n' \ - 'src_path = Path(__file__).parents[1]\nsys.path.append(str(src_path))\n' + "import os, sys\nfrom pathlib import Path\n" + "src_path = Path(__file__).parents[1]\nsys.path.append(str(src_path))\n", ) fhand.close() @@ -101,44 +98,52 @@ 
pluging_count = 0 op_count = 0 - # Iterate over all plugin directories in the cookietin directory + # Iterate over all plugin directories in the cookietin directory for plugin in plugins: - if plugin.name in plugins_to_generate: - # Define the plugin dir path - path = polus_plugins_dir.joinpath('polus-imagej-' + plugin.name.lower() + '-plugin') + path = polus_plugins_dir.joinpath( + "polus-imagej-" + plugin.name.lower() + "-plugin", + ) - # If the plugin path is already a directory remove it recursively + # If the plugin path is already a directory remove it recursively if path.exists(): shutil.rmtree(path) - # Move the cookiecutter.json file for the current plug in to the + # Move the cookiecutter.json file for the current plug in to the # polus-imagej-util directory overwriting last plugin json file - shutil.copy(str(plugin.joinpath('cookiecutter.json')), cookiecutter_path) + shutil.copy(str(plugin.joinpath("cookiecutter.json")), cookiecutter_path) # Run the cookiecutter utility for the plugin - os.system('cookiecutter {} --output-dir {} --no-input'.format(str(base_path), str(polus_plugins_dir))) + os.system( + "cookiecutter {} --output-dir {} --no-input".format( + str(base_path), str(polus_plugins_dir), + ), + ) # Use python black to format code - os.system('black {}'.format(path)) + os.system(f"black {path}") # Get the overloading methods from the op - with open(cookiecutter_path, 'r') as f: - op_methods = json.load(f)['_inputs']['opName']['options'] + with open(cookiecutter_path) as f: + op_methods = json.load(f)["_inputs"]["opName"]["options"] f.close() # Open the shell script in append mode - with open(shell_test_path, 'a') as fhand: - + with open(shell_test_path, "a") as fhand: # Get plugin dictionary key - plugin_key = plugin.name.replace('-', '.') + plugin_key = plugin.name.replace("-", ".") # Create a list of the operating sytem commands - commands = ["python "+str(path)+"/tests/unit_test.py --opName '{}'".format(op) for op in op_methods] + commands = [ 
+ "python " + + str(path) + + f"/tests/unit_test.py --opName '{op}'" + for op in op_methods + ] # Generate the shell script lines - lines = ['os.system("{}")\n'.format(command) for command in commands] + lines = [f'os.system("{command}")\n' for command in commands] # Write command for each plugin to the shell script for line in lines: @@ -149,5 +154,5 @@ pluging_count += 1 - logger.debug('There were {} plugins generated\n'.format(pluging_count)) - logger.debug('There were {} plugin overloading methods created\n'.format(op_count)) \ No newline at end of file + logger.debug(f"There were {pluging_count} plugins generated\n") + logger.debug(f"There were {op_count} plugin overloading methods created\n") diff --git a/utils/polus-imagej-util/imagej_kaniko.py b/utils/polus-imagej-util/imagej_kaniko.py index 1fbba5f37..de9832179 100644 --- a/utils/polus-imagej-util/imagej_kaniko.py +++ b/utils/polus-imagej-util/imagej_kaniko.py @@ -1,12 +1,11 @@ -from kubernetes import client, config -from kubernetes.client.rest import ApiException import argparse +from kubernetes import client +from kubernetes import config + def setup_k8s_api(): - """ - Common actions to setup Kubernetes API access to Argo workflows - """ + """Common actions to setup Kubernetes API access to Argo workflows.""" config.load_incluster_config() # Only works inside of JupyterLab Pod return client.CustomObjectsApi() @@ -20,17 +19,27 @@ def setup_k8s_api(): plural = "workflows" # str | The custom resource's plural name. For TPRs this would be lowercase plural kind. 
-if __name__ == '__main__': - - parser = argparse.ArgumentParser(prog='main', description='Build Docker Container') +if __name__ == "__main__": + parser = argparse.ArgumentParser(prog="main", description="Build Docker Container") # Add command-line argument for plugin name, docker repo and version - parser.add_argument('--plugin_name', dest='plugin_name', type=str, - help='Plugin Name', required=True) - parser.add_argument('--docker_hub_repo', dest='docker_hub_repo', type=str, - help='Docker Repo Name', required=True) - parser.add_argument('--version', dest='version', type=str, - help='Docker Image Version', required=True) + parser.add_argument( + "--plugin_name", dest="plugin_name", type=str, help="Plugin Name", required=True, + ) + parser.add_argument( + "--docker_hub_repo", + dest="docker_hub_repo", + type=str, + help="Docker Repo Name", + required=True, + ) + parser.add_argument( + "--version", + dest="version", + type=str, + help="Docker Image Version", + required=True, + ) # Parse the arguments args = parser.parse_args() @@ -38,14 +47,14 @@ def setup_k8s_api(): docker_hub_repo = args.docker_hub_repo docker_version = args.version - generated_name = "build-polus-imagej-{}".format(plugin_name) - print('generated_name: {}'.format(generated_name)) + generated_name = f"build-polus-imagej-{plugin_name}" + print(f"generated_name: {generated_name}") - subpath = "temp/plugins/polus-imagej-{}".format(plugin_name) - print('subpath: {}'.format(subpath)) + subpath = f"temp/plugins/polus-imagej-{plugin_name}" + print(f"subpath: {subpath}") - destination = 'polusai/{}:{}'.format(docker_hub_repo, docker_version) - print('destination: {}'.format(destination)) + destination = f"polusai/{docker_hub_repo}:{docker_version}" + print(f"destination: {destination}") body = { "apiVersion": "argoproj.io/v1alpha1", @@ -58,9 +67,7 @@ def setup_k8s_api(): "name": "kaniko-secret", "secret": { "secretName": "labshare-docker", - "items": [ - {"key": ".dockerconfigjson", "path": "config.json"} 
- ], + "items": [{"key": ".dockerconfigjson", "path": "config.json"}], }, }, { @@ -76,7 +83,7 @@ def setup_k8s_api(): "args": [ "--dockerfile=/workspace/Dockerfile", "--context=dir:///workspace", - "--destination={}".format(destination) + f"--destination={destination}", ], "volumeMounts": [ { @@ -86,13 +93,15 @@ def setup_k8s_api(): { "name": "workdir", "mountPath": "/workspace", - "subPath": subpath + "subPath": subpath, }, ], }, - } + }, ], }, } - api_response = api_instance.create_namespaced_custom_object(group, version, namespace, plural, body) \ No newline at end of file + api_response = api_instance.create_namespaced_custom_object( + group, version, namespace, plural, body, + ) diff --git a/utils/polus-imagej-util/imagej_ui.ipynb b/utils/polus-imagej-util/imagej_ui.ipynb index 001461a65..af2ea35e4 100644 --- a/utils/polus-imagej-util/imagej_ui.ipynb +++ b/utils/polus-imagej-util/imagej_ui.ipynb @@ -1,251 +1,251 @@ { - "cells": [ - { - "cell_type": "code", - "execution_count": 2, - "id": "db4ec7ec-3910-4b64-985b-3cdd2ce73fce", - "metadata": {}, - "outputs": [ + "cells": [ { - "data": { - "application/vnd.jupyter.widget-view+json": { - "model_id": "26db647e1db1481699aff8f916d83909", - "version_major": 2, - "version_minor": 0 - }, - "text/plain": [ - "interactive(children=(Dropdown(description='Select Plugin:', options=('convert-bit', 'convert-cfloat32', 'conv…" + "cell_type": "code", + "execution_count": 2, + "id": "db4ec7ec-3910-4b64-985b-3cdd2ce73fce", + "metadata": {}, + "outputs": [ + { + "data": { + "application/vnd.jupyter.widget-view+json": { + "model_id": "26db647e1db1481699aff8f916d83909", + "version_major": 2, + "version_minor": 0 + }, + "text/plain": [ + "interactive(children=(Dropdown(description='Select Plugin:', options=('convert-bit', 'convert-cfloat32', 'conv\u2026" + ] + }, + "metadata": {}, + "output_type": "display_data" + } + ], + "source": [ + "import os\n", + "import json\n", + "import ipywidgets as widgets\n", + "from IPython.display 
import display\n", + "from pathlib import Path\n", + "\n", + "\n", + "class Imagej_UI:\n", + "\n", + " \"\"\"\n", + " A class to display a UI of cookiecutter template file fields.\n", + " \"\"\"\n", + "\n", + " def __init__(self, path=None):\n", + " self.wd = path\n", + "\n", + " if path is None:\n", + " self.wd = Path(os.path.abspath(''))\n", + "\n", + " # Recursively search working directory for cookietin directory\n", + " self.cookietin = [path for path in self.wd.rglob('*') if path.name == 'cookietin']\n", + "\n", + " self.plugins = []\n", + " self.names = []\n", + " self.layout = widgets.Layout(width='auto', height='40px')\n", + " self.style = {'description_width': 'initial'}\n", + "\n", + " # Get all the cookie cutter template file paths and plugin names\n", + " for path in self.cookietin:\n", + " self.plugins += [plugin for plugin in self.wd.rglob('*') if plugin.name == 'cookiecutter.json']\n", + " self.names += [plugin.parent.name for plugin in self.plugins]\n", + " self.d = {name: path for name, path in zip(self.names, self.plugins)}\n", + "\n", + " # Sort plugin names\n", + " self.names.sort()\n", + "\n", + " # Create dropdown to select plugin template file\n", + " self.p = widgets.Dropdown(\n", + " options=self.names,\n", + " value=self.names[0],\n", + " description='Select Plugin:',\n", + " disabled=False,\n", + " )\n", + "\n", + " # Get initial data from first plugin in list\n", + " with open(self.plugins[0], 'r') as fhand:\n", + " self.data = json.load(fhand)\n", + " fhand.close()\n", + "\n", + " def get_fields(self, dic, key=''):\n", + " \"\"\"\n", + " A method to get all fields and values from cookiecutter template file.\n", + " \"\"\"\n", + " data = {}\n", + "\n", + " # Recursively search the dictionary for field values\n", + " for k in dic.keys():\n", + " if isinstance(dic[k], dict):\n", + " data.update(self.get_fields(dic[k], key+':'+k))\n", + "\n", + " elif isinstance(dic[k], str):\n", + " data[str(key+':'+k)[1:]] = str(dic[k])\n", + "\n", + " 
elif isinstance(dic[k], list):\n", + " data[str(key+':'+k)[1:]] = str(dic[k]).replace(\"'\", \"\").replace('\"', '')\n", + "\n", + " return data\n", + "\n", + " def show_fields(self, path):\n", + " \"\"\"\n", + " A method to create widgets using data from selected cookiecutter\n", + " template file.\n", + " \"\"\"\n", + "\n", + " with open(self.d[self.p.value], 'r') as fhand:\n", + " self.data = json.load(fhand)\n", + " fhand.close()\n", + "\n", + " data = self.get_fields(self.data)\n", + " self.w = []\n", + "\n", + " fields = [f for f in data.keys()]\n", + " for f in fields:\n", + " self.w += [widgets.Text(value=data[f], description=f, layout=self.layout, style=self.style)]\n", + "\n", + " self.save = widgets.Button(\n", + " value=False,\n", + " description='Save File',\n", + " disabled=False,\n", + " button_style='', # 'success', 'info', 'warning', 'danger' or ''\n", + " tooltip='Save to {}'.format(self.d[path]),\n", + " icon='save' # (FontAwesome names without the `fa-` prefix)\n", + " )\n", + "\n", + " self.generate = widgets.Button(\n", + " value=False,\n", + " description='Generate Plugin',\n", + " disabled=False,\n", + " button_style='', # 'success', 'info', 'warning', 'danger' or ''\n", + " tooltip='Generate plugin with {}'.format(self.d[path]),\n", + " icon='check' # (FontAwesome names without the `fa-` prefix)\n", + " )\n", + "\n", + " self.containerize = widgets.Button(\n", + " value=False,\n", + " description='Build Docker',\n", + " disabled=False,\n", + " button_style='', # 'success', 'info', 'warning', 'danger' or ''\n", + " tooltip='Build docker image and push to repo',\n", + " icon='check' # (FontAwesome names without the `fa-` prefix)\n", + " )\n", + "\n", + " self.save.on_click(self.on_save_clicked)\n", + " self.generate.on_click(self.on_generate_clicked)\n", + " self.containerize.on_click(self.on_containerize_clicked)\n", + " self.output = widgets.Output()\n", + " display(*self.w, self.save, self.generate, self.containerize, self.output)\n", + 
"\n", + " def update_data(self, d, key_list, new_value):\n", + " \"\"\"\n", + " A method to update template data with user's input.\n", + " \"\"\"\n", + "\n", + " k = key_list[0]\n", + "\n", + " if len(key_list) == 1:\n", + " if new_value[0] == '[' and new_value[-1] == ']':\n", + " # Remove first and last parenthesis and convert to list\n", + " d[k] = new_value[1:-1].split(',')\n", + " # Strip whitespace from values in list\n", + " d[k] = [value.strip() for value in d[k]]\n", + " else:\n", + " d[k] = new_value\n", + "\n", + " else:\n", + " d[key_list[0]] = self.update_data(d[k], key_list[1:], new_value)\n", + "\n", + " return d\n", + "\n", + " def on_save_clicked(self, b):\n", + " \"\"\"\n", + " A method to save the updated cookiecutter template file.\n", + " \"\"\"\n", + "\n", + " with self.output:\n", + " print(\"Saved to {}\".format(self.d[self.p.value]))\n", + "\n", + " for w in self.w:\n", + " k = w.description.split(':')\n", + " v = w.value\n", + " self.data = self.update_data(self.data, k, v)\n", + "\n", + " with open(self.d[self.p.value], 'w') as fhand:\n", + " json.dump(self.data, fhand, indent=4)\n", + " fhand.close()\n", + "\n", + " def on_generate_clicked(self, b):\n", + " \"\"\"\n", + " A method to genetate the plugin from the cookiecutter template file.\n", + " \"\"\"\n", + " with self.output:\n", + " print('Generating the {} plugin'.format(self.p.value))\n", + " os.system('python generate.py --plugins {}\\n'.format(self.p.value))\n", + "\n", + " def on_containerize_clicked(self, b):\n", + " \"\"\"\n", + " A method to build and push the plugin's docker container.\n", + " \"\"\"\n", + " with self.output:\n", + "\n", + " # Define the plugin directory\n", + " plugin_dir = self.wd.parents[1].joinpath('polus-imagej-{}-plugin'.format(self.p.value.lower()))\n", + "\n", + " # Define the temp destination of plugin directory\n", + " temp_dir = '~/shared/wipp/temp/plugins'\n", + "\n", + " # Copy plugin to temp/plugins directory\n", + " os.system('cp -r {} 
{}'.format(plugin_dir, temp_dir))\n", + "\n", + " plugin_name = self.p.value.lower() + '-plugin' \n", + " docker_hub_repo = 'imagej-{}-plugin'.format(self.p.value.lower())\n", + " version = self.data['version']\n", + "\n", + " print('Building Docker Container for {}'.format(plugin_name))\n", + "\n", + " # Run kaniko\n", + " os.system('python imagej_kaniko.py --plugin_name {} --docker_hub_repo {} --version {}' \\\n", + " .format(plugin_name, docker_hub_repo, version))\n", + "\n", + " def display_ui(self):\n", + " \"\"\"\n", + " A method to start and display the interactive dashboard.\n", + " \"\"\"\n", + "\n", + " self.ui = widgets.interactive(self.show_fields, path=self.p)\n", + " display(self.ui)\n", + "\n", + "\n", + "# Define the dashboard\n", + "ui = Imagej_UI()\n", + "# Start and display dashboard\n", + "ui.display_ui()" ] - }, - "metadata": {}, - "output_type": "display_data" } - ], - "source": [ - "import os\n", - "import json\n", - "import ipywidgets as widgets\n", - "from IPython.display import display\n", - "from pathlib import Path\n", - "\n", - "\n", - "class Imagej_UI:\n", - "\n", - " \"\"\"\n", - " A class to display a UI of cookiecutter template file fields.\n", - " \"\"\"\n", - "\n", - " def __init__(self, path=None):\n", - " self.wd = path\n", - "\n", - " if path is None:\n", - " self.wd = Path(os.path.abspath(''))\n", - "\n", - " # Recursively search working directory for cookietin directory\n", - " self.cookietin = [path for path in self.wd.rglob('*') if path.name == 'cookietin']\n", - "\n", - " self.plugins = []\n", - " self.names = []\n", - " self.layout = widgets.Layout(width='auto', height='40px')\n", - " self.style = {'description_width': 'initial'}\n", - "\n", - " # Get all the cookie cutter template file paths and plugin names\n", - " for path in self.cookietin:\n", - " self.plugins += [plugin for plugin in self.wd.rglob('*') if plugin.name == 'cookiecutter.json']\n", - " self.names += [plugin.parent.name for plugin in self.plugins]\n", - 
" self.d = {name: path for name, path in zip(self.names, self.plugins)}\n", - "\n", - " # Sort plugin names\n", - " self.names.sort()\n", - "\n", - " # Create dropdown to select plugin template file\n", - " self.p = widgets.Dropdown(\n", - " options=self.names,\n", - " value=self.names[0],\n", - " description='Select Plugin:',\n", - " disabled=False,\n", - " )\n", - "\n", - " # Get initial data from first plugin in list\n", - " with open(self.plugins[0], 'r') as fhand:\n", - " self.data = json.load(fhand)\n", - " fhand.close()\n", - "\n", - " def get_fields(self, dic, key=''):\n", - " \"\"\"\n", - " A method to get all fields and values from cookiecutter template file.\n", - " \"\"\"\n", - " data = {}\n", - "\n", - " # Recursively search the dictionary for field values\n", - " for k in dic.keys():\n", - " if isinstance(dic[k], dict):\n", - " data.update(self.get_fields(dic[k], key+':'+k))\n", - "\n", - " elif isinstance(dic[k], str):\n", - " data[str(key+':'+k)[1:]] = str(dic[k])\n", - "\n", - " elif isinstance(dic[k], list):\n", - " data[str(key+':'+k)[1:]] = str(dic[k]).replace(\"'\", \"\").replace('\"', '')\n", - "\n", - " return data\n", - "\n", - " def show_fields(self, path):\n", - " \"\"\"\n", - " A method to create widgets using data from selected cookiecutter\n", - " template file.\n", - " \"\"\"\n", - "\n", - " with open(self.d[self.p.value], 'r') as fhand:\n", - " self.data = json.load(fhand)\n", - " fhand.close()\n", - "\n", - " data = self.get_fields(self.data)\n", - " self.w = []\n", - "\n", - " fields = [f for f in data.keys()]\n", - " for f in fields:\n", - " self.w += [widgets.Text(value=data[f], description=f, layout=self.layout, style=self.style)]\n", - "\n", - " self.save = widgets.Button(\n", - " value=False,\n", - " description='Save File',\n", - " disabled=False,\n", - " button_style='', # 'success', 'info', 'warning', 'danger' or ''\n", - " tooltip='Save to {}'.format(self.d[path]),\n", - " icon='save' # (FontAwesome names without the `fa-` 
prefix)\n", - " )\n", - "\n", - " self.generate = widgets.Button(\n", - " value=False,\n", - " description='Generate Plugin',\n", - " disabled=False,\n", - " button_style='', # 'success', 'info', 'warning', 'danger' or ''\n", - " tooltip='Generate plugin with {}'.format(self.d[path]),\n", - " icon='check' # (FontAwesome names without the `fa-` prefix)\n", - " )\n", - "\n", - " self.containerize = widgets.Button(\n", - " value=False,\n", - " description='Build Docker',\n", - " disabled=False,\n", - " button_style='', # 'success', 'info', 'warning', 'danger' or ''\n", - " tooltip='Build docker image and push to repo',\n", - " icon='check' # (FontAwesome names without the `fa-` prefix)\n", - " )\n", - "\n", - " self.save.on_click(self.on_save_clicked)\n", - " self.generate.on_click(self.on_generate_clicked)\n", - " self.containerize.on_click(self.on_containerize_clicked)\n", - " self.output = widgets.Output()\n", - " display(*self.w, self.save, self.generate, self.containerize, self.output)\n", - "\n", - " def update_data(self, d, key_list, new_value):\n", - " \"\"\"\n", - " A method to update template data with user's input.\n", - " \"\"\"\n", - "\n", - " k = key_list[0]\n", - "\n", - " if len(key_list) == 1:\n", - " if new_value[0] == '[' and new_value[-1] == ']':\n", - " # Remove first and last parenthesis and convert to list\n", - " d[k] = new_value[1:-1].split(',')\n", - " # Strip whitespace from values in list\n", - " d[k] = [value.strip() for value in d[k]]\n", - " else:\n", - " d[k] = new_value\n", - "\n", - " else:\n", - " d[key_list[0]] = self.update_data(d[k], key_list[1:], new_value)\n", - "\n", - " return d\n", - "\n", - " def on_save_clicked(self, b):\n", - " \"\"\"\n", - " A method to save the updated cookiecutter template file.\n", - " \"\"\"\n", - "\n", - " with self.output:\n", - " print(\"Saved to {}\".format(self.d[self.p.value]))\n", - "\n", - " for w in self.w:\n", - " k = w.description.split(':')\n", - " v = w.value\n", - " self.data = 
self.update_data(self.data, k, v)\n", - "\n", - " with open(self.d[self.p.value], 'w') as fhand:\n", - " json.dump(self.data, fhand, indent=4)\n", - " fhand.close()\n", - "\n", - " def on_generate_clicked(self, b):\n", - " \"\"\"\n", - " A method to genetate the plugin from the cookiecutter template file.\n", - " \"\"\"\n", - " with self.output:\n", - " print('Generating the {} plugin'.format(self.p.value))\n", - " os.system('python generate.py --plugins {}\\n'.format(self.p.value))\n", - "\n", - " def on_containerize_clicked(self, b):\n", - " \"\"\"\n", - " A method to build and push the plugin's docker container.\n", - " \"\"\"\n", - " with self.output:\n", - "\n", - " # Define the plugin directory\n", - " plugin_dir = self.wd.parents[1].joinpath('polus-imagej-{}-plugin'.format(self.p.value.lower()))\n", - "\n", - " # Define the temp destination of plugin directory\n", - " temp_dir = '~/shared/wipp/temp/plugins'\n", - "\n", - " # Copy plugin to temp/plugins directory\n", - " os.system('cp -r {} {}'.format(plugin_dir, temp_dir))\n", - "\n", - " plugin_name = self.p.value.lower() + '-plugin' \n", - " docker_hub_repo = 'imagej-{}-plugin'.format(self.p.value.lower())\n", - " version = self.data['version']\n", - "\n", - " print('Building Docker Container for {}'.format(plugin_name))\n", - "\n", - " # Run kaniko\n", - " os.system('python imagej_kaniko.py --plugin_name {} --docker_hub_repo {} --version {}' \\\n", - " .format(plugin_name, docker_hub_repo, version))\n", - "\n", - " def display_ui(self):\n", - " \"\"\"\n", - " A method to start and display the interactive dashboard.\n", - " \"\"\"\n", - "\n", - " self.ui = widgets.interactive(self.show_fields, path=self.p)\n", - " display(self.ui)\n", - "\n", - "\n", - "# Define the dashboard\n", - "ui = Imagej_UI()\n", - "# Start and display dashboard\n", - "ui.display_ui()" - ] - } - ], - "metadata": { - "kernelspec": { - "display_name": "Python 3 (ipykernel)", - "language": "python", - "name": "python3" + ], + 
"metadata": { + "kernelspec": { + "display_name": "Python 3 (ipykernel)", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.9.7" + } }, - "language_info": { - "codemirror_mode": { - "name": "ipython", - "version": 3 - }, - "file_extension": ".py", - "mimetype": "text/x-python", - "name": "python", - "nbconvert_exporter": "python", - "pygments_lexer": "ipython3", - "version": "3.9.7" - } - }, - "nbformat": 4, - "nbformat_minor": 5 + "nbformat": 4, + "nbformat_minor": 5 } diff --git a/utils/polus-imagej-util/requirements.txt b/utils/polus-imagej-util/requirements.txt index c42769074..c99d38adf 100644 --- a/utils/polus-imagej-util/requirements.txt +++ b/utils/polus-imagej-util/requirements.txt @@ -6,4 +6,4 @@ bfio[all]==2.1.9 black cookiecutter kubernetes -ipywidgets \ No newline at end of file +ipywidgets diff --git a/utils/polus-imagej-util/scale.json b/utils/polus-imagej-util/scale.json index 91a0d8877..65679ffec 100644 --- a/utils/polus-imagej-util/scale.json +++ b/utils/polus-imagej-util/scale.json @@ -1,46 +1,46 @@ { - "convert-bit": "independent", - "convert-cfloat32": "independent", - "convert-cfloat64": "independent", - "convert-float32": "independent", - "convert-float64": "independent", - "convert-int8": "independent", - "convert-int16": "independent", - "convert-int32": "independent", - "convert-int64": "independent", - "convert-uint2": "independent", - "convert-uint4": "independent", - "convert-uint8": "independent", - "convert-uint12": "independent", - "convert-uint16": "independent", - "convert-uint32": "independent", - "convert-uint64": "independent", - "convert-uint128": "independent", - "copy-img": "independent", - "threshold-apply": "independent", - "threshold-huang": "threshold", - "threshold-ij1": 
"threshold", - "threshold-intermodes": "threshold", - "threshold-isoData": "threshold", - "threshold-li": "threshold", - "threshold-maxEntropy": "threshold", - "threshold-maxLikelihood": "threshold", - "threshold-mean": "threshold", - "threshold-minError": "threshold", - "threshold-minimum": "threshold", - "threshold-moments": "threshold", - "threshold-otsu": "threshold", - "threshold-percintile": "threshold", - "threshold-renyiEntropy": "threshold", - "threshold-rosin": "threshold", - "threshold-shanbhag": "threshold", - "threshold-triangle": "threshold", - "threshold-yen": "threshold", - "filter-addNoise": "independent", - "filter-addPoissonNoise": "", - "filter-correlate": "fft-filter", - "filter-convolve": "fft-filter", - "filter-derivative": "fft-filter", - "filter-derivativeGauss": "fft-filter", - "filter-gauss": "fft-filter" -} \ No newline at end of file + "convert-bit": "independent", + "convert-cfloat32": "independent", + "convert-cfloat64": "independent", + "convert-float32": "independent", + "convert-float64": "independent", + "convert-int8": "independent", + "convert-int16": "independent", + "convert-int32": "independent", + "convert-int64": "independent", + "convert-uint2": "independent", + "convert-uint4": "independent", + "convert-uint8": "independent", + "convert-uint12": "independent", + "convert-uint16": "independent", + "convert-uint32": "independent", + "convert-uint64": "independent", + "convert-uint128": "independent", + "copy-img": "independent", + "threshold-apply": "independent", + "threshold-huang": "threshold", + "threshold-ij1": "threshold", + "threshold-intermodes": "threshold", + "threshold-isoData": "threshold", + "threshold-li": "threshold", + "threshold-maxEntropy": "threshold", + "threshold-maxLikelihood": "threshold", + "threshold-mean": "threshold", + "threshold-minError": "threshold", + "threshold-minimum": "threshold", + "threshold-moments": "threshold", + "threshold-otsu": "threshold", + "threshold-percintile": "threshold", + 
"threshold-renyiEntropy": "threshold", + "threshold-rosin": "threshold", + "threshold-shanbhag": "threshold", + "threshold-triangle": "threshold", + "threshold-yen": "threshold", + "filter-addNoise": "independent", + "filter-addPoissonNoise": "", + "filter-correlate": "fft-filter", + "filter-convolve": "fft-filter", + "filter-derivative": "fft-filter", + "filter-derivativeGauss": "fft-filter", + "filter-gauss": "fft-filter" +} diff --git a/utils/polus-imagej-util/tests/core_test.py b/utils/polus-imagej-util/tests/core_test.py index 4a46de6ec..07aaa5352 100644 --- a/utils/polus-imagej-util/tests/core_test.py +++ b/utils/polus-imagej-util/tests/core_test.py @@ -1,5 +1,5 @@ -from pathlib import Path import sys +from pathlib import Path src_dir = Path(__file__).parents[1].joinpath("{{cookiecutter.project_slug}}/src") print(src_dir) @@ -8,15 +8,17 @@ import ij_converter if __name__ == "__main__": - import imagej - import scyjava - import traceback import sys + import traceback + from pathlib import Path + + import imagej import imglyb import jpype import numpy as np - from bfio import BioReader, BioWriter - from pathlib import Path + import scyjava + from bfio import BioReader + from bfio import BioWriter # Bioformats throws a debug message, disable the loci debugger to mute it def disable_loci_logs(): @@ -29,15 +31,9 @@ def disable_loci_logs(): # This is the version of ImageJ pre-downloaded into the docker container ij = imagej.init( - "sc.fiji:fiji:2.1.1+net.imagej:imagej-legacy:0.37.4", headless=True + "sc.fiji:fiji:2.1.1+net.imagej:imagej-legacy:0.37.4", headless=True, ) - # ArrayImgs = scyjava.jimport('net.imglib2.img.array.ArrayImgs') - # UnsafeUtil = scyjava.jimport('net.imglib2.img.basictypelongaccess.unsafe.UnsafeUtil') - # Arrays = scyjava.jimport('java.util.Arrays') - # OwningFloatUnsafe = scyjava.jimport('net.imglib2.img.basictypelongaccess.unsafe.owning.OwningFloatUnsafe') - # Fraction = scyjava.jimport('net.imglib2.util.Fraction') - # LongStream = 
scyjava.jimport('java.util.stream.LongStream') NUMPY_TYPES = { "uint8": (np.uint8, imglyb.types.UnsignedByteType), @@ -55,14 +51,14 @@ def disable_loci_logs(): def tester(t, ij): try: - print("Testing {} data type...".format(t)) + print(f"Testing {t} data type...") shape = (2048, 2048) print("Creating Array...") array = np.random.randint(0, 255, size=shape, dtype=np.uint16) print("Converting Array...") array = NUMPY_TYPES[t][0](array) dtype0 = ij.py.dtype(array) - print("The initial data type is {}".format(dtype0)) + print(f"The initial data type is {dtype0}") temp_path = Path(__file__).with_name("data-convert-temp") print("Writing image array to file...") with BioWriter(temp_path) as writer: @@ -74,37 +70,35 @@ def tester(t, ij): arr = BioReader(temp_path) print("Getting data type after reading image...") dtype1 = ij.py.dtype(arr[:, :, 0:1, 0, 0]) - print("Data type after reading image is {}".format(dtype1)) - # print('Trying to convert to PlanarImg') - # planarimg = ij.planar(arr) + print(f"Data type after reading image is {dtype1}") if dtype0 != dtype1: - print("Manully forcing data type back to {}".format(dtype0)) + print(f"Manully forcing data type back to {dtype0}") arr = NUMPY_TYPES[t][0](arr[:, :, 0:1, 0, 0]) print("Converting to Java object...") arr = ij_converter.to_java(ij, np.squeeze(arr), "ArrayImg") print("Getting data type after manually forcing...") dtype2 = ij.py.dtype(arr) - print("Data type after manual forcing is {}".format(dtype2)) + print(f"Data type after manual forcing is {dtype2}") val_dtype = dtype2 else: arr = ij_converter.to_java( - ij, np.squeeze(arr[:, :, 0:1, 0, 0]), "ArrayImg" + ij, np.squeeze(arr[:, :, 0:1, 0, 0]), "ArrayImg", ) val_dtype = dtype1 value = 5 print( "Converting input (value) to Java primitive type {}...".format( - val_dtype - ) + val_dtype, + ), ) val = ij_converter.to_java(ij, value, t, val_dtype) print("Calling ImageJ op...") - out = ij.op().math().add(arr, val) - print("The op was SUCCESSFUL with data type 
{}".format(t)) + ij.op().math().add(arr, val) + print(f"The op was SUCCESSFUL with data type {t}") except: - print("Testing data type {} was NOT SUCCESSFUL".format(t)) + print(f"Testing data type {t} was NOT SUCCESSFUL") print(traceback.format_exc()) finally: diff --git a/utils/polus-imagej-util/{{cookiecutter.project_slug}}/Dockerfile b/utils/polus-imagej-util/{{cookiecutter.project_slug}}/Dockerfile index e53e26b1d..67e9d546c 100644 --- a/utils/polus-imagej-util/{{cookiecutter.project_slug}}/Dockerfile +++ b/utils/polus-imagej-util/{{cookiecutter.project_slug}}/Dockerfile @@ -16,4 +16,4 @@ RUN pip3 install -r ${EXEC_DIR}/requirements.txt --no-cache-dir RUN mkdir /.jgo && chmod 777 /.jgo # Default command. Additional arguments are provided through the command line -ENTRYPOINT ["python3", "main.py"] \ No newline at end of file +ENTRYPOINT ["python3", "main.py"] diff --git a/utils/polus-imagej-util/{{cookiecutter.project_slug}}/README.md b/utils/polus-imagej-util/{{cookiecutter.project_slug}}/README.md index 85de4e906..39d607e90 100644 --- a/utils/polus-imagej-util/{{cookiecutter.project_slug}}/README.md +++ b/utils/polus-imagej-util/{{cookiecutter.project_slug}}/README.md @@ -4,7 +4,7 @@ This WIPP plugin was automatically generated by a utility that searches for ImageJ plugins and generates code to run them. For more information on what this -plugin does, contact one of the authors: Nick Schaub (nick.schaub@nih.gov), +plugin does, contact one of the authors: Nick Schaub (nick.schaub@nih.gov), Anjali Taneja or Benjamin Houghton (benjamin.houghton@axleinfo.com). For more information on WIPP, visit the [official WIPP page](https://isg.nist.gov/deepzoomweb/software/wipp). 
diff --git a/utils/polus-imagej-util/{{cookiecutter.project_slug}}/VERSION b/utils/polus-imagej-util/{{cookiecutter.project_slug}}/VERSION index a8faae458..94fc77965 100644 --- a/utils/polus-imagej-util/{{cookiecutter.project_slug}}/VERSION +++ b/utils/polus-imagej-util/{{cookiecutter.project_slug}}/VERSION @@ -1 +1 @@ -{{ cookiecutter.version }} \ No newline at end of file +{{ cookiecutter.version }} diff --git a/utils/polus-imagej-util/{{cookiecutter.project_slug}}/build-docker.sh b/utils/polus-imagej-util/{{cookiecutter.project_slug}}/build-docker.sh index 55c344d9b..580688afb 100644 --- a/utils/polus-imagej-util/{{cookiecutter.project_slug}}/build-docker.sh +++ b/utils/polus-imagej-util/{{cookiecutter.project_slug}}/build-docker.sh @@ -1,4 +1,4 @@ #!/bin/bash version=$( None: - - """ Initialize ImageJ """ + + """ Initialize ImageJ """ # Bioformats throws a debug message, disable the loci debugger to mute it def disable_loci_logs(): DebugTools = scyjava.jimport("loci.common.DebugTools") DebugTools.setRootLevel("WARN") scyjava.when_jvm_starts(disable_loci_logs) - + # This is the version of ImageJ pre-downloaded into the docker container logger.info('Starting ImageJ...') - + ij = imagej.init( "sc.fiji:fiji:2.1.1+net.imagej:imagej-legacy:0.37.4", headless=True ) - + logger.info('Loaded ImageJ version: {}'.format(ij.getVersion())) - + """ Validate and organize the inputs """ args = [] argument_types = [] @@ -188,59 +188,59 @@ def disable_loci_logs(): {{ inp }}_types = { {% for i,v in val.call_types.items() %} "{{ i }}": "{{ v }}",{% endfor %} } - - # Check that all inputs are specified + + # Check that all inputs are specified if _{{ inp }} is None and _opName in list({{ inp }}_types.keys()): raise ValueError('{} must be defined to run {}.'.format('{{ inp }}',_opName)) {%- if val.type == "collection"%} elif _{{ inp }} != None: {{ inp }}_type = {{ inp }}_types[_opName] - + # switch to images folder if present if _{{ inp }}.joinpath('images').is_dir(): _{{ inp }} = 
_{{ inp }}.joinpath('images').absolute() - + # Check that input path is a directory if not _{{ inp }}.is_dir(): raise FileNotFoundError('The {} collection directory does not exist'.format(_{{ inp }})) - + # Add the list of images to the arguments (images) list # There will be a single list for each collection input within args list {%- if cookiecutter.scalability == 'fft-filter' and val.title in ['in1', 'inpDir'] %} - + # Instantiate the filepatter object {{ inp }}_fp = filepattern.FilePattern(_{{inp}}, _pattern) - + # Add the list of images to the arguments (images) list # There will be a single list for each collection input within args list args.append([f[0] for f in {{ inp }}_fp() if f[0]['file'].is_file()]) if arg_len == 0: arg_len = len(args[-1]) - + {%- elif cookiecutter.scalability == 'fft-filter' and val.title in ['in2', 'kernel'] %} - + # Infer the file pattern of the collection pattern_guess = filepattern.infer_pattern(_{{inp}}.iterdir()) - + # Instantiate the filepatter object {{ inp }}_fp = filepattern.FilePattern(_{{inp}}, pattern_guess) - + {{ inp }}_path = [f[0]['file'] for f in {{ inp }}_fp() if f[0]['file'].is_file()] - + {% else %} # Infer the file pattern of the collection pattern_guess = filepattern.infer_pattern(_{{inp}}.iterdir()) - + # Instantiate the filepatter object {{ inp }}_fp = filepattern.FilePattern(_{{inp}}, pattern_guess) - + # Add the list of images to the arguments (images) list # There will be a single list for each collection input within args list args.append([f[0]['file'] for f in {{ inp }}_fp() if f[0]['file'].is_file()]) if arg_len == 0: arg_len = len(args[-1]) {% endif %} - + else: argument_types.append(None) args.append([None]) @@ -254,20 +254,20 @@ def disable_loci_logs(): ] assert _{{ inp }} in {{ inp }}_values, '{{ inp }} must be one of {}'.format({{ inp }}_values) {% endif %}{%- endfor %} - + # This ensures each input collection has the same number of images # If one collection is a single image it will be duplicated 
to match length # of the other input collection - only works when 1 input is a collection for i in range(len(args)): if len(args[i]) == 1: args[i] = args[i] * arg_len - + # Define the output data types for each overloading method {%- for out,val in cookiecutter._outputs.items() %} {{ out }}_types = { {% for i,v in val.call_types.items() %} "{{ i }}": "{{ v }}",{%- endfor %} }{%- endfor %} - + {%- if cookiecutter.scalability == 'independent' %} # Attempt to convert inputs to java types and run the pixel indepent op try: @@ -275,7 +275,7 @@ def disable_loci_logs(): {%- for inp,val in cookiecutter._inputs.items() -%} {%- if val.type=='collection' and inp != 'out_input' %}{{ inp }}_path,{% endif -%} {%- endfor %}) in enumerate(zip(*args)): - + {%- for inp,val in cookiecutter._inputs.items() if val.type=='collection' and inp != 'out_input' %} {%- if val.type=='collection' %} if {{ inp }}_path != None: @@ -283,7 +283,7 @@ def disable_loci_logs(): # Load the first plane of image in {{ inp }} collection logger.info('Processing image: {}'.format({{ inp }}_path)) {{ inp }}_br = BioReader({{ inp }}_path) - + # Convert to appropriate numpy array {{ inp }} = ij_converter.to_java(ij, np.squeeze({{ inp }}_br[:,:,0:1,0,0]),{{ inp }}_type) {%- if loop.first %} @@ -294,30 +294,30 @@ def disable_loci_logs(): shape = ij.py.dims({{ inp }}) {%- endif %} {%- endif %}{% endfor %} - + {%- for inp,val in cookiecutter._inputs.items() if val.type != 'collection' and inp != 'opName' and inp != 'out_input' %} if _{{ inp }} is not None: {{ inp }} = ij_converter.to_java(ij, _{{ inp }},{{ inp }}_types[_opName],dtype) {% endfor %} - + # Generate the out input variable if required {%- for inp,val in cookiecutter._inputs.items() if inp == 'out_input' %} {{ inp }} = ij_converter.to_java(ij, np.zeros(shape=shape, dtype=dtype), 'IterableInterval') {% endfor %} - + logger.info('Running op...') {% for i,v in cookiecutter.plugin_namespace.items() %} {%- if loop.first %}if{% else %}elif{% endif %} _opName 
== "{{ i }}": {{ v }} {% endfor %} logger.info('Completed op!') - + {%- for inp,val in cookiecutter._inputs.items() if inp != 'out_input' %} {%- if val.type=='collection' %} if {{ inp }}_path != None: {{ inp }}_br.close() {%- endif %}{% endfor %} - + {% for out,val in cookiecutter._outputs.items() -%} # Saving output file to {{ out }} @@ -329,70 +329,70 @@ def disable_loci_logs(): bw[:] = {{ out }}_array.astype(bw.dtype) bw.close() {%- endfor %} - + except: logger.error('There was an error, shutting down jvm before raising...') raise - + finally: # Exit the program logger.info('Shutting down jvm...') del ij jpype.shutdownJVM() logger.info('Complete!') - + {% elif cookiecutter.scalability == 'threshold' %} - + try: - + logger.info('Computing threshold value...') - + # Create a tile count tile_count = 0 - + for {%- for inp,val in cookiecutter._inputs.items() -%} {%- if val.type=='collection' and inp != 'out_input' %} {{ inp }}_path, in zip(*args): - + # Check if any tiles have been processed if tile_count == 0: - + # Create the initial histogram histogram = create_histogram({{ inp }}_path, ij) - + else: - + # Convert the image to an iterable interval iterable_interval, fname, metadata = create_iterable({{ inp }}_path, ij) - + # Add the image tile to the histogram histogram.addData(iterable_interval) - + tile_count += 1 {% endif -%}{%- endfor %} - + # Calculate the threshold value {{ cookiecutter.compute_threshold }} - + # Check if array was returned if isinstance(threshold, jpype.JClass('java.util.ArrayList')): - + # Get the threshold value, disregard the errMsg output threshold = threshold[0] - + logger.info('The threshold value is {}'.format(threshold)) - + for {%- for inp,val in cookiecutter._inputs.items() -%} {%- if val.type=='collection' and inp != 'out_input' %} {{ inp }}_path, in zip(*args): - + # Load the first plane of image in {{ inp }} collection logger.info('Processing image: {}'.format({{ inp }}_path)) - + # Convert the image to an iterable interval 
iterable_interval, fname, metadata = create_iterable({{ inp }}_path, ij) - + # Apply the threshold out = ij.op().threshold().apply(iterable_interval, threshold) - + # Write image to file logger.info('Saving image {}'.format(fname)) out_array = ij_converter.from_java(ij, out, 'Iterable') @@ -400,70 +400,70 @@ def disable_loci_logs(): bw.Z = 1 bw.dtype = out_array.dtype bw[:] = out_array.astype(bw.dtype) - bw.close() + bw.close() {% endif -%}{%- endfor %} - + except: logger.error('There was an error, shutting down jvm before raising...') raise - + finally: # Exit the program logger.info('Shutting down jvm...') del ij jpype.shutdownJVM() logger.info('JVM shutdown complete') - + try: - + logger.info('Computing threshold value...') - + # Create a tile count tile_count = 0 - + for {%- for inp,val in cookiecutter._inputs.items() -%} {%- if val.type=='collection' and inp != 'out_input' %} {{ inp }}_path, in zip(*args): - + # Check if any tiles have been processed if tile_count == 0: - + # Create the initial histogram histogram = create_histogram({{ inp }}_path, ij) - + else: - + # Convert the image to an iterable interval iterable_interval, fname, metadata = create_iterable({{ inp }}_path, ij) - + # Add the image tile to the histogram histogram.addData(iterable_interval) - + tile_count += 1 {% endif -%}{%- endfor %} - + # Calculate the threshold value {{ cookiecutter.compute_threshold }} - + # Check if array was returned if isinstance(threshold, jpype.JClass('java.util.ArrayList')): - + # Get the threshold value, disregard the errMsg output threshold = threshold[0] - + logger.info('The threshold value is {}'.format(threshold)) - + for {%- for inp,val in cookiecutter._inputs.items() -%} {%- if val.type=='collection' and inp != 'out_input' %} {{ inp }}_path, in zip(*args): - + # Load the first plane of image in {{ inp }} collection logger.info('Processing image: {}'.format({{ inp }}_path)) - + # Convert the image to an iterable interval iterable_interval, fname, metadata = 
create_iterable({{ inp }}_path, ij) - + # Apply the threshold out = ij.op().threshold().apply(iterable_interval, threshold) - + # Write image to file logger.info('Saving image {}'.format(fname)) out_array = ij_converter.from_java(ij, out, 'Iterable') @@ -471,129 +471,129 @@ def disable_loci_logs(): bw.Z = 1 bw.dtype = out_array.dtype bw[:] = out_array.astype(bw.dtype) - bw.close() + bw.close() {% endif -%}{%- endfor %} - + except: logger.error('There was an error, shutting down jvm before raising...') raise - + finally: # Exit the program logger.info('Shutting down jvm...') del ij jpype.shutdownJVM() logger.info('JVM shutdown complete') - + {% elif cookiecutter.scalability == 'fft-filter' %} - + # Attempt to convert inputs to java types and run the filter op try: - + {%- for inp,val in cookiecutter._inputs.items() if val.title in ['in2', 'kernel'] and val.type=='collection' %} # Load the kernel image logger.info('Loading image: {}'.format({{ inp }}_path[0])) {{ inp }}_br = BioReader({{ inp }}_path[0]) - + # Convert to appropriate numpy array {{ inp }} = ij_converter.to_java(ij, np.squeeze({{ inp }}_br[:,:,0:1,0,0]),{{ inp }}_type) {{ inp }}_br.close() - + kernel_shape = ij.py.dims({{ inp }}) - + # Check if padding argument was passed if _padding is None: # Set padding based upon kernel dimensions _padding = kernel_shape[0] - + {% endfor %} - + # Check if padding argument is defined if _padding is None: - + # Set a large arbitrary padding size _padding = 30 - + for ind, ( {%- for inp,val in cookiecutter._inputs.items() if val.type=='collection' and inp not in ['out_input', 'in2', 'kernel'] -%} {{ inp }}_path, {%- endfor %}) in enumerate(zip(*args)): - + {%- for inp,val in cookiecutter._inputs.items() if val.type=='collection' and inp not in ['out_input', 'in2', 'kernel'] %} if {{ inp }}_path != None: - + {%- if loop.first %} # Load the first plane of image in {{ inp }} collection logger.info('Processing image: {}'.format({{ inp }}_path)) - + # Define x and y spatial 
position of current tile x = {{ inp }}_path['x'] y = {{ inp }}_path['y'] - + # Save input collection file name and data type fname = {{ inp }}_path['file'].name - + # Pad the tile padded_img, orginal_shape, metadata = pad_image( fp={{ inp }}_fp, y=y, x=x, padding_size=_padding ) - + # Convert to appropriate numpy array {{ inp }} = ij_converter.to_java(ij, padded_img,{{ inp }}_type) - + # Save the shape and data type for out input array shape = ij.py.dims({{ inp }}) dtype = ij.py.dtype({{ inp }}) {%- endif %} {% endfor %} - + {%- for inp,val in cookiecutter._inputs.items() if val.type != 'collection' and inp != 'opName' and inp != 'out_input' %} if _{{ inp }} is not None: {{ inp }} = ij_converter.to_java(ij, _{{ inp }},{{ inp }}_types[_opName],dtype) {% endfor %} - + # Generate the out input variable if required {%- for inp,val in cookiecutter._inputs.items() if inp == 'out_input' %} {{ inp }} = ij_converter.to_java(ij, np.zeros(shape=shape, dtype=dtype), 'IterableInterval') {% endfor %} - + logger.info('Running op...') {% for i,v in cookiecutter.plugin_namespace.items() %} {%- if loop.first %}if{% else %}elif{% endif %} _opName == "{{ i }}": {{ v }} {% endfor %} logger.info('Completed op!') - + {% for out,val in cookiecutter._outputs.items() -%} # Saving output file to {{ out }} logger.info('Saving...') {{ out }}_array = ij_converter.from_java(ij, {{ out }},{{ out }}_types[_opName]) - - + + # Define padding indices to trim i1 = _padding i2 = orginal_shape[0] + _padding - + {{ out }}_array = {{ out }}_array[i1:i2, i1:i2] - + bw = BioWriter(_{{ out }}.joinpath(fname),metadata=metadata) bw.Z = 1 bw.dtype = {{ out }}_array.dtype bw[:] = {{ out }}_array.astype(bw.dtype) bw.close() {%- endfor %} - + except: logger.error('There was an error, shutting down jvm before raising...') raise - + finally: # Exit the program logger.info('Shutting down jvm...') del ij jpype.shutdownJVM() logger.info('Complete!') - + {% else %} # If plugin scale type is not defined 
logger.info('Plugin scale type not developed, shutting down jvm without running op...') @@ -606,7 +606,7 @@ def disable_loci_logs(): # Setup Command Line Arguments logger.info("Parsing arguments...") parser = argparse.ArgumentParser(prog='main', description='{{ cookiecutter.project_short_description }}') - + # Add command-line argument for each of the input arguments {% for inp,val in cookiecutter._inputs.items() if inp != 'out_input' -%} parser.add_argument('--{{ val.title }}', dest='{{ inp }}', type=str, @@ -625,7 +625,7 @@ def disable_loci_logs(): {% endif %} """ Parse the arguments """ args = parser.parse_args() - + # Input Args {%- for inp,val in cookiecutter._inputs.items() if inp != 'out_input' %} {% if val.type=="boolean" -%} @@ -640,7 +640,7 @@ def disable_loci_logs(): {%- if cookiecutter.scalability == 'fft-filter' %} _pattern = args.pattern logger.info('pattern = {}'.format(_pattern)) - + _padding = args.padding logger.info('padding = {}'.format(_padding)) {% endif %} @@ -649,7 +649,7 @@ def disable_loci_logs(): _{{ out }} = Path(args.{{ out }}) logger.info('{{ val.title }} = {}'.format(_{{ out }})) {%- endfor %} - + main( {%- filter indent(5) %} {%- for inp,val in cookiecutter._inputs.items() if inp != 'out_input' -%} diff --git a/utils/polus-imagej-util/{{cookiecutter.project_slug}}/tests/data_test.py b/utils/polus-imagej-util/{{cookiecutter.project_slug}}/tests/data_test.py index 374cf40ab..f2735da9f 100644 --- a/utils/polus-imagej-util/{{cookiecutter.project_slug}}/tests/data_test.py +++ b/utils/polus-imagej-util/{{cookiecutter.project_slug}}/tests/data_test.py @@ -10,7 +10,7 @@ from bfio.bfio import BioWriter """ -This file is autogenerated from an ImageJ plugin generation pipeline. +This file is autogenerated from an ImageJ plugin generation pipeline. It is not intended to be run directly. Run imagej-testing/shell_test.py to begin testing. 
""" @@ -36,7 +36,7 @@ def setUpClass(cls) -> None: file_handler = logging.FileHandler('imagej-testing/data-test.log') file_handler.setFormatter(formatter) cls.logger.addHandler(file_handler) - + # Set up new log for summary of passed and failed tests cls.summary = logging.getLogger('summary') cls.summary.setLevel(logging.INFO) @@ -44,9 +44,9 @@ def setUpClass(cls) -> None: file_handler = logging.FileHandler('imagej-testing/test-summary.log') file_handler.setFormatter(formatter) cls.summary.addHandler(file_handler) - + def generate_data(self, input, wipp_type, imagej_type): - + numpy_types = { 'double' : np.float64, 'float' : np.float32, @@ -54,27 +54,27 @@ def generate_data(self, input, wipp_type, imagej_type): 'short' : np.int16, 'byte' : np.int8, } - + if wipp_type == None: return None - + # Determine if the input data type is a collection elif wipp_type == 'collection': - + # Create input dir path object for the randomly generated images input_dir = Path(__file__).parent.joinpath(input) - + # Check if "input" directory is a sub-directory of "tests" if input_dir.exists(): - + # Remove the "input" sub-directory shutil.rmtree(input_dir) - + # Create input and output sub-directories in tests os.mkdir(input_dir) - + """Using auto generated images""" - + # Create a random image to be used for plugin testing image_size = 2048 image_shape = (image_size, image_size) @@ -84,23 +84,23 @@ def generate_data(self, input, wipp_type, imagej_type): size = image_shape, dtype = np.uint16 ) - + # Iterate over all numpy data types for data_name, data_type in numpy_types.items(): - + array = data_type(random_image) - + # Define the image's file path image_path = input_dir.joinpath('random_{}.ome.tif'.format(data_name)) - + # Create a BioWriter object to write the ramdomly generated image file to tests/input dir with BioWriter(image_path) as writer: writer.X = image_shape[0] writer.Y = image_shape[1] writer.dtype = data_type writer[:] = array[:] - - + + """Using sample images""" # # 
TODO: use Imagej sample data for unit testing # # Get input source directory @@ -109,59 +109,59 @@ def generate_data(self, input, wipp_type, imagej_type): # # Create input directory in plugin test directory path # input_path = Path(__file__).with_name(input) - + # # Check if the input path already exists as a a sub-directory of "tests" # if input_path.exists(): - + # # Remove the "input" sub-directory # shutil.rmtree(input_path) - + # # Copy sample images to input folder # shutil.copytree(sample_dir, input_path) - - + + return input_dir - + elif wipp_type == 'array': # arr = np.random.rand(2048,2048) arr = '1,2' return arr - + elif wipp_type == 'number': number = np.random.randint(5) return number - + else: self.logger.info( 'FAILURE: The data type, {}, of input, {}, is currently not supported\n'.format(wipp_type, input) ) raise TypeError('The input data type is not currently supported') - + def output_handler(self, output, dtype): if dtype == 'collection': - + # Create output path object for the plugin output output_path = Path(__file__).with_name(output) - + # Check if output is a sub-directory of "tests" directory if output_path.exists(): - + # Delete the "output" sub-directory shutil.rmtree(output_path) - + # Create output as sub-directory of tests os.mkdir(output_path) - + return output_path - - + + def test_plugin(self): - + projectName = '{{ cookiecutter.project_name }}' self.logger.info('Testing the op: {} with overloading option: {}'.format(projectName, op)) - + method_call_types = {} - + supported_data_types = [ 'double', 'float', @@ -172,7 +172,7 @@ def test_plugin(self): 'byte', 'boolean', ] - + # Get WIPP and ImageJ data types {% for inp,val in cookiecutter._inputs.items() -%} {% if inp == 'opName' -%} @@ -185,24 +185,24 @@ def test_plugin(self): if dtype in supported_data_types}) {% endif -%} {% endfor -%} - + # Generate data for the inputs {% for inp,val in cookiecutter._inputs.items() -%} {% if inp != 'opName' -%} _{{ inp }} = self.generate_data( 
'{{ inp }}', - _{{ inp }}_wipp_types.get(op, None), + _{{ inp }}_wipp_types.get(op, None), method_call_types.get(op, None) ) {% endif -%} {% endfor -%} - + # Handle the op output {% for out,val in cookiecutter._outputs.items() -%} _{{ out }} = self.output_handler('{{ out }}', '{{ val.type }}') {% endfor -%} - - + + try: # Call the op main( @@ -221,22 +221,22 @@ def test_plugin(self): ) self.logger.info(traceback.format_exc()+'\n') self.summary.info('0') - + if __name__ == '__main__': - + # Instantiate a parser for command line arguments parser = argparse.ArgumentParser(prog='unit_test', description='Test imagej plugin') - + # Add command-line argument for each of the input arguments parser.add_argument('--opName', dest='opName', type=str, help='Operation to test', required=True) - + """ Parse the arguments """ args = parser.parse_args() - + # Input Args op = args.opName - + del sys.argv[1:] - unittest.main() \ No newline at end of file + unittest.main() diff --git a/utils/polus-imagej-util/{{cookiecutter.project_slug}}/tests/scale_test.py b/utils/polus-imagej-util/{{cookiecutter.project_slug}}/tests/scale_test.py index 9c536fb15..88f07a279 100644 --- a/utils/polus-imagej-util/{{cookiecutter.project_slug}}/tests/scale_test.py +++ b/utils/polus-imagej-util/{{cookiecutter.project_slug}}/tests/scale_test.py @@ -6,74 +6,70 @@ from bfio.bfio import BioReader, BioWriter """ -This file is autogenerated from an ImageJ plugin generation pipeline. It is +This file is autogenerated from an ImageJ plugin generation pipeline. It is intended to test the accuracy of scaled ImageJ plugins. 
""" # Get plugin and test directories plugin_dir = Path(__file__).parents[1] -test_dir = plugin_dir.joinpath('tests') +test_dir = plugin_dir.joinpath("tests") # Get src directory -src_path = plugin_dir.joinpath('src') +src_path = plugin_dir.joinpath("src") # Get main src file path -main_path = src_path.joinpath('main.py') +main_path = src_path.joinpath("main.py") # Define file path to save test results -results_path = Path(__file__).with_name('scale_results.json') +results_path = Path(__file__).with_name("scale_results.json") -# Instantiate list to store the results of each test case +# Instantiate list to store the results of each test case all_results = [] def rmse(full, tiled): MEAN = full.mean() - RMSE = np.sqrt(((full-tiled)**2).mean()) + RMSE = np.sqrt(((full - tiled) ** 2).mean()) return RMSE, MEAN def create_base_name(pattern): - # Find the format locations of the file pattern flist = [k[1] for k in string.Formatter().parse(pattern) if k[1] is not None] - + # Instantiate the name name = pattern - + # Replace all format keys with '0' for k in flist: - name = name.replace('{'+ k + '}', '0'*len(k)) - + name = name.replace("{" + k + "}", "0" * len(k)) + return name def combine_images(img_dir, pattern): - # Define a filepattern object for the image directory fp = filepattern.FilePattern(img_dir, pattern) - + # Set row and column variables r = 0 c = 0 - + # Instantiate dictionary to store images that belong in a row together row_images = {} # Iterate over each row in the image - for row in fp(group_by='x'): - + for row in fp(group_by="x"): # Iterate over each image in the row for f in row: - # Get the current image's file name - fname = f['file'] - print('Reading {} - position r:{} c:{}'.format(fname, r, c)) - + fname = f["file"] + print("Reading {} - position r:{} c:{}".format(fname, r, c)) + # Read the image - br = BioReader(f['file'], backend='python') + br = BioReader(f["file"], backend="python") img = br[:] - + # Begin the row if first image or add to row if 
c == 0: row_image = img @@ -82,42 +78,39 @@ def combine_images(img_dir, pattern): # Close the reader br.close() - + # Increment the column index c += 1 - + # Reset the column index after finishing the row c = 0 - - print('The shape of the row is {}'.format(row_image.shape)) - + + print("The shape of the row is {}".format(row_image.shape)) + # Add the row to the dictionary of rows row_images[r] = row_image - + # Increement the row index r += 1 - + # Iterate over the row images and combine into single image for row, row_img in row_images.items(): - if row == 0: - print('Adding row {} to the final image'.format(row)) + print("Adding row {} to the final image".format(row)) final_image = row_img - + else: - print('Adding row {} to the final image'.format(row)) + print("Adding row {} to the final image".format(row)) final_image = np.vstack((final_image, row_img)) - final_shape = final_image.shape - print('Shape of the final image is {}'.format(final_image.shape)) - print('Final image data type: {}'.format(final_image.dtype)) - + print("Shape of the final image is {}".format(final_image.shape)) + print("Final image data type: {}".format(final_image.dtype)) + return final_image def write_image(img, out_dir): - # Save the kernel with BioWriter(out_dir) as bw: # Update the save dimensions @@ -138,157 +131,146 @@ def run_plugin(args, in_dir=None, out_dir=None): """ Runs the plugin for given input arguemnts """ - + # Check for user defined input/output directories if in_dir is not None: - args['inpDir'] = in_dir - + args["inpDir"] = in_dir + if out_dir is not None: - args['outDir'] = out_dir - + args["outDir"] = out_dir + # Define the python os command - command = 'python ' + str(main_path) - + command = "python " + str(main_path) + # Add the arguments to the command for arg, value in args.items(): if value is not None: - command = command + ' --{} {}'.format(arg, value) - + command = command + " --{} {}".format(arg, value) + # Run the plugin os.system(command) - + def 
pytest_generate_tests(metafunc): - # Create lists store test id's arguments and argument names idlist = [] - argnames = ['base', 'test'] + argnames = ["base", "test"] argvalues = [] - + global base_dirs base_dirs = [] - + # Get the list of test scenarios scenarios = metafunc.cls.scenarios - + # Iterate over each test scenario for s in scenarios: - # Get the base case key for that scenario for key in s.keys(): - if key[0] in ['b', 'B']: + if key[0] in ["b", "B"]: base_key = key - + # Get the base case arguments dictionary base = s[base_key] - + # Define temp directories for each base case - base_dir = Path(tempfile.mkdtemp(base['outDir'])) - base_input_dir = base_dir.joinpath('input') - base_output_dir = base_dir.joinpath('output') - + base_dir = Path(tempfile.mkdtemp(base["outDir"])) + base_input_dir = base_dir.joinpath("input") + base_output_dir = base_dir.joinpath("output") + # Create the input and output sub-directories base_input_dir.mkdir() base_output_dir.mkdir() - + # Define the base case file name - base_name = Path(create_base_name(base['pattern'])) - + base_name = Path(create_base_name(base["pattern"])) + # Add the base directory to list, so it can be cleaned up later base_dirs.append(base_dir) - + # Combine the base image tiles - base_img = combine_images(img_dir = base['inpDir'], pattern=base['pattern']) - + base_img = combine_images(img_dir=base["inpDir"], pattern=base["pattern"]) + # Write the base input image to temp file location write_image(img=base_img, out_dir=base_input_dir.joinpath(base_name)) - + # Run the plugin for the base case - run_plugin( - base, - in_dir=base_input_dir, - out_dir=base_output_dir - ) - + run_plugin(base, in_dir=base_input_dir, out_dir=base_output_dir) + # Organize the the arguments for each test in the scenario tests = [s[test_id] for test_id in s.keys() if test_id != base_key] scenario_ids = [test_id for test_id in s.keys() if test_id != base_key] - + # Iterate over each test for the scenario for test_id, test in 
zip(scenario_ids, tests): - # Add the test arguments and test id idlist.append(test_id) argvalues.append([base_output_dir.joinpath(base_name), test]) - - print('\n') - print('argnames:', argnames) - print('argvalues:', argvalues) - print('idlist:', idlist) - print('\n') - + + print("\n") + print("argnames:", argnames) + print("argvalues:", argvalues) + print("idlist:", idlist) + print("\n") + metafunc.parametrize(argnames, argvalues, ids=idlist, scope="class") class TestScaleAccuracy: - # Get the input configuration file path - config_file_path = Path(__file__).with_name('scale_test_config.json') + config_file_path = Path(__file__).with_name("scale_test_config.json") # Load the config file to get test inputs with open(config_file_path) as fhand: scenarios = json.load(fhand) - + def test_accuracy(self, base, test, tmp_path, caplog): - # Run the plugin for the test case run_plugin(args=test, out_dir=str(tmp_path)) - + # Make a copy of the test arguments case_results = test.copy() - + # Combine the tiled output from the plugin - tiled = combine_images(img_dir=tmp_path, pattern=test['pattern']) - + tiled = combine_images(img_dir=tmp_path, pattern=test["pattern"]) + # Read the base case image with BioReader(base) as br: full = br[:] br.close() - + # Calcualte the RMSE and get the mean of the full image RMSE, MEAN = rmse(full=full, tiled=tiled) - + # Add the rmse, percent-rmse and mean to the results - case_results['mean'] = MEAN - case_results['rmse'] = RMSE - case_results['prmse'] = RMSE/MEAN - + case_results["mean"] = MEAN + case_results["rmse"] = RMSE + case_results["prmse"] = RMSE / MEAN + # Add the results to all results all_results.append(case_results) - - print('RMSE:', RMSE) - print('mean', MEAN) - + + print("RMSE:", RMSE) + print("mean", MEAN) + # Clean up routine to run at end of testing session -@pytest.fixture(scope='session', autouse=True) +@pytest.fixture(scope="session", autouse=True) def cleanup(request): - # Write the final results def write_results(): 
print(all_results) results_json = json.dumps(all_results, indent=4) - with open(results_path, 'w') as fhand: + with open(results_path, "w") as fhand: fhand.write(results_json) fhand.close() - + # Delete the base-case temp directories def remove_files(): for base_dir in base_dirs: print(os.listdir(base_dir)) - print('cleaning up {}'.format(base_dir)) - shutil.rmtree(base_dir) - + print("cleaning up {}".format(base_dir)) + shutil.rmtree(base_dir) + request.addfinalizer(write_results) request.addfinalizer(remove_files) - \ No newline at end of file diff --git a/utils/polus-imagej-util/{{cookiecutter.project_slug}}/tests/unit_test.py b/utils/polus-imagej-util/{{cookiecutter.project_slug}}/tests/unit_test.py index 3890acef2..a0af9794b 100644 --- a/utils/polus-imagej-util/{{cookiecutter.project_slug}}/tests/unit_test.py +++ b/utils/polus-imagej-util/{{cookiecutter.project_slug}}/tests/unit_test.py @@ -10,7 +10,7 @@ from bfio.bfio import BioWriter """ -This file is autogenerated from an ImageJ plugin generation pipeline. +This file is autogenerated from an ImageJ plugin generation pipeline. It is not intended to be run directly. Run imagej-testing/shell_test.py to begin testing. 
""" @@ -36,7 +36,7 @@ def setUpClass(cls) -> None: file_handler = logging.FileHandler('imagej-testing/unit-test.log') file_handler.setFormatter(formatter) cls.logger.addHandler(file_handler) - + # Set up new log for summary of passed and failed tests cls.summary = logging.getLogger('summary') cls.summary.setLevel(logging.INFO) @@ -44,9 +44,9 @@ def setUpClass(cls) -> None: file_handler = logging.FileHandler('imagej-testing/test-summary.log') file_handler.setFormatter(formatter) cls.summary.addHandler(file_handler) - + def generate_data(self, input, wipp_type, imagej_type): - + numpy_types = { 'double' : np.float64, 'float' : np.float32, @@ -57,37 +57,37 @@ def generate_data(self, input, wipp_type, imagej_type): 'byte' : np.int8, 'boolean' : np.bool_ # np.bool_ not supported by bfio } - + if wipp_type == None: return None - + # Determine if the input data type is a collection elif wipp_type == 'collection': - + if imagej_type == None: dtype = np.double - + elif imagej_type in numpy_types.keys(): dtype = numpy_types[imagej_type] - + else: dtype = np.double - + # Create input and output path objects for the randomly generated image file input_path = Path(__file__).parent.joinpath('{}/random.ome.tif'.format(input)) #self.outputPath = Path(__file__).parent.joinpath('output/random.ome.tif') - + # Check if "input" is a sub-directory of "tests" if input_path.parent.exists(): - + # Remove the "input" sub-directory shutil.rmtree(input_path.parent) - + # Create input and output sub-directories in tests os.mkdir(input_path.parent) - + """Using auto generated images""" - + # Create a random image to be used for plugin testing infile = None outfile = None @@ -101,7 +101,7 @@ def generate_data(self, input, wipp_type, imagej_type): ) array = dtype(random_image) - + # Create a BioWriter object to write the ramdomly generated image file to tests/input dir with BioWriter(input_path) as writer: writer.X = image_shape[0] @@ -109,8 +109,8 @@ def generate_data(self, input, wipp_type, 
imagej_type): writer.dtype = array.dtype writer[:] = array[:] # Not neccessary: writer.close() - - + + """Using sample images""" # # TODO: use Imagej sample data for unit testing # # Get input source directory @@ -119,59 +119,59 @@ def generate_data(self, input, wipp_type, imagej_type): # # Create input directory in plugin test directory path # input_path = Path(__file__).with_name(input) - + # # Check if the input path already exists as a a sub-directory of "tests" # if input_path.exists(): - + # # Remove the "input" sub-directory # shutil.rmtree(input_path) - + # # Copy sample images to input folder # shutil.copytree(sample_dir, input_path) - - + + return input_path.parent - + elif wipp_type == 'array': # arr = np.random.rand(2048,2048) arr = '1,2' return arr - + elif wipp_type == 'number': number = np.random.randint(5) return number - + else: self.logger.info( 'FAILURE: The data type, {}, of input, {}, is currently not supported\n'.format(wipp_type, input) ) raise TypeError('The input data type is not currently supported') - + def output_handler(self, output, dtype): if dtype == 'collection': - + # Create output path object for the plugin output output_path = Path(__file__).with_name(output) - + # Check if output is a sub-directory of "tests" directory if output_path.exists(): - + # Delete the "output" sub-directory shutil.rmtree(output_path) - + # Create output as sub-directory of tests os.mkdir(output_path) - + return output_path - - + + def test_plugin(self): - + projectName = '{{ cookiecutter.project_name }}' self.logger.info('Testing the op: {} with overloading option: {}'.format(projectName, op)) - + method_call_types = {} - + supported_data_types = [ 'double', 'float', @@ -182,7 +182,7 @@ def test_plugin(self): 'byte', 'boolean', ] - + # Get WIPP and ImageJ data types {% for inp,val in cookiecutter._inputs.items() if inp != 'out_input' -%} {% if inp == 'opName' -%} @@ -195,24 +195,24 @@ def test_plugin(self): if dtype in supported_data_types}) {% endif 
-%} {% endfor -%} - + # Generate data for the inputs {% for inp,val in cookiecutter._inputs.items() if inp != 'out_input' -%} {% if inp != 'opName' -%} _{{ inp }} = self.generate_data( '{{ inp }}', - _{{ inp }}_wipp_types.get(op, None), + _{{ inp }}_wipp_types.get(op, None), method_call_types.get(op, None) ) {% endif -%} {% endfor -%} - + # Handle the op output {% for out,val in cookiecutter._outputs.items() -%} _{{ out }} = self.output_handler('{{ out }}', '{{ val.type }}') {% endfor -%} - - + + try: # Call the op main( @@ -231,22 +231,22 @@ def test_plugin(self): ) self.logger.info(traceback.format_exc()+'\n') self.summary.info('0') - + if __name__ == '__main__': - + # Instantiate a parser for command line arguments parser = argparse.ArgumentParser(prog='unit_test', description='Test imagej plugin') - + # Add command-line argument for each of the input arguments parser.add_argument('--opName', dest='opName', type=str, help='Operation to test', required=True) - + """ Parse the arguments """ args = parser.parse_args() - + # Input Args op = args.opName - + del sys.argv[1:] - unittest.main() \ No newline at end of file + unittest.main() diff --git a/utils/polus-imagej-util/{{cookiecutter.project_slug}}/tests/version_test.py b/utils/polus-imagej-util/{{cookiecutter.project_slug}}/tests/version_test.py index 21a365edf..c8f66e7dd 100644 --- a/utils/polus-imagej-util/{{cookiecutter.project_slug}}/tests/version_test.py +++ b/utils/polus-imagej-util/{{cookiecutter.project_slug}}/tests/version_test.py @@ -3,12 +3,10 @@ class VersionTest(unittest.TestCase): - version_path = Path(__file__).parent.parent.joinpath("VERSION") json_path = Path(__file__).parent.parent.joinpath("plugin.json") def test_plugin_manifest(self): - # Get the plugin version with open(self.version_path, "r") as file: version = file.readline() @@ -22,5 +20,4 @@ def test_plugin_manifest(self): if __name__ == "__main__": - unittest.main() diff --git a/utils/polus-notebook-plugin/Dockerfile 
b/utils/polus-notebook-plugin/Dockerfile index 442754660..8d00af00c 100644 --- a/utils/polus-notebook-plugin/Dockerfile +++ b/utils/polus-notebook-plugin/Dockerfile @@ -19,4 +19,4 @@ WORKDIR ${EXEC_DIR} USER root # Default command. Additional arguments are provided through the command line -ENTRYPOINT ["python", "/opt/executables/execute_notebook.py"] \ No newline at end of file +ENTRYPOINT ["python", "/opt/executables/execute_notebook.py"] diff --git a/utils/polus-notebook-plugin/README.md b/utils/polus-notebook-plugin/README.md index 4a16dae6b..7f3f33ae1 100644 --- a/utils/polus-notebook-plugin/README.md +++ b/utils/polus-notebook-plugin/README.md @@ -39,7 +39,7 @@ Create a local folder to emulate WIPP data folder with the name `:/data/inputs labshare/polus-notebook-plugin:0.4.0 \ --input /data/inputs/input \ diff --git a/utils/polus-notebook-plugin/VERSION b/utils/polus-notebook-plugin/VERSION index 60a2d3e96..1d0ba9ea1 100644 --- a/utils/polus-notebook-plugin/VERSION +++ b/utils/polus-notebook-plugin/VERSION @@ -1 +1 @@ -0.4.0 \ No newline at end of file +0.4.0 diff --git a/utils/polus-notebook-plugin/plugin_csv_to_csv.json b/utils/polus-notebook-plugin/plugin_csv_to_csv.json index 24804b8f4..769095345 100644 --- a/utils/polus-notebook-plugin/plugin_csv_to_csv.json +++ b/utils/polus-notebook-plugin/plugin_csv_to_csv.json @@ -1,54 +1,54 @@ { - "name": "Notebook executor (CSV->CSV)", - "version": "0.4.0", - "title": "Jupyter Notebook executor", - "description": "Execute Jupyter notebook which take input of csv collection and outputs to csv collection.", - "author": "Konstantin taletskiy (konstantin.taletskiy@labshare.org),Gauhar Bains (gauhar.bains@labshare.org)", - "containerId": "labshare/polus-notebook-plugin:0.4.0", - "inputs": [ - { - "name": "input-collection", - "type": "csvCollection", - "description": "Input csv collection for the plugin." 
- }, - { - "name": "config-file", - "type": "collection", - "description": "Config file for the notebook" - }, - { - "name": "input-notebook", - "type": "notebook", - "description": "Jupyter Notebook to execute" - } - ], - "outputs": [ - { - "name": "output-collection", - "type": "csvCollection", - "description": "Output csv collection for the plugin" - }, - { - "name": "output-notebook", - "type": "notebook", - "description": "Jupyter Notebook after execution" - } - ], - "ui": [ - { - "key": "inputs.input-collection", - "title": "CSV Collection: ", - "description": "Pick a collection..." - }, - { - "key": "inputs.config-file", - "title": "Config File (Optional): ", - "description": "Pick config file..." - }, - { - "key": "inputs.input-notebook", - "title": "Notebook: ", - "description": "Pick a notebook to execute..." - } - ] -} \ No newline at end of file + "name": "Notebook executor (CSV->CSV)", + "version": "0.4.0", + "title": "Jupyter Notebook executor", + "description": "Execute Jupyter notebook which take input of csv collection and outputs to csv collection.", + "author": "Konstantin taletskiy (konstantin.taletskiy@labshare.org),Gauhar Bains (gauhar.bains@labshare.org)", + "containerId": "labshare/polus-notebook-plugin:0.4.0", + "inputs": [ + { + "name": "input-collection", + "type": "csvCollection", + "description": "Input csv collection for the plugin." + }, + { + "name": "config-file", + "type": "collection", + "description": "Config file for the notebook" + }, + { + "name": "input-notebook", + "type": "notebook", + "description": "Jupyter Notebook to execute" + } + ], + "outputs": [ + { + "name": "output-collection", + "type": "csvCollection", + "description": "Output csv collection for the plugin" + }, + { + "name": "output-notebook", + "type": "notebook", + "description": "Jupyter Notebook after execution" + } + ], + "ui": [ + { + "key": "inputs.input-collection", + "title": "CSV Collection: ", + "description": "Pick a collection..." 
+ }, + { + "key": "inputs.config-file", + "title": "Config File (Optional): ", + "description": "Pick config file..." + }, + { + "key": "inputs.input-notebook", + "title": "Notebook: ", + "description": "Pick a notebook to execute..." + } + ] +} diff --git a/utils/polus-notebook-plugin/plugin_img_to_csv.json b/utils/polus-notebook-plugin/plugin_img_to_csv.json index b8b0d4ad6..9b62a5952 100644 --- a/utils/polus-notebook-plugin/plugin_img_to_csv.json +++ b/utils/polus-notebook-plugin/plugin_img_to_csv.json @@ -1,54 +1,54 @@ { - "name": "Notebook executor (Image->CSV)", - "version": "0.4.0", - "title": "Jupyter Notebook executor", - "description": "Execute Jupyter notebook which take input of image collection and outputs to csv collection.", - "author": "Konstantin taletskiy (konstantin.taletskiy@labshare.org),Gauhar Bains (gauhar.bains@labshare.org)", - "containerId": "labshare/polus-notebook-plugin:0.4.0", - "inputs": [ - { - "name": "input-collection", - "type": "collection", - "description": "Input image collection for the plugin." - }, - { - "name": "config-file", - "type": "collection", - "description": "Config file for the notebook" - }, - { - "name": "input-notebook", - "type": "notebook", - "description": "Jupyter Notebook to execute" - } - ], - "outputs": [ - { - "name": "output-collection", - "type": "csvCollection", - "description": "Output csv collection for the plugin" - }, - { - "name": "output-notebook", - "type": "notebook", - "description": "Jupyter Notebook after execution" - } - ], - "ui": [ - { - "key": "inputs.input-collection", - "title": "Image Collection: ", - "description": "Pick a collection..." - }, - { - "key": "inputs.config-file", - "title": "Config File (Optional): ", - "description": "Pick config file..." - }, - { - "key": "inputs.input-notebook", - "title": "Notebook: ", - "description": "Pick a notebook to execute..." 
- } - ] -} \ No newline at end of file + "name": "Notebook executor (Image->CSV)", + "version": "0.4.0", + "title": "Jupyter Notebook executor", + "description": "Execute Jupyter notebook which take input of image collection and outputs to csv collection.", + "author": "Konstantin taletskiy (konstantin.taletskiy@labshare.org),Gauhar Bains (gauhar.bains@labshare.org)", + "containerId": "labshare/polus-notebook-plugin:0.4.0", + "inputs": [ + { + "name": "input-collection", + "type": "collection", + "description": "Input image collection for the plugin." + }, + { + "name": "config-file", + "type": "collection", + "description": "Config file for the notebook" + }, + { + "name": "input-notebook", + "type": "notebook", + "description": "Jupyter Notebook to execute" + } + ], + "outputs": [ + { + "name": "output-collection", + "type": "csvCollection", + "description": "Output csv collection for the plugin" + }, + { + "name": "output-notebook", + "type": "notebook", + "description": "Jupyter Notebook after execution" + } + ], + "ui": [ + { + "key": "inputs.input-collection", + "title": "Image Collection: ", + "description": "Pick a collection..." + }, + { + "key": "inputs.config-file", + "title": "Config File (Optional): ", + "description": "Pick config file..." + }, + { + "key": "inputs.input-notebook", + "title": "Notebook: ", + "description": "Pick a notebook to execute..." 
+ } + ] +} diff --git a/utils/polus-notebook-plugin/plugin_img_to_img.json b/utils/polus-notebook-plugin/plugin_img_to_img.json index 962f6e2ac..250e99f88 100644 --- a/utils/polus-notebook-plugin/plugin_img_to_img.json +++ b/utils/polus-notebook-plugin/plugin_img_to_img.json @@ -1,54 +1,54 @@ { - "name": "Notebook executor (Image->Image)", - "version": "0.4.0", - "title": "Jupyter Notebook executor", - "description": "Execute Jupyter notebook which take input of image collection and outputs to image collection.", - "author": "Konstantin taletskiy (konstantin.taletskiy@labshare.org),Gauhar Bains (gauhar.bains@labshare.org)", - "containerId": "labshare/polus-notebook-plugin:0.4.0", - "inputs": [ - { - "name": "input-collection", - "type": "collection", - "description": "Input image collection for the plugin." - }, - { - "name": "config-file", - "type": "collection", - "description": "Config file for the notebook" - }, - { - "name": "input-notebook", - "type": "notebook", - "description": "Jupyter Notebook to execute" - } - ], - "outputs": [ - { - "name": "output-collection", - "type": "collection", - "description": "Output image collection for the plugin" - }, - { - "name": "output-notebook", - "type": "notebook", - "description": "Jupyter Notebook after execution" - } - ], - "ui": [ - { - "key": "inputs.input-collection", - "title": "Image Collection: ", - "description": "Pick a collection..." - }, - { - "key": "inputs.config-file", - "title": "Config File (Optional): ", - "description": "Pick config file..." - }, - { - "key": "inputs.input-notebook", - "title": "Notebook: ", - "description": "Pick a notebook to execute..." 
- } - ] -} \ No newline at end of file + "name": "Notebook executor (Image->Image)", + "version": "0.4.0", + "title": "Jupyter Notebook executor", + "description": "Execute Jupyter notebook which take input of image collection and outputs to image collection.", + "author": "Konstantin taletskiy (konstantin.taletskiy@labshare.org),Gauhar Bains (gauhar.bains@labshare.org)", + "containerId": "labshare/polus-notebook-plugin:0.4.0", + "inputs": [ + { + "name": "input-collection", + "type": "collection", + "description": "Input image collection for the plugin." + }, + { + "name": "config-file", + "type": "collection", + "description": "Config file for the notebook" + }, + { + "name": "input-notebook", + "type": "notebook", + "description": "Jupyter Notebook to execute" + } + ], + "outputs": [ + { + "name": "output-collection", + "type": "collection", + "description": "Output image collection for the plugin" + }, + { + "name": "output-notebook", + "type": "notebook", + "description": "Jupyter Notebook after execution" + } + ], + "ui": [ + { + "key": "inputs.input-collection", + "title": "Image Collection: ", + "description": "Pick a collection..." + }, + { + "key": "inputs.config-file", + "title": "Config File (Optional): ", + "description": "Pick config file..." + }, + { + "key": "inputs.input-notebook", + "title": "Notebook: ", + "description": "Pick a notebook to execute..." 
+ } + ] +} diff --git a/utils/polus-notebook-plugin/src/execute_notebook.py b/utils/polus-notebook-plugin/src/execute_notebook.py index 80f45473e..bea1f967c 100644 --- a/utils/polus-notebook-plugin/src/execute_notebook.py +++ b/utils/polus-notebook-plugin/src/execute_notebook.py @@ -1,68 +1,106 @@ import argparse +import json +import logging import os import time -import pathlib + import papermill as pm -import json -import logging + def main(): - # intitialize logging - logging.basicConfig(format='%(asctime)s - %(name)-8s - %(levelname)-8s - %(message)s', - datefmt='%d-%b-%y %H:%M:%S') + logging.basicConfig( + format="%(asctime)s - %(name)-8s - %(levelname)-8s - %(message)s", + datefmt="%d-%b-%y %H:%M:%S", + ) logger = logging.getLogger("main") logger.setLevel(logging.INFO) - + # Setup the Argument parsing - parser = argparse.ArgumentParser(prog='script', description='Script to execute Jupyter Notebooks') + parser = argparse.ArgumentParser( + prog="script", description="Script to execute Jupyter Notebooks", + ) # Parse input arguments from WIPP format: '--PARAMETER VALUE' - parser.add_argument('--input-collection', dest='input_collection', type=str, help='input image collection', required=True) - parser.add_argument('--input-notebook', dest='input_notebook', type=str, help='Jupyter notebook to run', required=True) - parser.add_argument('--output-collection', dest='output_collection', type=str, help='output collection', required=True) - parser.add_argument('--output-notebook', dest='output_notebook', type=str, help='executed notebook', required=True) - parser.add_argument('--config-file', dest='config_file', type=str, help='configuration file', required=False) + parser.add_argument( + "--input-collection", + dest="input_collection", + type=str, + help="input image collection", + required=True, + ) + parser.add_argument( + "--input-notebook", + dest="input_notebook", + type=str, + help="Jupyter notebook to run", + required=True, + ) + parser.add_argument( + 
"--output-collection", + dest="output_collection", + type=str, + help="output collection", + required=True, + ) + parser.add_argument( + "--output-notebook", + dest="output_notebook", + type=str, + help="executed notebook", + required=True, + ) + parser.add_argument( + "--config-file", + dest="config_file", + type=str, + help="configuration file", + required=False, + ) args = parser.parse_args() - + input_collection = args.input_collection - input_notebook = os.path.join(args.input_notebook, 'notebook.ipynb') + input_notebook = os.path.join(args.input_notebook, "notebook.ipynb") output_collection = args.output_collection - output_notebook = os.path.join(args.output_notebook, 'notebook.ipynb') - config_file=args.config_file + output_notebook = os.path.join(args.output_notebook, "notebook.ipynb") + config_file = args.config_file + + logger.info("Arguments:") + logger.info(f"Input collection: {input_collection}") + logger.info(f"Input notebook: {input_notebook}") + logger.info(f"Config file: {config_file}") + logger.info(f"Output collection: {output_collection}") + logger.info(f"Output notebook: {output_notebook}") - - logger.info('Arguments:') - logger.info('Input collection: {}'.format(input_collection)) - logger.info('Input notebook: {}'.format(input_notebook)) - logger.info('Config file: {}'.format(config_file)) - logger.info('Output collection: {}'.format(output_collection)) - logger.info('Output notebook: {}'.format(output_notebook)) - - - logger.info('Beginning notebook execution...') + logger.info("Beginning notebook execution...") process_start = time.time() with open(input_notebook) as nbfile: - is_sos = json.load(nbfile)['metadata']['kernelspec']['language'] == 'sos' + is_sos = json.load(nbfile)["metadata"]["kernelspec"]["language"] == "sos" - if config_file == None: - out = pm.execute_notebook( - input_notebook, - output_notebook, - engine_name="sos" if is_sos else None, - parameters=dict(input_path=input_collection, output_path=output_collection) + if 
config_file is None: + pm.execute_notebook( + input_notebook, + output_notebook, + engine_name="sos" if is_sos else None, + parameters={"input_path": input_collection, "output_path": output_collection}, ) else: - out = pm.execute_notebook( - input_notebook, - output_notebook, - engine_name="sos" if is_sos else None, - parameters=dict(input_path=input_collection, output_path=output_collection, config_file_path=config_file) - ) + pm.execute_notebook( + input_notebook, + output_notebook, + engine_name="sos" if is_sos else None, + parameters={ + "input_path": input_collection, + "output_path": output_collection, + "config_file_path": config_file, + }, + ) + + logger.info( + f"Execution completed in {time.time() - process_start} seconds!", + ) + - - logger.info('Execution completed in {} seconds!'.format(time.time() - process_start)) - if __name__ == "__main__": - main() \ No newline at end of file + main() diff --git a/utils/polus-python-template/.gitignore b/utils/polus-python-template/.gitignore index d27abdcf4..c04bc49f7 100644 --- a/utils/polus-python-template/.gitignore +++ b/utils/polus-python-template/.gitignore @@ -1 +1 @@ -poetry.lock \ No newline at end of file +poetry.lock diff --git a/utils/polus-python-template/README.md b/utils/polus-python-template/README.md index 021868354..d7a8583a5 100644 --- a/utils/polus-python-template/README.md +++ b/utils/polus-python-template/README.md @@ -10,9 +10,9 @@ polus plugin and add it to the polus plugins directory structure. 4. (optional) Create a dedicated environment with conda or venv. 5. Install the dependencies: `poetry install` 6. Ignore changes to `cookiecutter.json` using: `git update-index --assume-unchanged cookiecutter.json` -7. Modify `cookiecutter.json` to include author and plugin information.`plugin_package` should always start with `polus.plugins`. +7. Modify `cookiecutter.json` to include author and plugin information.`plugin_package` should always start with `polus.plugins`. 
** NOTE: ** Do not edit values in brackets ({}) as they are edited by cookiecutter directly. -Those are automatically generated from the previous entries. If your plugin is called +Those are automatically generated from the previous entries. If your plugin is called "Awesome Function", then the plugin folder and docker container will have the name `awesome-function-plugin`. 8. Create your plugin skeleton: ` python -m cookiecutter . --no-input` diff --git a/utils/polus-python-template/cookiecutter.json b/utils/polus-python-template/cookiecutter.json index 030f8cf3c..a924ce99b 100644 --- a/utils/polus-python-template/cookiecutter.json +++ b/utils/polus-python-template/cookiecutter.json @@ -1,16 +1,15 @@ { - "author": "Data Scientist", - "author_email": "data.scientist@labshare.org", - "plugin_name": "Awesome Plugin", - "plugin_package": "polus.plugins.package1.package2.awesome_function", - "plugin_description": "An awesome function.", - "plugin_version": "0.1.0", - - "package_folders": "{%set folders = cookiecutter.plugin_package.replace('.', '/') %}{{folders}}", - "package_name": "{% set packages = cookiecutter.plugin_package.split('.') %}{{ packages | last }}", - "project_name": "{% set project_name = cookiecutter.plugin_package.replace('_', '-').replace('.', '-') %}{{ project_name }}", - "plugin_slug": "{% set plugin_slug = cookiecutter.package_name.replace('_', '-') %}polus-{{plugin_slug}}-plugin", - "container_name": "{% set container_name = ('-').join(cookiecutter.plugin_slug.split('-')[1:])%}{{ container_name }}", - "container_id": "polusai/{{cookiecutter.container_name}}", - "container_version": "{{cookiecutter.plugin_version}}" + "author": "Data Scientist", + "author_email": "data.scientist@labshare.org", + "plugin_name": "Awesome Plugin", + "plugin_package": "polus.plugins.package1.package2.awesome_function", + "plugin_description": "An awesome function.", + "plugin_version": "0.1.0", + "package_folders": "{%set folders = 
cookiecutter.plugin_package.replace('.', '/') %}{{folders}}", + "package_name": "{% set packages = cookiecutter.plugin_package.split('.') %}{{ packages | last }}", + "project_name": "{% set project_name = cookiecutter.plugin_package.replace('_', '-').replace('.', '-') %}{{ project_name }}", + "plugin_slug": "{% set plugin_slug = cookiecutter.package_name.replace('_', '-') %}polus-{{plugin_slug}}-plugin", + "container_name": "{% set container_name = ('-').join(cookiecutter.plugin_slug.split('-')[1:])%}{{ container_name }}", + "container_id": "polusai/{{cookiecutter.container_name}}", + "container_version": "{{cookiecutter.plugin_version}}" } diff --git a/utils/polus-python-template/hooks/post_gen_project.py b/utils/polus-python-template/hooks/post_gen_project.py index f3f0ee429..a9853390d 100644 --- a/utils/polus-python-template/hooks/post_gen_project.py +++ b/utils/polus-python-template/hooks/post_gen_project.py @@ -1,8 +1,8 @@ +import logging import os import shutil -from pathlib import Path -import logging from os import environ +from pathlib import Path logging.basicConfig( format="%(asctime)s - %(name)-8s - %(levelname)-8s - %(message)s", @@ -14,15 +14,14 @@ def create_repository_directories(source_dir): - """ Buid the correct directories inside polus-plugins. + """Buid the correct directories inside polus-plugins. The directory structure must conforms to the plugin's spec : - dash-separated word in identifier. - folder hierarchy matches package namespace minus "polus.plugins" - plugin's folder name reflects the plugin package name but ends with "-plugin" Ex: polus.plugins.package1.package2.awesome_function becomes - package1/package2/awesome-function-plugin + package1/package2/awesome-function-plugin. 
""" - # try to find the project's root, otherwise we stay in the # staging directory final_dir = source_dir.parent @@ -31,7 +30,7 @@ def create_repository_directories(source_dir): final_dir = folder break - # by default we create a plugin directory at the root + # by default we create a plugin directory at the root target_dir = final_dir # figure out if additional directories need to be created at the root @@ -50,8 +49,8 @@ def create_repository_directories(source_dir): os.makedirs(target_dir, exist_ok=True) return target_dir - - + + def move_project_source_to_final_location(): """Move staged files to the the final target repo.""" source_dir = Path(os.getcwd()) @@ -59,5 +58,5 @@ def move_project_source_to_final_location(): logger.debug(f"moving sources from {source_dir} to {target_dir}") shutil.move(source_dir, target_dir) + # NOTE do not create folder structure with the repo at the moment. -# move_project_source_to_final_location() \ No newline at end of file diff --git a/utils/polus-python-template/hooks/pre_gen_project.py b/utils/polus-python-template/hooks/pre_gen_project.py index 802f5d154..0e7aa4eed 100644 --- a/utils/polus-python-template/hooks/pre_gen_project.py +++ b/utils/polus-python-template/hooks/pre_gen_project.py @@ -1,6 +1,4 @@ -""" -Validate of template variables before templating the project -""" +"""Validate of template variables before templating the project.""" import logging from os import environ @@ -24,12 +22,13 @@ plugin_package = "{{ cookiecutter.plugin_package }}" if not plugin_package.startswith("polus.plugins."): raise ValueError( - f"plugin package must be a child of polus.plugins." - + f"plugin_package must start with 'polus.plugins'. Got : {plugin_package}" + "plugin package must be a child of polus.plugins." + + f"plugin_package must start with 'polus.plugins'. Got : {plugin_package}", ) if plugin_package.endswith("_plugin"): + msg = f"plugin_package must not ends with _plugin. 
Got : {plugin_package}" raise ValueError( - f"plugin_package must not ends with _plugin. Got : {plugin_package}" + msg, ) # TODO check we have a valid python package name @@ -38,10 +37,12 @@ # TODO check version is valid project_name = "{{ cookiecutter.project_name }}" -assert not ("_" in project_name) and not ("." in project_name) +assert "_" not in project_name +assert "." not in project_name plugin_slug = "{{ cookiecutter.plugin_slug }}" -assert plugin_slug.startswith("polus-") and plugin_slug.endswith("-plugin") +assert plugin_slug.startswith("polus-") +assert plugin_slug.endswith("-plugin") container_name = "{{ cookiecutter.container_name }}" assert container_name.endswith("-plugin") @@ -52,4 +53,4 @@ container_version = "{{ cookiecutter.container_version }}" assert container_version == plugin_version -logger.debug(f"plugin_package: {plugin_package}" ) +logger.debug(f"plugin_package: {plugin_package}") diff --git a/utils/polus-python-template/{{cookiecutter.container_name}}/plugin.json b/utils/polus-python-template/{{cookiecutter.container_name}}/plugin.json index 69714cb83..e93e9fc54 100644 --- a/utils/polus-python-template/{{cookiecutter.container_name}}/plugin.json +++ b/utils/polus-python-template/{{cookiecutter.container_name}}/plugin.json @@ -60,4 +60,4 @@ "default": "False" } } -} \ No newline at end of file +} diff --git a/utils/polus-python-template/{{cookiecutter.container_name}}/src/{{cookiecutter.package_folders}}/{{ cookiecutter.package_name }}.py b/utils/polus-python-template/{{cookiecutter.container_name}}/src/{{cookiecutter.package_folders}}/{{ cookiecutter.package_name }}.py index 2573a729b..59de05eb2 100644 --- a/utils/polus-python-template/{{cookiecutter.container_name}}/src/{{cookiecutter.package_folders}}/{{ cookiecutter.package_name }}.py +++ b/utils/polus-python-template/{{cookiecutter.container_name}}/src/{{cookiecutter.package_folders}}/{{ cookiecutter.package_name }}.py @@ -13,4 +13,4 @@ def {{cookiecutter.package_name}}(inp_dir: 
Path, filepattern: str, out_dir: Path Returns: None """ - pass \ No newline at end of file + pass diff --git a/utils/polus-python-template/{{cookiecutter.container_name}}/tests/conftest.py b/utils/polus-python-template/{{cookiecutter.container_name}}/tests/conftest.py index fd0c32168..3d34999f0 100644 --- a/utils/polus-python-template/{{cookiecutter.container_name}}/tests/conftest.py +++ b/utils/polus-python-template/{{cookiecutter.container_name}}/tests/conftest.py @@ -11,6 +11,7 @@ from bfio import BioWriter, BioReader + def pytest_addoption(parser: pytest.Parser) -> None: """Add options to pytest.""" parser.addoption( @@ -29,22 +30,16 @@ def pytest_addoption(parser: pytest.Parser) -> None: ) - - -IMAGE_SIZES = [(1024 * (2**i) ,1024 * (2**i)) for i in range(1, 2)] -LARGE_IMAGE_SIZES = [(1024 * (2**i) ,1024 * (2**i)) for i in range(4, 5)] +IMAGE_SIZES = [(1024 * (2**i), 1024 * (2**i)) for i in range(1, 2)] +LARGE_IMAGE_SIZES = [(1024 * (2**i), 1024 * (2**i)) for i in range(4, 5)] PIXEL_TYPES = [np.uint8, float] PARAMS = [ (image_size, pixel_type) - for image_size, pixel_type in itertools.product( - IMAGE_SIZES, PIXEL_TYPES - ) + for image_size, pixel_type in itertools.product(IMAGE_SIZES, PIXEL_TYPES) ] LARGE_DATASET_PARAMS = [ (image_size, pixel_type) - for image_size, pixel_type in itertools.product( - LARGE_IMAGE_SIZES, PIXEL_TYPES - ) + for image_size, pixel_type in itertools.product(LARGE_IMAGE_SIZES, PIXEL_TYPES) ] @@ -63,7 +58,7 @@ def generate_test_data(request: pytest.FixtureRequest) -> FixtureReturnType: # collect test params image_size, pixel_type = request.param - test_data = _generate_test_data(image_size, pixel_type) + test_data = _generate_test_data(image_size, pixel_type) print(test_data) yield from test_data @@ -74,14 +69,16 @@ def generate_large_test_data(request: pytest.FixtureRequest) -> FixtureReturnTyp # collect test params image_size, pixel_type = request.param - test_data =_generate_test_data(image_size, pixel_type) + test_data = 
_generate_test_data(image_size, pixel_type) print(test_data) yield from test_data -def _generate_test_data(image_size : tuple[int,int], pixel_type: int) -> FixtureReturnType: +def _generate_test_data( + image_size: tuple[int, int], pixel_type: int +) -> FixtureReturnType: """Generate staging temporary directories with test data and ground truth.""" image_x, image_y = image_size @@ -105,19 +102,13 @@ def _generate_test_data(image_size : tuple[int,int], pixel_type: int) -> Fixture shutil.rmtree(data_dir) -def gen_2D_image( - img_path, - image_x, - image_y, - pixel_type -) : + +def gen_2D_image(img_path, image_x, image_y, pixel_type): """Generate a random 2D square image.""" - if np.issubdtype(pixel_type, np.floating) : + if np.issubdtype(pixel_type, np.floating): rng = np.random.default_rng() - image = rng.uniform(0.0, 1.0, - size=(image_y, image_x) - ).astype(pixel_type) + image = rng.uniform(0.0, 1.0, size=(image_y, image_x)).astype(pixel_type) else: image = np.random.randint(0, 255, size=(image_y, image_x)) @@ -134,7 +125,7 @@ def gen_2D_image( return image -def gen_ground_truth(img_path : Path, ground_truth_path : Path): +def gen_ground_truth(img_path: Path, ground_truth_path: Path): """generate some ground truth from the image data. Here we generate a simple binary mask. 
""" @@ -144,4 +135,4 @@ def gen_ground_truth(img_path : Path, ground_truth_path : Path): ground_truth = np.asarray(reader[:] != 0) writer[:] = ground_truth - return ground_truth \ No newline at end of file + return ground_truth diff --git a/utils/polus-python-template/{{cookiecutter.container_name}}/tests/test_{{cookiecutter.package_name}}.py b/utils/polus-python-template/{{cookiecutter.container_name}}/tests/test_{{cookiecutter.package_name}}.py index 75e3552e2..efbc5e2b9 100644 --- a/utils/polus-python-template/{{cookiecutter.container_name}}/tests/test_{{cookiecutter.package_name}}.py +++ b/utils/polus-python-template/{{cookiecutter.container_name}}/tests/test_{{cookiecutter.package_name}}.py @@ -19,4 +19,4 @@ def test_{{cookiecutter.package_name}}(generate_large_test_data : FixtureReturnT """Test {{cookiecutter.package_name}}.""" inp_dir, out_dir, ground_truth_dir, img_path, ground_truth_path = generate_large_test_data filepattern = ".*" - assert {{cookiecutter.package_name}}(inp_dir, filepattern, out_dir) == None \ No newline at end of file + assert {{cookiecutter.package_name}}(inp_dir, filepattern, out_dir) == None diff --git a/utils/polus-stitching-vector-merger-plugin/Dockerfile b/utils/polus-stitching-vector-merger-plugin/Dockerfile index 2733ed1bc..93372600d 100644 --- a/utils/polus-stitching-vector-merger-plugin/Dockerfile +++ b/utils/polus-stitching-vector-merger-plugin/Dockerfile @@ -1,4 +1,4 @@ FROM python:3.6-alpine COPY VERSION / COPY main.py main.py -ENTRYPOINT ["python3", "main.py"] \ No newline at end of file +ENTRYPOINT ["python3", "main.py"] diff --git a/utils/polus-stitching-vector-merger-plugin/VERSION b/utils/polus-stitching-vector-merger-plugin/VERSION index 84aa3a7dd..699c6c6d4 100644 --- a/utils/polus-stitching-vector-merger-plugin/VERSION +++ b/utils/polus-stitching-vector-merger-plugin/VERSION @@ -1 +1 @@ -0.1.8 \ No newline at end of file +0.1.8 diff --git a/utils/polus-stitching-vector-merger-plugin/build-docker.sh 
b/utils/polus-stitching-vector-merger-plugin/build-docker.sh index 5515f8634..61002d402 100755 --- a/utils/polus-stitching-vector-merger-plugin/build-docker.sh +++ b/utils/polus-stitching-vector-merger-plugin/build-docker.sh @@ -1,4 +1,4 @@ #!/bin/bash version=$( None: image = fmask.copy() pol = np.array(poly[i][0]) mask = sk.draw.polygon2mask((x, y), pol) - image[mask == False] = 0 - image[mask == True] = 1 + image[mask is False] = 0 + image[mask is True] = 1 fmask += image fmask = np.rot90(fmask) fmask = np.flipud(fmask) diff --git a/visualization/polus-color-pyramid-builder-plugin/Dockerfile b/visualization/polus-color-pyramid-builder-plugin/Dockerfile index 3afafceec..a81e42a21 100644 --- a/visualization/polus-color-pyramid-builder-plugin/Dockerfile +++ b/visualization/polus-color-pyramid-builder-plugin/Dockerfile @@ -2,7 +2,7 @@ FROM polusai/bfio:2.1.9 COPY VERSION / - + ARG EXEC_DIR="/opt/executables" ARG DATA_DIR="/data" @@ -21,4 +21,4 @@ RUN python3 ${EXEC_DIR}/dl_fi.py WORKDIR ${EXEC_DIR} # Default command. Additional arguments are provided through the command line -ENTRYPOINT ["python3", "/opt/executables/main.py"] \ No newline at end of file +ENTRYPOINT ["python3", "/opt/executables/main.py"] diff --git a/visualization/polus-color-pyramid-builder-plugin/README.md b/visualization/polus-color-pyramid-builder-plugin/README.md index 1e0c56d5f..0f061aed3 100644 --- a/visualization/polus-color-pyramid-builder-plugin/README.md +++ b/visualization/polus-color-pyramid-builder-plugin/README.md @@ -76,4 +76,3 @@ This plugin takes one input argument and one output argument: | `--layout` | Color ordering (e.g. 1,11,,,,5,6) | Input | string | | `--bounds` | Set bounds (should be float-float, int-int, or blank, e.g. 0.01-0.99,0-16000,,,,,) | Input | string | | `--outDir` | Output pyramid path. 
| Output | pyramid | - diff --git a/visualization/polus-color-pyramid-builder-plugin/VERSION b/visualization/polus-color-pyramid-builder-plugin/VERSION index 87a087111..1c09c74e2 100644 --- a/visualization/polus-color-pyramid-builder-plugin/VERSION +++ b/visualization/polus-color-pyramid-builder-plugin/VERSION @@ -1 +1 @@ -0.3.3 \ No newline at end of file +0.3.3 diff --git a/visualization/polus-color-pyramid-builder-plugin/build-docker.sh b/visualization/polus-color-pyramid-builder-plugin/build-docker.sh index f58403620..2a6dc43f9 100755 --- a/visualization/polus-color-pyramid-builder-plugin/build-docker.sh +++ b/visualization/polus-color-pyramid-builder-plugin/build-docker.sh @@ -1,4 +1,4 @@ #!/bin/bash version=$(= 0 and lower_bound <= 1.0 - assert upper_bound >= 0 and upper_bound <= 1.0 - + assert isinstance(lower_bound, float) + assert isinstance(upper_bound, float) + assert lower_bound >= 0 + assert lower_bound <= 1.0 + assert upper_bound >= 0 + assert upper_bound <= 1.0 + # Get the image size in pixels image_size = br.num_x() * br.num_y() - + # Get number of pixels needed to get percentile information - upper_bound_size = int(image_size * (1-upper_bound)) + upper_bound_size = int(image_size * (1 - upper_bound)) lower_bound_size = int(image_size * lower_bound) - + # Create the pixel buffer - dtype = br.read_image(X=[0,1024],Y=[0,1024],Z=[0,1]).dtype - upper_bound_vals = np.zeros((2*upper_bound_size,),dtype=dtype) - lower_bound_vals = np.full((2*lower_bound_size,),np.iinfo(dtype).max,dtype=dtype) - + dtype = br.read_image(X=[0, 1024], Y=[0, 1024], Z=[0, 1]).dtype + upper_bound_vals = np.zeros((2 * upper_bound_size,), dtype=dtype) + lower_bound_vals = np.full( + (2 * lower_bound_size,), np.iinfo(dtype).max, dtype=dtype, + ) + # Load image tiles and sort pixels - for x in range(0,br.num_x(),8192): - for y in range(0,br.num_y(),8192): - + for x in range(0, br.num_x(), 8192): + for y in range(0, br.num_y(), 8192): # Load the first tile - tile = 
br.read_image(X=[x,min([x+8192,br.num_x()])], - Y=[y,min([y+8192,br.num_y()])], - Z=[0,1]) - + tile = br.read_image( + X=[x, min([x + 8192, br.num_x()])], + Y=[y, min([y + 8192, br.num_y()])], + Z=[0, 1], + ) + # Sort the non-zero values - tile_sorted = np.sort(tile[tile.nonzero()],axis=None) + tile_sorted = np.sort(tile[tile.nonzero()], axis=None) # Store the upper and lower bound pixel values temp = tile_sorted[-upper_bound_size:] - upper_bound_vals[:temp.size] = temp + upper_bound_vals[: temp.size] = temp temp = tile_sorted[:lower_bound_size] - lower_bound_vals[-temp.size:] = temp - + lower_bound_vals[-temp.size :] = temp + # Resort the pixels - upper_bound_vals = np.sort(upper_bound_vals,axis=None) - lower_bound_vals = np.sort(lower_bound_vals,axis=None) - - return [lower_bound_vals[lower_bound_size],upper_bound_vals[-upper_bound_size]] + upper_bound_vals = np.sort(upper_bound_vals, axis=None) + lower_bound_vals = np.sort(lower_bound_vals, axis=None) + + return [lower_bound_vals[lower_bound_size], upper_bound_vals[-upper_bound_size]] -if __name__=="__main__": + +if __name__ == "__main__": # Initialize the logger - logging.basicConfig(format='%(asctime)s - %(name)-8s - %(levelname)-8s - %(message)s', - datefmt='%d-%b-%y %H:%M:%S') + logging.basicConfig( + format="%(asctime)s - %(name)-8s - %(levelname)-8s - %(message)s", + datefmt="%d-%b-%y %H:%M:%S", + ) logger = logging.getLogger("main") logger.setLevel(logging.INFO) - ''' Argument parsing ''' + """ Argument parsing """ logger.info("Parsing arguments...") - parser = argparse.ArgumentParser(prog='main', description='Builds a DeepZoom color pyramid.') - + parser = argparse.ArgumentParser( + prog="main", description="Builds a DeepZoom color pyramid.", + ) + # Input arguments - parser.add_argument('--filePattern', dest='filePattern', type=str, - help='Filename pattern used to separate data', required=True) - parser.add_argument('--inpDir', dest='inpDir', type=str, - help='Input image collection to be processed by 
this plugin', required=True) - parser.add_argument('--layout', dest='layout', type=str, - help='Color ordering (e.g. 1,11,,,,5,6)', required=True) - parser.add_argument('--bounds', dest='bounds', type=str, - help='Set bounds (should be float-float, int-int, or blank, e.g. 0.01-0.99,0-16000,,,,,)', required=False) - parser.add_argument('--alpha', dest='alpha', type=str, - help='If true, transparency is equal to pixel intensity in the pyramid.', required=False) - parser.add_argument('--stitchPath', dest='stitch_path', type=str, - help='Path to a stitching vector.', required=False) - parser.add_argument('--background', dest='background', type=str, - help='Background fill value.', required=False) - + parser.add_argument( + "--filePattern", + dest="filePattern", + type=str, + help="Filename pattern used to separate data", + required=True, + ) + parser.add_argument( + "--inpDir", + dest="inpDir", + type=str, + help="Input image collection to be processed by this plugin", + required=True, + ) + parser.add_argument( + "--layout", + dest="layout", + type=str, + help="Color ordering (e.g. 1,11,,,,5,6)", + required=True, + ) + parser.add_argument( + "--bounds", + dest="bounds", + type=str, + help="Set bounds (should be float-float, int-int, or blank, e.g. 
0.01-0.99,0-16000,,,,,)", + required=False, + ) + parser.add_argument( + "--alpha", + dest="alpha", + type=str, + help="If true, transparency is equal to pixel intensity in the pyramid.", + required=False, + ) + parser.add_argument( + "--stitchPath", + dest="stitch_path", + type=str, + help="Path to a stitching vector.", + required=False, + ) + parser.add_argument( + "--background", + dest="background", + type=str, + help="Background fill value.", + required=False, + ) + # Output arguments - parser.add_argument('--outDir', dest='outDir', type=str, - help='Output pyramid path.', required=True) - + parser.add_argument( + "--outDir", dest="outDir", type=str, help="Output pyramid path.", required=True, + ) + # Parse the arguments args = parser.parse_args() filePattern = args.filePattern - logger.info('filePattern = {}'.format(filePattern)) + logger.info(f"filePattern = {filePattern}") inpDir = args.inpDir - if (Path.is_dir(Path(args.inpDir).joinpath('images'))): + if Path.is_dir(Path(args.inpDir).joinpath("images")): # switch to images folder if present - fpath = str(Path(args.inpDir).joinpath('images').absolute()) - logger.info('inpDir = {}'.format(inpDir)) + fpath = str(Path(args.inpDir).joinpath("images").absolute()) + logger.info(f"inpDir = {inpDir}") layout = args.layout - logger.info('layout = {}'.format(layout)) + logger.info(f"layout = {layout}") bounds = args.bounds - logger.info('bounds = {}'.format(bounds)) - alpha = args.alpha == 'true' - logger.info('alpha = {}'.format(alpha)) + logger.info(f"bounds = {bounds}") + alpha = args.alpha == "true" + logger.info(f"alpha = {alpha}") outDir = args.outDir - logger.info('outDir = {}'.format(outDir)) + logger.info(f"outDir = {outDir}") outDir = Path(outDir) stitch_path = args.stitch_path - logger.info('stitchPath = {}'.format(stitch_path)) + logger.info(f"stitchPath = {stitch_path}") background = args.background - logger.info('background = {}'.format(background)) - + logger.info(f"background = {background}") + # 
Parse the layout - layout = [None if l=='' else int(l) for l in layout.split(',')] - if len(layout)>7: + layout = [None if l == "" else int(l) for l in layout.split(",")] + if len(layout) > 7: layout = layout[:7] - + # Parse the bounds - if bounds != None: - bounds = [[None] if l=='' else get_number(l) for l in bounds.split(',')] - bounds = bounds[:len(layout)] + if bounds is not None: + bounds = [[None] if l == "" else get_number(l) for l in bounds.split(",")] + bounds = bounds[: len(layout)] else: bounds = [[None] for _ in layout] - + # Parse files - fp = filepattern.FilePattern(inpDir,filePattern) - + fp = filepattern.FilePattern(inpDir, filePattern) + # A channel variable is expected, throw an error if it doesn't exist - if 'c' not in fp.variables: - raise ValueError('A channel variable is expected in the filepattern.') - + if "c" not in fp.variables: + msg = "A channel variable is expected in the filepattern." + raise ValueError(msg) + count = 0 - + for files in fp.iterate(group_by=fp.variables): - count += 1 - outDirFrame = outDir.joinpath('{}_files'.format(count)) + outDirFrame = outDir.joinpath(f"{count}_files") outDirFrame.mkdir() bioreaders = [] threads = [] - with ThreadPoolExecutor(max([multiprocessing.cpu_count()//2,2])) as executor: - for i,l in enumerate(layout): - if l == None: + with ThreadPoolExecutor(max([multiprocessing.cpu_count() // 2, 2])) as executor: + for i, l in enumerate(layout): + if l is None: bioreaders.append(None) continue - + # Create the type of BioReader based on whether a stitching vector is present - if stitch_path == None: + if stitch_path is None: # Create a standard BioReader object - f_path = [f for f in files if f['c']==l] - if len(f_path)==0: + f_path = [f for f in files if f["c"] == l] + if len(f_path) == 0: bioreaders.append(None) continue - f_path = f_path[0]['file'] - bioreaders.append(BioReader(f_path,max_workers=multiprocessing.cpu_count())) + f_path = f_path[0]["file"] + bioreaders.append( + BioReader(f_path, 
max_workers=multiprocessing.cpu_count()), + ) else: # Create a BioAssembler object, which assembles images when called - f_tiles = [f for f in files if f['c']==l] - if len(f_tiles)==0: + f_tiles = [f for f in files if f["c"] == l] + if len(f_tiles) == 0: continue for stitch in Path(stitch_path).iterdir(): - br = utils.BioAssembler(inpDir,stitch,multiprocessing.cpu_count()) - f_names = [f['file'] for f in br._file_dict['filePos']] + br = utils.BioAssembler( + inpDir, stitch, multiprocessing.cpu_count(), + ) + f_names = [f["file"] for f in br._file_dict["filePos"]] found_file = False - + # Check to see if the first file is in the stitching vector - if Path(f_tiles[0]['file']).name in f_names: + if Path(f_tiles[0]["file"]).name in f_names: bioreaders.append(br) found_file = True break if not found_file: bioreaders.append(None) continue - + # Set the rescaling bounds - if layout[i] != None: - if isinstance(bounds[i][0],float): - logger.info('{}: Getting percentile bounds {}...'.format(Path(bioreaders[-1]._file_path).name, - bounds[i])) - # get_bounds(bioreaders[-1],bounds[i][0],bounds[i][1]) - threads.append(executor.submit(get_bounds,bioreaders[-1],bounds[i][0],bounds[i][1])) - elif isinstance(bounds[i][0],int): + if layout[i] is not None: + if isinstance(bounds[i][0], float): + logger.info( + "{}: Getting percentile bounds {}...".format( + Path(bioreaders[-1]._file_path).name, bounds[i], + ), + ) + threads.append( + executor.submit( + get_bounds, bioreaders[-1], bounds[i][0], bounds[i][1], + ), + ) + elif isinstance(bounds[i][0], int): bioreaders[-1].bounds = bounds[i] else: - bioreaders[-1].bounds = [np.iinfo(bioreaders[-1].read_metadata().image().Pixels.get_PixelType()).min, - np.iinfo(bioreaders[-1].read_metadata().image().Pixels.get_PixelType()).max] - + bioreaders[-1].bounds = [ + np.iinfo( + bioreaders[-1] + .read_metadata() + .image() + .Pixels.get_PixelType(), + ).min, + np.iinfo( + bioreaders[-1] + .read_metadata() + .image() + .Pixels.get_PixelType(), + 
).max, + ] + for i in reversed(range(len(layout))): - if bioreaders[i] == None: + if bioreaders[i] is None: continue - if isinstance(bounds[i][0],int) or bounds[i][0] == None: - logger.info('Color {}: {} (rescaling to {})'.format(COLORS[i], - Path(Path(bioreaders[i]._file_path).name).name, - bioreaders[i].bounds)) + if isinstance(bounds[i][0], int) or bounds[i][0] is None: + logger.info( + "Color {}: {} (rescaling to {})".format( + COLORS[i], + Path(Path(bioreaders[i]._file_path).name).name, + bioreaders[i].bounds, + ), + ) continue - if layout[i] == None: + if layout[i] is None: continue bioreaders[i].bounds = threads.pop().result() - logger.info('Color {}: {} (rescaling to {})'.format(COLORS[i], - Path(Path(bioreaders[i]._file_path).name).name, - bioreaders[i].bounds)) - + logger.info( + "Color {}: {} (rescaling to {})".format( + COLORS[i], + Path(Path(bioreaders[i]._file_path).name).name, + bioreaders[i].bounds, + ), + ) + for br in bioreaders: - if br != None: + if br is not None: br_meta = br - file_info = utils.dzi_file(br_meta,outDirFrame,count) + file_info = utils.dzi_file(br_meta, outDirFrame, count) encoder = utils.DeepZoomChunkEncoder(file_info) file_writer = utils.DeepZoomWriter(outDirFrame) - - utils._get_higher_res(0,bioreaders,file_writer,encoder,alpha,background,isinstance(stitch_path,str)) + + utils._get_higher_res( + 0, + bioreaders, + file_writer, + encoder, + alpha, + background, + isinstance(stitch_path, str), + ) diff --git a/visualization/polus-color-pyramid-builder-plugin/src/utils.py b/visualization/polus-color-pyramid-builder-plugin/src/utils.py index 2515ab413..b3a427324 100644 --- a/visualization/polus-color-pyramid-builder-plugin/src/utils.py +++ b/visualization/polus-color-pyramid-builder-plugin/src/utils.py @@ -1,41 +1,53 @@ -""" -This file originally appeared in the polus precomputed slide plugin. +"""This file originally appeared in the polus precomputed slide plugin. It has been modified to create color DeepZoom pyramids. 
Original Code: https://github.com/LabShare/polus-plugins/tree/master/polus-precompute-slide-plugin """ -from bfio.bfio import BioReader -import numpy as np -import copy, os -from pathlib import Path -import imageio, re, filepattern +import copy +import os +import re from concurrent.futures import ThreadPoolExecutor +from pathlib import Path -STITCH_VARS = ['file','correlation','posX','posY','gridX','gridY'] # image stitching values +import imageio +import numpy as np +from bfio.bfio import BioReader + +STITCH_VARS = [ + "file", + "correlation", + "posX", + "posY", + "gridX", + "gridY", +] # image stitching values STITCH_LINE = "file: {}; corr: {}; position: ({}, {}); grid: ({}, {});\n" # Conversion factors to nm, these are based off of supported Bioformats length units -UNITS = {'m': 10**9, - 'cm': 10**7, - 'mm': 10**6, - 'µm': 10**3, - 'nm': 1, - 'Å': 10**-1} +UNITS = { + "m": 10**9, + "cm": 10**7, + "mm": 10**6, + "µm": 10**3, + "nm": 1, + "Å": 10**-1, +} # Chunk Scale CHUNK_SIZE = 1024 + def get_number(s): - """ Check that s is number - + """Check that s is number. + In this plugin, heatmaps are created only for columns that contain numbers. This function checks to make sure an input value is able to be converted into a number. 
- + This function originally appeared in the image asembler plugin: https://github.com/Nicholas-Schaub/polus-plugins/blob/imageassembler/polus-image-assembler-plugin/src/main.py - + Inputs: s - An input string or number Outputs: @@ -46,48 +58,48 @@ def get_number(s): except ValueError: return s -class BioAssembler(): - - def __init__(self,file_path,stitch_path,max_workers=None): + +class BioAssembler: + def __init__(self, file_path, stitch_path, max_workers=None) -> None: self._file_path = file_path self._stitch_path = stitch_path - self._file_dict = self._parse_stitch(stitch_path,file_path) + self._file_dict = self._parse_stitch(stitch_path, file_path) self._max_workers = max_workers - self.X = [0,0] - self.Y = [0,0] - self.Z = [0,0] + self.X = [0, 0] + self.Y = [0, 0] + self.Z = [0, 0] self._X_offset = 0 self._Y_offset = 0 self._Z_offset = 0 self._image = None - + def physical_size_x(self): - return [None,None] - + return [None, None] + def physical_size_y(self): - return [None,None] - + return [None, None] + def physical_size_z(self): - return [None,None] - + return [None, None] + def num_x(self): - return self._file_dict['width'] - + return self._file_dict["width"] + def num_y(self): - return self._file_dict['height'] - + return self._file_dict["height"] + def num_z(self): return 1 - - def buffer_image(self,image_path,Xi,Yi,Xt,Yt,color=False): - """buffer_image Load and image and store in buffer + + def buffer_image(self, image_path, Xi, Yi, Xt, Yt, color=False): + """buffer_image Load and image and store in buffer. This method loads an image and stores it in the appropriate position based on the stitching vector coordinates within a large tile of the output image. It is intended to be used as a thread to increase the reading component to assembling the image. 
- + Args: image_path ([str]): Path to image to load Xi ([list]): Xmin and Xmax of pixels to load from the image @@ -95,24 +107,27 @@ def buffer_image(self,image_path,Xi,Yi,Xt,Yt,color=False): Xt ([list]): X position within the buffer to store the image Yt ([list]): Y position within the buffer to store the image """ - # Load the image - br = BioReader(image_path,max_workers=2) - image = br.read_image(X=Xi,Y=Yi) # only get the first z,c,t layer - + br = BioReader(image_path, max_workers=2) + image = br.read_image(X=Xi, Y=Yi) # only get the first z,c,t layer + # Put the image in the buffer - if color != None: - image_temp = (255*(image[...,0,0].astype(np.float32) - self.bounds[0])/(self.bounds[1] - self.bounds[0])) - image_temp[image_temp>255] = 255 - image_temp[image_temp<0] = 0 + if color is not None: + image_temp = ( + 255 + * (image[..., 0, 0].astype(np.float32) - self.bounds[0]) + / (self.bounds[1] - self.bounds[0]) + ) + image_temp[image_temp > 255] = 255 + image_temp[image_temp < 0] = 0 image_temp = image_temp.astype(np.uint8) - self._image[Yt[0]:Yt[1],Xt[0]:Xt[1],...] = 0 - self._image[Yt[0]:Yt[1],Xt[0]:Xt[1],self.color] = image_temp + self._image[Yt[0] : Yt[1], Xt[0] : Xt[1], ...] = 0 + self._image[Yt[0] : Yt[1], Xt[0] : Xt[1], self.color] = image_temp else: - self._image[Yt[0]:Yt[1],Xt[0]:Xt[1],...] = image[:,:,:,0,0] - - def make_tile(self,x_min,x_max,y_min,y_max,color=None): - """make_tile Create a supertile + self._image[Yt[0] : Yt[1], Xt[0] : Xt[1], ...] = image[:, :, :, 0, 0] + + def make_tile(self, x_min, x_max, y_min, y_max, color=None): + """make_tile Create a supertile. 
This method identifies images that have stitching vector positions within the bounds of the supertile defined by the x and y input @@ -130,49 +145,60 @@ def make_tile(self,x_min,x_max,y_min,y_max,color=None): Returns: [type]: [description] """ - self._X_offset = x_min self._Y_offset = y_min # Get the data type - br = BioReader(str(Path(self._file_path).joinpath(self._file_dict['filePos'][0]['file']))) - dtype = br._pix['type'] + br = BioReader( + str(Path(self._file_path).joinpath(self._file_dict["filePos"][0]["file"])), + ) + dtype = br._pix["type"] # initialize the image - if color!=None: - self._image = np.full((y_max-y_min,x_max-x_min,4),color,dtype=dtype) + if color is not None: + self._image = np.full((y_max - y_min, x_max - x_min, 4), color, dtype=dtype) else: - self._image = np.zeros((y_max-y_min,x_max-x_min,1),dtype=dtype) + self._image = np.zeros((y_max - y_min, x_max - x_min, 1), dtype=dtype) # get images in bounds of current super tile - with ThreadPoolExecutor(max([self._max_workers,2])) as executor: - for f in self._file_dict['filePos']: - if (f['posX'] >= x_min and f['posX'] <= x_max) or (f['posX']+f['width'] >= x_min and f['posX']+f['width'] <= x_max): - if (f['posY'] >= y_min and f['posY'] <= y_max) or (f['posY']+f['height'] >= y_min and f['posY']+f['height'] <= y_max): - - # get bounds of image within the tile - Xt = [max(0,f['posX']-x_min)] - Xt.append(min(x_max-x_min,f['posX']+f['width']-x_min)) - Yt = [max(0,f['posY']-y_min)] - Yt.append(min(y_max-y_min,f['posY']+f['height']-y_min)) - - # get bounds of image within the image - Xi = [max(0,x_min - f['posX'])] - Xi.append(min(f['width'],x_max - f['posX'])) - Yi = [max(0,y_min - f['posY'])] - Yi.append(min(f['height'],y_max - f['posY'])) - - # self.buffer_image(str(Path(self._file_path).joinpath(f['file'])),Xi,Yi,Xt,Yt,color) - executor.submit(self.buffer_image,str(Path(self._file_path).joinpath(f['file'])),Xi,Yi,Xt,Yt,color) - - def _parse_stitch(self,stitchPath,imagePath): - """ Load and parse 
image stitching vectors - + with ThreadPoolExecutor(max([self._max_workers, 2])) as executor: + for f in self._file_dict["filePos"]: + if ((f["posX"] >= x_min and f["posX"] <= x_max) or ( + f["posX"] + f["width"] >= x_min and f["posX"] + f["width"] <= x_max + )) and ((f["posY"] >= y_min and f["posY"] <= y_max) or ( + f["posY"] + f["height"] >= y_min + and f["posY"] + f["height"] <= y_max + )): + # get bounds of image within the tile + Xt = [max(0, f["posX"] - x_min)] + Xt.append(min(x_max - x_min, f["posX"] + f["width"] - x_min)) + Yt = [max(0, f["posY"] - y_min)] + Yt.append(min(y_max - y_min, f["posY"] + f["height"] - y_min)) + + # get bounds of image within the image + Xi = [max(0, x_min - f["posX"])] + Xi.append(min(f["width"], x_max - f["posX"])) + Yi = [max(0, y_min - f["posY"])] + Yi.append(min(f["height"], y_max - f["posY"])) + + executor.submit( + self.buffer_image, + str(Path(self._file_path).joinpath(f["file"])), + Xi, + Yi, + Xt, + Yt, + color, + ) + + def _parse_stitch(self, stitchPath, imagePath): + """Load and parse image stitching vectors. + This function creates a list of file dictionaries that include the filename and pixel position and dimensions within a stitched image. It also determines the size of the final stitched image and the suggested name of the output image based on differences in file names in the stitching vector. 
- + This method originally appeared in the image assembler plugin: https://github.com/Nicholas-Schaub/polus-plugins/blob/imageassembler/polus-image-assembler-plugin/src/main.py @@ -183,84 +209,93 @@ def _parse_stitch(self,stitchPath,imagePath): Outputs: out_dict - Dictionary with keys (width, height, name, filePos) """ - # Initialize the output - out_dict = {'width': int(0), - 'height': int(0), - 'filePos': []} + out_dict = {"width": int(0), "height": int(0), "filePos": []} # Set the regular expression used to parse each line of the stitching vector - line_regex = r"file: (.*); corr: (.*); position: \((.*), (.*)\); grid: \((.*), (.*)\);" + line_regex = ( + r"file: (.*); corr: (.*); position: \((.*), (.*)\); grid: \((.*), (.*)\);" + ) # Get a list of all images in imagePath images = [p.name for p in Path(imagePath).iterdir()] # Open each stitching vector fpath = str(Path(stitchPath).absolute()) - name_pos = {} - with open(fpath,'r') as fr: - + with open(fpath) as fr: # Read the first line to get the filename for comparison to all other filenames line = fr.readline() - stitch_groups = re.match(line_regex,line) - stitch_groups = {key:val for key,val in zip(STITCH_VARS,stitch_groups.groups())} - name = stitch_groups['file'] - name_ind = [i for i in range(len(name))] - fr.seek(0) # reset to the first line + stitch_groups = re.match(line_regex, line) + stitch_groups = dict(zip(STITCH_VARS, stitch_groups.groups())) + name = stitch_groups["file"] + list(range(len(name))) + fr.seek(0) # reset to the first line # Read each line in the stitching vector for line in fr: # Read and parse values from the current line - stitch_groups = re.match(line_regex,line) - stitch_groups = {key:get_number(val) for key,val in zip(STITCH_VARS,stitch_groups.groups())} - + stitch_groups = re.match(line_regex, line) + stitch_groups = { + key: get_number(val) + for key, val in zip(STITCH_VARS, stitch_groups.groups()) + } + # If an image in the vector doesn't match an image in the collection, then 
skip it - if stitch_groups['file'] not in images: + if stitch_groups["file"] not in images: continue # Get the image size - stitch_groups['width'], stitch_groups['height'] = BioReader.image_size(str(Path(imagePath).joinpath(stitch_groups['file']).absolute())) - if out_dict['width'] < stitch_groups['width']+stitch_groups['posX']: - out_dict['width'] = stitch_groups['width']+stitch_groups['posX'] - if out_dict['height'] < stitch_groups['height']+stitch_groups['posY']: - out_dict['height'] = stitch_groups['height']+stitch_groups['posY'] + stitch_groups["width"], stitch_groups["height"] = BioReader.image_size( + str(Path(imagePath).joinpath(stitch_groups["file"]).absolute()), + ) + if out_dict["width"] < stitch_groups["width"] + stitch_groups["posX"]: + out_dict["width"] = stitch_groups["width"] + stitch_groups["posX"] + if out_dict["height"] < stitch_groups["height"] + stitch_groups["posY"]: + out_dict["height"] = stitch_groups["height"] + stitch_groups["posY"] # Set the stitching vector values in the file dictionary - out_dict['filePos'].append(stitch_groups) + out_dict["filePos"].append(stitch_groups) return out_dict - - def read_image(self,X,Y,Z,color=None): + + def read_image(self, X, Y, Z, color=None): if X[0] >= self.X[0] and X[1] <= self.X[1]: if Y[0] >= self.Y[0] and Y[1] <= self.Y[1]: if Z[0] >= self.Z[0] and Z[1] <= self.Z[1]: - return self._image[Y[0]-self._Y_offset:Y[1]-self._Y_offset, - X[0]-self._X_offset:X[1]-self._X_offset,...] 
+ return self._image[ + Y[0] - self._Y_offset : Y[1] - self._Y_offset, + X[0] - self._X_offset : X[1] - self._X_offset, + ..., + ] else: - raise ValueError('Z must be [0,1]') - - x_min = 2**13 * (X[0]//2**13) - x_max = min([x_min+2**13,self._file_dict['width']]) - y_min = 2**13 * (Y[0]//2**13) - y_max = min([y_min+2**13,self._file_dict['height']]) - + msg = "Z must be [0,1]" + raise ValueError(msg) + + x_min = 2**13 * (X[0] // 2**13) + x_max = min([x_min + 2**13, self._file_dict["width"]]) + y_min = 2**13 * (Y[0] // 2**13) + y_max = min([y_min + 2**13, self._file_dict["height"]]) + self._X_offset = x_min self._Y_offset = y_min - - self.make_tile(x_min,x_max,y_min,y_max,color) - - return self._image[Y[0]-self._Y_offset:Y[1]-self._Y_offset, - X[0]-self._X_offset:X[1]-self._X_offset,...] + + self.make_tile(x_min, x_max, y_min, y_max, color) + + return self._image[ + Y[0] - self._Y_offset : Y[1] - self._Y_offset, + X[0] - self._X_offset : X[1] - self._X_offset, + ..., + ] + def _avg2(image): - """ Average pixels together with optical field 2x2 and stride 2 - + """Average pixels together with optical field 2x2 and stride 2. + Inputs: image - numpy array with only two dimensions (m,n) Outputs: avg_img - numpy array with only two dimensions (round(m/2),round(n/2)) """ - # The data fed into this is the same as the native file format. 
# We need to make sure the type will not cause overflow - NJS if image.dtype == np.uint8: @@ -271,43 +306,55 @@ def _avg2(image): dtype = np.uint64 else: dtype = image.dtype - + odtype = image.dtype image = image.astype(dtype) imgshape = image.shape ypos = imgshape[0] xpos = imgshape[1] - - y_max = ypos - ypos % 2 # if odd then subtracting 1 + + y_max = ypos - ypos % 2 # if odd then subtracting 1 x_max = xpos - xpos % 2 - avg_imgshape = np.ceil([d/2 for d in imgshape]).astype(int) - avg_imgshape[2] = 4 # Only deal with color images in color pyramid builder plugin - avg_img = np.zeros(avg_imgshape,dtype=dtype) - avg_img[0:int(y_max/2),0:int(x_max/2),:]= (\ - image[0:y_max-1:2,0:x_max-1:2,:] + \ - image[1:y_max:2 ,0:x_max-1:2,:] + \ - image[0:y_max-1:2,1:x_max:2 ,:] + \ - image[1:y_max:2 ,1:x_max:2 ,:])/4 + avg_imgshape = np.ceil([d / 2 for d in imgshape]).astype(int) + avg_imgshape[2] = 4 # Only deal with color images in color pyramid builder plugin + avg_img = np.zeros(avg_imgshape, dtype=dtype) + avg_img[0 : int(y_max / 2), 0 : int(x_max / 2), :] = ( + image[0 : y_max - 1 : 2, 0 : x_max - 1 : 2, :] + + image[1:y_max:2, 0 : x_max - 1 : 2, :] + + image[0 : y_max - 1 : 2, 1:x_max:2, :] + + image[1:y_max:2, 1:x_max:2, :] + ) / 4 return avg_img.astype(odtype) -def _get_higher_res(S,bfio_reader,slide_writer,encoder,alpha,color=None,stitch=False,X=None,Y=None): - """ Recursive function for pyramid building - + +def _get_higher_res( + S, + bfio_reader, + slide_writer, + encoder, + alpha, + color=None, + stitch=False, + X=None, + Y=None, +): + """Recursive function for pyramid building. + This is a recursive function that builds an image pyramid by indicating an original region of an image at a given scale. This function then builds a pyramid up from the highest resolution components of the pyramid (the original images) to the given position resolution. 
- + As an example, imagine the following possible pyramid: - + Scale S=0 1234 / \ Scale S=1 12 34 - / \ / \ + / \\ / \ Scale S=2 1 2 3 4 - + At scale 2 (the highest resolution) there are 4 original images. At scale 1, images are averaged and concatenated into one image (i.e. image 12). Calling this function using S=0 will attempt to generate 1234 by calling this @@ -315,11 +362,11 @@ def _get_higher_res(S,bfio_reader,slide_writer,encoder,alpha,color=None,stitch=F get image 1 and then image 2. Note that this function actually builds images in quadrants (top left and right, bottom left and right) rather than two sections as displayed above. - + Due to the nature of how this function works, it is possible to build a pyramid in parallel, since building the subpyramid under image 12 can be run independently of the building of subpyramid under 34. - + Inputs: S - Top level scale from which the pyramid will be built bfio_reader - List of BioReader objects used to read the tiled tiffs @@ -332,123 +379,149 @@ def _get_higher_res(S,bfio_reader,slide_writer,encoder,alpha,color=None,stitch=F """ # Get the scale info scale_info = None - for res in encoder.info['scales']: - if int(res['key'])==S: + for res in encoder.info["scales"]: + if int(res["key"]) == S: scale_info = res break - if scale_info==None: - ValueError("No scale information for resolution {}.".format(S)) - + if scale_info is None: + ValueError(f"No scale information for resolution {S}.") + # Channels designates color indices defining the following colors: # red, green, blue, yellow, cyan, magenta, gray # When creating the image, if the 3rd value in the bfio_reader list is # defined, then the image is defined by channels[2], or blue. 
- channels = [[0,3], - [1,3], - [2,3], - [0,1,3], - [0,2,3], - [1,2,3], - [0,1,2,3]] - - if X == None: - X = [0,scale_info['size'][0]] - if Y == None: - Y = [0,scale_info['size'][1]] - Z = [0,1] - + channels = [[0, 3], [1, 3], [2, 3], [0, 1, 3], [0, 2, 3], [1, 2, 3], [0, 1, 2, 3]] + + if X is None: + X = [0, scale_info["size"][0]] + if Y is None: + Y = [0, scale_info["size"][1]] + Z = [0, 1] + # Modify upper bound to stay within resolution dimensions - if X[1] > scale_info['size'][0]: - X[1] = scale_info['size'][0] - if Y[1] > scale_info['size'][1]: - Y[1] = scale_info['size'][1] + if X[1] > scale_info["size"][0]: + X[1] = scale_info["size"][0] + if Y[1] > scale_info["size"][1]: + Y[1] = scale_info["size"][1] # Initialize the output - image = np.zeros((Y[1]-Y[0],X[1]-X[0],4),dtype=np.uint8) + image = np.zeros((Y[1] - Y[0], X[1] - X[0], 4), dtype=np.uint8) if not alpha: - image[:,:,3] = 255 - + image[:, :, 3] = 255 + # If requesting from the lowest scale, then just read the images - if str(S)==encoder.info['scales'][0]['key']: - for ind,br in enumerate(bfio_reader): - if br == None: + if str(S) == encoder.info["scales"][0]["key"]: + for ind, br in enumerate(bfio_reader): + if br is None: continue - if isinstance(br,BioAssembler): + if isinstance(br, BioAssembler): br.color = channels[ind] - image_color_temp = br.read_image(X,Y,Z,color).astype(np.uint8) + image_color_temp = br.read_image(X, Y, Z, color).astype(np.uint8) else: - image_temp = (255*(br.read_image(X=X,Y=Y,Z=Z)[...,0,0].astype(np.float32) - br.bounds[0])/(br.bounds[1] - br.bounds[0])) - image_temp[image_temp>255] = 255 - image_temp[image_temp<0] = 0 + image_temp = ( + 255 + * ( + br.read_image(X=X, Y=Y, Z=Z)[..., 0, 0].astype(np.float32) + - br.bounds[0] + ) + / (br.bounds[1] - br.bounds[0]) + ) + image_temp[image_temp > 255] = 255 + image_temp[image_temp < 0] = 0 image_temp = image_temp.astype(np.uint8) image_color_temp = copy.deepcopy(image) - image_color_temp[:,:,channels[ind]] = image_temp + 
image_color_temp[:, :, channels[ind]] = image_temp del image_temp - image = np.maximum(image,image_color_temp) + image = np.maximum(image, image_color_temp) else: # Set the subgrid dimensions - subgrid_dims = [[2*X[0],2*X[1]],[2*Y[0],2*Y[1]],[0,1]] + subgrid_dims = [[2 * X[0], 2 * X[1]], [2 * Y[0], 2 * Y[1]], [0, 1]] for dim in subgrid_dims: - while dim[1]-dim[0] > CHUNK_SIZE: - dim.insert(1,dim[0] + ((dim[1] - dim[0]-1)//CHUNK_SIZE) * CHUNK_SIZE) - - def load_and_scale(*args,**kwargs): + while dim[1] - dim[0] > CHUNK_SIZE: + dim.insert( + 1, dim[0] + ((dim[1] - dim[0] - 1) // CHUNK_SIZE) * CHUNK_SIZE, + ) + + def load_and_scale(*args, **kwargs): sub_image = _get_higher_res(**kwargs) image = args[0] x_ind = args[1] y_ind = args[2] - image[y_ind[0]:y_ind[1],x_ind[0]:x_ind[1],0:4] = _avg2(sub_image) - - if (S % 2 == 0 or str(S+1)==encoder.info['scales'][0]['key']) and not stitch: + image[y_ind[0] : y_ind[1], x_ind[0] : x_ind[1], 0:4] = _avg2(sub_image) + + if ( + S % 2 == 0 or str(S + 1) == encoder.info["scales"][0]["key"] + ) and not stitch: with ThreadPoolExecutor() as executor: - for y in range(0,len(subgrid_dims[1])-1): - y_ind = [subgrid_dims[1][y] - subgrid_dims[1][0],subgrid_dims[1][y+1] - subgrid_dims[1][0]] - y_ind = [np.ceil(yi/2).astype('int') for yi in y_ind] - for x in range(0,len(subgrid_dims[0])-1): - x_ind = [subgrid_dims[0][x] - subgrid_dims[0][0],subgrid_dims[0][x+1] - subgrid_dims[0][0]] - x_ind = [np.ceil(xi/2).astype('int') for xi in x_ind] - executor.submit(load_and_scale, - image,x_ind,y_ind, # args - alpha=alpha, # kwargs - X=subgrid_dims[0][x:x+2], - Y=subgrid_dims[1][y:y+2], - S=S+1, - bfio_reader=bfio_reader, - slide_writer=slide_writer, - encoder=encoder) + for y in range(0, len(subgrid_dims[1]) - 1): + y_ind = [ + subgrid_dims[1][y] - subgrid_dims[1][0], + subgrid_dims[1][y + 1] - subgrid_dims[1][0], + ] + y_ind = [np.ceil(yi / 2).astype("int") for yi in y_ind] + for x in range(0, len(subgrid_dims[0]) - 1): + x_ind = [ + 
subgrid_dims[0][x] - subgrid_dims[0][0], + subgrid_dims[0][x + 1] - subgrid_dims[0][0], + ] + x_ind = [np.ceil(xi / 2).astype("int") for xi in x_ind] + executor.submit( + load_and_scale, + image, + x_ind, + y_ind, # args + alpha=alpha, # kwargs + X=subgrid_dims[0][x : x + 2], + Y=subgrid_dims[1][y : y + 2], + S=S + 1, + bfio_reader=bfio_reader, + slide_writer=slide_writer, + encoder=encoder, + ) else: - for y in range(0,len(subgrid_dims[1])-1): - y_ind = [subgrid_dims[1][y] - subgrid_dims[1][0],subgrid_dims[1][y+1] - subgrid_dims[1][0]] - y_ind = [np.ceil(yi/2).astype('int') for yi in y_ind] - for x in range(0,len(subgrid_dims[0])-1): - x_ind = [subgrid_dims[0][x] - subgrid_dims[0][0],subgrid_dims[0][x+1] - subgrid_dims[0][0]] - x_ind = [np.ceil(xi/2).astype('int') for xi in x_ind] - load_and_scale(image,x_ind,y_ind, # args - alpha=alpha, # kwargs - X=subgrid_dims[0][x:x+2], - Y=subgrid_dims[1][y:y+2], - S=S+1, - bfio_reader=bfio_reader, - slide_writer=slide_writer, - encoder=encoder, - color=color, - stitch=stitch) + for y in range(0, len(subgrid_dims[1]) - 1): + y_ind = [ + subgrid_dims[1][y] - subgrid_dims[1][0], + subgrid_dims[1][y + 1] - subgrid_dims[1][0], + ] + y_ind = [np.ceil(yi / 2).astype("int") for yi in y_ind] + for x in range(0, len(subgrid_dims[0]) - 1): + x_ind = [ + subgrid_dims[0][x] - subgrid_dims[0][0], + subgrid_dims[0][x + 1] - subgrid_dims[0][0], + ] + x_ind = [np.ceil(xi / 2).astype("int") for xi in x_ind] + load_and_scale( + image, + x_ind, + y_ind, # args + alpha=alpha, # kwargs + X=subgrid_dims[0][x : x + 2], + Y=subgrid_dims[1][y : y + 2], + S=S + 1, + bfio_reader=bfio_reader, + slide_writer=slide_writer, + encoder=encoder, + color=color, + stitch=stitch, + ) # Encode the chunk image_encoded = encoder.encode(image) - + # Write the chunk - slide_writer.store_chunk(image_encoded,str(S),(X[0],X[1],Y[0],Y[1],0,1)) + slide_writer.store_chunk(image_encoded, str(S), (X[0], X[1], Y[0], Y[1], 0, 1)) return image + # Modified and condensed from 
FileAccessor class in neuroglancer-scripts # https://github.com/HumanBrainProject/neuroglancer-scripts/blob/master/src/neuroglancer_scripts/file_accessor.py -class PyramidWriter(): - """ Pyramid file writing base class +class PyramidWriter: + """Pyramid file writing base class This class should not be called directly. It should be inherited by a pyramid writing class type. - + Inputs: base_dir - Where pyramid folders and info file will be stored """ @@ -456,24 +529,24 @@ class PyramidWriter(): can_write = True chunk_pattern = None - def __init__(self, base_dir): + def __init__(self, base_dir) -> None: self.base_path = Path(base_dir) def store_chunk(self, buf, key, chunk_coords): - """ Store a pyramid chunk - + """Store a pyramid chunk. + Inputs: buf - byte stream to save to disk key - pyramid scale, folder to save chunk to chunk_coords - X,Y,Z coordinates of data in buf """ try: - self._write_chunk(key,chunk_coords,buf) + self._write_chunk(key, chunk_coords, buf) except OSError as exc: + msg = f"Error storing chunk {self._chunk_path(key, chunk_coords)} in {self.base_path}: {exc}" raise FileNotFoundError( - "Error storing chunk {0} in {1}: {2}" .format( - self._chunk_path(key, chunk_coords), - self.base_path, exc)) + msg, + ) def _chunk_path(self, key, chunk_coords, pattern=None): if pattern is None: @@ -482,37 +555,43 @@ def _chunk_path(self, key, chunk_coords, pattern=None): chunk_filename = pattern.format(*chunk_coords, key=key) return self.base_path / chunk_filename - def _chunk_coords(self,chunk_coords): + def _chunk_coords(self, chunk_coords): return chunk_coords - def _write_chunk(self,key,chunk_path,buf): + def _write_chunk(self, key, chunk_path, buf): NotImplementedError("_write_chunk was never implemented.") + class DeepZoomWriter(PyramidWriter): - """ Method to write a DeepZoom pyramid - + """Method to write a DeepZoom pyramid. 
+ Inputs: base_dir - Where pyramid folders and info file will be stored """ - def __init__(self, base_dir): + def __init__(self, base_dir) -> None: super().__init__(base_dir) self.chunk_pattern = "{key}/{0}_{1}.png" - def _chunk_coords(self,chunk_coords): - chunk_coords = [chunk_coords[0]//CHUNK_SIZE,chunk_coords[2]//CHUNK_SIZE] - return chunk_coords + def _chunk_coords(self, chunk_coords): + return [chunk_coords[0] // CHUNK_SIZE, chunk_coords[2] // CHUNK_SIZE] - def _write_chunk(self,key,chunk_coords,buf): - chunk_path = self._chunk_path(key,chunk_coords) + def _write_chunk(self, key, chunk_coords, buf): + chunk_path = self._chunk_path(key, chunk_coords) os.makedirs(str(chunk_path.parent), exist_ok=True) - imageio.imwrite(str(chunk_path.with_name(chunk_path.name)),buf,format='PNG-FI',compression=1) - + imageio.imwrite( + str(chunk_path.with_name(chunk_path.name)), + buf, + format="PNG-FI", + compression=1, + ) + + # Modified and condensed from multiple functions and classes # https://github.com/HumanBrainProject/neuroglancer-scripts/blob/master/src/neuroglancer_scripts/chunk_encoding.py class NeuroglancerChunkEncoder: - """ Encode chunks from Numpy array to byte buffer. - + """Encode chunks from Numpy array to byte buffer. 
+ Inputs: info - info dictionary """ @@ -520,120 +599,139 @@ class NeuroglancerChunkEncoder: # Data types used by Neuroglancer DATA_TYPES = ("uint8", "uint16", "uint32", "uint64", "float32") - def __init__(self, info): - + def __init__(self, info) -> None: try: data_type = info["data_type"] num_channels = info["num_channels"] except KeyError as exc: - raise KeyError("The info dict is missing an essential key {0}" - .format(exc)) from exc + msg = f"The info dict is missing an essential key {exc}" + raise KeyError( + msg, + ) from exc if not isinstance(num_channels, int) or not num_channels > 0: - raise KeyError("Invalid value {0} for num_channels (must be " - "a positive integer)".format(num_channels)) + msg = f"Invalid value {num_channels} for num_channels (must be a positive integer)" + raise KeyError( + msg, + ) if data_type not in self.DATA_TYPES: - raise KeyError("Invalid data_type {0} (should be one of {1})" - .format(data_type, self.DATA_TYPES)) - + msg = f"Invalid data_type {data_type} (should be one of {self.DATA_TYPES})" + raise KeyError( + msg, + ) + self.info = info self.num_channels = num_channels self.dtype = np.dtype(data_type).newbyteorder("<") -class DeepZoomChunkEncoder(NeuroglancerChunkEncoder): +class DeepZoomChunkEncoder(NeuroglancerChunkEncoder): # Data types used by Neuroglancer DATA_TYPES = ("uint8", "uint16", "uint32", "uint64", "float32") - def __init__(self, info): - """ Properly formats numpy array for DeepZoom pyramid. - + def __init__(self, info) -> None: + """Properly formats numpy array for DeepZoom pyramid. + Inputs: info - info dictionary """ - super().__init__(info) def encode(self, chunk): - """ Squeeze the input array. + """Squeeze the input array. Inputs: chunk - array with four dimensions (C, Z, Y, X) Outputs: - buf - encoded chunk (byte stream) + buf - encoded chunk (byte stream). 
""" # Check to make sure the data is formatted properly assert chunk.ndim == 3 return chunk -def bfio_metadata_to_slide_info(bfio_reader,outPath): - """ Generate a Neuroglancer info file from Bioformats metadata - + +def bfio_metadata_to_slide_info(bfio_reader, outPath): + """Generate a Neuroglancer info file from Bioformats metadata. + Neuroglancer requires an info file in the root of the pyramid directory. All information necessary for this info file is contained in Bioformats metadata, so this function takes the metadata and generates the info file. - + Inputs: bfio_reader - A BioReader object outPath - Path to directory where pyramid will be generated Outputs: info - A dictionary containing the information in the info file """ - # Get metadata info from the bfio reader - sizes = [bfio_reader.num_x(),bfio_reader.num_y(),bfio_reader.num_z()] + sizes = [bfio_reader.num_x(), bfio_reader.num_y(), bfio_reader.num_z()] phys_x = bfio_reader.physical_size_x() if None in phys_x: - phys_x = (1000,'nm') + phys_x = (1000, "nm") phys_y = bfio_reader.physical_size_y() if None in phys_y: - phys_y = (1000,'nm') + phys_y = (1000, "nm") resolution = [phys_x[0] * UNITS[phys_x[1]]] resolution.append(phys_y[0] * UNITS[phys_y[1]]) - resolution.append((phys_y[0] * UNITS[phys_y[1]] + phys_x[0] * UNITS[phys_x[1]])/2) # Just used as a placeholder - dtype = bfio_reader.read_image(X=[0,1024],Y=[0,1024],Z=[0,1]).dtype - + resolution.append( + (phys_y[0] * UNITS[phys_y[1]] + phys_x[0] * UNITS[phys_x[1]]) / 2, + ) # Just used as a placeholder + dtype = bfio_reader.read_image(X=[0, 1024], Y=[0, 1024], Z=[0, 1]).dtype + num_scales = int(np.log2(max(sizes))) + 1 - + # create a scales template, use the full resolution8 scales = { - "chunk_sizes":[[CHUNK_SIZE,CHUNK_SIZE,1]], - "encoding":"raw", + "chunk_sizes": [[CHUNK_SIZE, CHUNK_SIZE, 1]], + "encoding": "raw", "key": str(num_scales), - "resolution":resolution, - "size":sizes, - "voxel_offset":[0,0,0] + "resolution": resolution, + "size": sizes, 
+ "voxel_offset": [0, 0, 0], } - + # initialize the json dictionary info = { "data_type": dtype, - "num_channels":1, - "scales": [scales], # Will build scales below - "type": "image" + "num_channels": 1, + "scales": [scales], # Will build scales below + "type": "image", } - - for i in range(1,num_scales+1): - previous_scale = info['scales'][-1] + + for i in range(1, num_scales + 1): + previous_scale = info["scales"][-1] current_scale = copy.deepcopy(previous_scale) - current_scale['key'] = str(num_scales - i) - current_scale['size'] = [int(np.ceil(previous_scale['size'][0]/2)),int(np.ceil(previous_scale['size'][1]/2)),1] - current_scale['resolution'] = [2*previous_scale['resolution'][0],2*previous_scale['resolution'][1],previous_scale['resolution'][2]] - info['scales'].append(current_scale) - + current_scale["key"] = str(num_scales - i) + current_scale["size"] = [ + int(np.ceil(previous_scale["size"][0] / 2)), + int(np.ceil(previous_scale["size"][1] / 2)), + 1, + ] + current_scale["resolution"] = [ + 2 * previous_scale["resolution"][0], + 2 * previous_scale["resolution"][1], + previous_scale["resolution"][2], + ] + info["scales"].append(current_scale) + return info -def dzi_file(bfio_reader,outPath,imageNum): + +def dzi_file(bfio_reader, outPath, imageNum): # Create an output path object for the info file - op = Path(outPath).parent.joinpath("{}.dzi".format(imageNum)) - + op = Path(outPath).parent.joinpath(f"{imageNum}.dzi") + # DZI file template DZI = '' - + # Get pyramid info - info = bfio_metadata_to_slide_info(bfio_reader,outPath) + info = bfio_metadata_to_slide_info(bfio_reader, outPath) # write the dzi file - with open(op,'w') as writer: - writer.write(DZI.format(CHUNK_SIZE,info['scales'][0]['size'][0],info['scales'][0]['size'][1])) - + with open(op, "w") as writer: + writer.write( + DZI.format( + CHUNK_SIZE, info["scales"][0]["size"][0], info["scales"][0]["size"][1], + ), + ) + return info diff --git 
a/visualization/polus-feature-heatmap-pyramid-plugin/Dockerfile b/visualization/polus-feature-heatmap-pyramid-plugin/Dockerfile index 0306aa237..c884b8bcb 100644 --- a/visualization/polus-feature-heatmap-pyramid-plugin/Dockerfile +++ b/visualization/polus-feature-heatmap-pyramid-plugin/Dockerfile @@ -14,4 +14,4 @@ WORKDIR ${EXEC_DIR} RUN pip3 install -r ${EXEC_DIR}/requirements.txt --no-cache-dir -ENTRYPOINT ["python3", "/opt/executables/main.py"] \ No newline at end of file +ENTRYPOINT ["python3", "/opt/executables/main.py"] diff --git a/visualization/polus-feature-heatmap-pyramid-plugin/VERSION b/visualization/polus-feature-heatmap-pyramid-plugin/VERSION index 341cf11fa..0ea3a944b 100644 --- a/visualization/polus-feature-heatmap-pyramid-plugin/VERSION +++ b/visualization/polus-feature-heatmap-pyramid-plugin/VERSION @@ -1 +1 @@ -0.2.0 \ No newline at end of file +0.2.0 diff --git a/visualization/polus-feature-heatmap-pyramid-plugin/build-docker.sh b/visualization/polus-feature-heatmap-pyramid-plugin/build-docker.sh index 64096d97b..714b5d7af 100755 --- a/visualization/polus-feature-heatmap-pyramid-plugin/build-docker.sh +++ b/visualization/polus-feature-heatmap-pyramid-plugin/build-docker.sh @@ -1,4 +1,4 @@ #!/bin/bash version=$( fcheck: + if fnum // 1000 > fcheck: fcheck += 1 - logger.info('Files parsed: {}'.format(fnum)) + logger.info(f"Files parsed: {fnum}") + + return unique_width, unique_height + - return unique_width,unique_height +def _parse_features(featurePath, fp, method): + """Load and parse the feature list. -def _parse_features(featurePath,fp,method): - """ Load and parse the feature list - This function adds mean feature values to the FilePattern object (fp) for every image in the FilePattern object if the image is listed in the feature csv file. 
@@ -275,22 +305,32 @@ def _parse_features(featurePath,fp,method): unique_height - List of all unique heights (in pixels) in the image stitching vectors """ # Get the csv files containing features - csv_files = [f.name for f in Path(featurePath).iterdir() if f.is_file() and f.suffix=='.csv'] + csv_files = [ + f.name + for f in Path(featurePath).iterdir() + if f.is_file() and f.suffix == ".csv" + ] # Unique list of features and values feature_list = {} - + # Open each csv files for feat_file in csv_files: - fpath = os.path.join(featurePath,feat_file) - with open(fpath,'r') as fr: + fpath = os.path.join(featurePath, feat_file) + with open(fpath) as fr: # Read the first line, which should contain headers first_line = fr.readline() - headers = first_line.rstrip('\n').split(',') - var_ind = {key:val for key,val in enumerate(headers)} # map headers to line positions + headers = first_line.rstrip("\n").split(",") + var_ind = dict(enumerate(headers)) # map headers to line positions # Add unique features to the feature_list - feature_list.update({key:[] for key in headers if key not in feature_list.keys() and key != 'file'}) + feature_list.update( + { + key: [] + for key in headers + if key not in feature_list.keys() and key != "file" + }, + ) # Get the first line of data line = fr.readline() @@ -300,127 +340,177 @@ def _parse_features(featurePath,fp,method): fcheck = 0 while line: # Parse the current line as a dictionary - p_line = {var_ind[ind]:val for ind,val in enumerate(line.rstrip('\n').split(','))} - for key,val in p_line.items(): + p_line = { + var_ind[ind]: val + for ind, val in enumerate(line.rstrip("\n").split(",")) + } + for key, val in p_line.items(): v = get_number(val) - if isinstance(v,float): + if isinstance(v, float): p_line[key] = [v] - elif key != 'file': + elif key != "file": p_line[key] = [] # Get the image associated with the current line - current_image = _get_file_dict(fp,p_line['file']) + current_image = _get_file_dict(fp, p_line["file"]) - if 
current_image == None or 'line' not in current_image.keys(): + if current_image is None or "line" not in current_image.keys(): line = fr.readline() continue # Loop through rows until the filename changes line = fr.readline() - np_line = {var_ind[ind]:val for ind,val in enumerate(line.rstrip('\n').split(','))} - while line and p_line['file'] == np_line['file']: + np_line = { + var_ind[ind]: val + for ind, val in enumerate(line.rstrip("\n").split(",")) + } + while line and p_line["file"] == np_line["file"]: # Store the values in a feature list - for key,val in np_line.items(): + for key, val in np_line.items(): v = get_number(val) - if isinstance(v,float): + if isinstance(v, float): p_line[key].append(float(val)) # Get the next line line = fr.readline() - np_line = {var_ind[ind]:val for ind,val in enumerate(line.rstrip('\n').split(','))} + np_line = { + var_ind[ind]: val + for ind, val in enumerate(line.rstrip("\n").split(",")) + } # Get the mean of the feature list, save in the file dictionary - for key,val in p_line.items(): - if isinstance(val,list): + for key, val in p_line.items(): + if isinstance(val, list): try: current_image[key] = METHODS[method](val) feature_list[key].append(current_image[key]) except ZeroDivisionError: - current_image[key] = 'NaN' - + current_image[key] = "NaN" + # Checkpoint fnum += 1 - if fnum//1000 > fcheck: + if fnum // 1000 > fcheck: fcheck += 1 - logger.info('Files parsed: {}'.format(fnum)) + logger.info(f"Files parsed: {fnum}") return feature_list -if __name__=="__main__": + +if __name__ == "__main__": # Setup the argument parsing logger.info("Parsing arguments...") - parser = argparse.ArgumentParser(prog='main', description='Build a heatmap pyramid for features values in a csv as an overlay for another pyramid.') - parser.add_argument('--features', dest='features', type=str, - help='CSV collection containing features', required=True) - parser.add_argument('--inpDir', dest='inpDir', type=str, - help='Input image collection used to 
build a pyramid that this plugin will make an overlay for', required=True) - parser.add_argument('--method', dest='method', type=str, - help='Method used to create the heatmap', required=True) - parser.add_argument('--vector', dest='vector', type=str, - help='Stitching vector used to buld the image pyramid.', required=True) - parser.add_argument('--outImages', dest='outImages', type=str, - help='Heatmap Output Images', required=True) - parser.add_argument('--vectorInMetadata', dest='vectorInMetadata', type=str, - help='Store stitching vector in metadata', required=True) - parser.add_argument('--outVectors', dest='outVectors', type=str, - help='Heatmap Output Vectors', required=False) - + parser = argparse.ArgumentParser( + prog="main", + description="Build a heatmap pyramid for features values in a csv as an overlay for another pyramid.", + ) + parser.add_argument( + "--features", + dest="features", + type=str, + help="CSV collection containing features", + required=True, + ) + parser.add_argument( + "--inpDir", + dest="inpDir", + type=str, + help="Input image collection used to build a pyramid that this plugin will make an overlay for", + required=True, + ) + parser.add_argument( + "--method", + dest="method", + type=str, + help="Method used to create the heatmap", + required=True, + ) + parser.add_argument( + "--vector", + dest="vector", + type=str, + help="Stitching vector used to buld the image pyramid.", + required=True, + ) + parser.add_argument( + "--outImages", + dest="outImages", + type=str, + help="Heatmap Output Images", + required=True, + ) + parser.add_argument( + "--vectorInMetadata", + dest="vectorInMetadata", + type=str, + help="Store stitching vector in metadata", + required=True, + ) + parser.add_argument( + "--outVectors", + dest="outVectors", + type=str, + help="Heatmap Output Vectors", + required=False, + ) + # Parse the arguments args = parser.parse_args() features = args.features - logger.info('features = {}'.format(features)) + 
logger.info(f"features = {features}") inpDir = args.inpDir - logger.info('inpDir = {}'.format(inpDir)) + logger.info(f"inpDir = {inpDir}") method = args.method - logger.info('method = {}'.format(method)) + logger.info(f"method = {method}") vector = args.vector - logger.info('vector = {}'.format(vector)) + logger.info(f"vector = {vector}") outImages = args.outImages - vectorInMetadata = args.vectorInMetadata == 'true' - logger.info('vectorInMetadata = {}'.format(vectorInMetadata)) + vectorInMetadata = args.vectorInMetadata == "true" + logger.info(f"vectorInMetadata = {vectorInMetadata}") if vectorInMetadata: - outVectors = Path(outImages).joinpath('metadata_files') + outVectors = Path(outImages).joinpath("metadata_files") outVectors.mkdir() outVectors = str(outVectors.absolute()) - outImages = Path(outImages).joinpath('images') + outImages = Path(outImages).joinpath("images") outImages.mkdir() outImages = str(outImages.absolute()) else: outVectors = args.outVectors - logger.info('outImages = {}'.format(outImages)) - logger.info('outVectors = {}'.format(outVectors)) + logger.info(f"outImages = {outImages}") + logger.info(f"outVectors = {outVectors}") # Set up the fileparser - fp = FilePattern(inpDir,'.*.ome.tif') + fp = FilePattern(inpDir, ".*.ome.tif") # Parse the stitching vector - logger.info('Parsing stitching vectors...') - widths, heights = _parse_stitch(vector,fp) + logger.info("Parsing stitching vectors...") + widths, heights = _parse_stitch(vector, fp) # Parse the features - logger.info('Parsing features...') - feature_list = _parse_features(features,fp,method) + logger.info("Parsing features...") + feature_list = _parse_features(features, fp, method) # Determine the min, max, and unique values for each data set - logger.info('Setting feature scales...') + logger.info("Setting feature scales...") feature_mins = {} feature_ranges = {} - for key,val in feature_list.items(): - valid_vals = [v for v in val if v is not 'NaN'] + for key, val in 
feature_list.items(): + valid_vals = [v for v in val if v != "NaN"] if len(valid_vals) == 0: feature_mins[key] = 0 feature_ranges[key] = 0 else: feature_mins[key] = min(valid_vals) - feature_ranges[key] = max(valid_vals)-feature_mins[key] + feature_ranges[key] = max(valid_vals) - feature_mins[key] unique_levels = set() for fl in fp.iterate(): - if 'line' not in fl.keys(): + if "line" not in fl.keys(): continue for ft in feature_list: try: if get_number(fl[ft]): - fl[ft] = round((fl[ft] - feature_mins[ft])/feature_ranges[ft] * 254 + 1) + fl[ft] = round( + (fl[ft] - feature_mins[ft]) / feature_ranges[ft] * 254 + 1, + ) unique_levels.update([fl[ft]]) else: fl[ft] = 0 @@ -428,46 +518,70 @@ def _parse_features(featurePath,fp,method): except ZeroDivisionError: fl[ft] = 0 unique_levels.update([0]) - + # Start the javabridge with proper java logging - logger.info('Initializing the javabridge...') + logger.info("Initializing the javabridge...") log_config = Path(__file__).parent.joinpath("log4j.properties") - jutil.start_vm(args=["-Dlog4j.configuration=file:{}".format(str(log_config.absolute()))],class_path=bioformats.JARS) + jutil.start_vm( + args=[f"-Dlog4j.configuration=file:{log_config.absolute()!s}"], + class_path=bioformats.JARS, + ) # Generate the heatmap images - logger.info('Generating heatmap images...') + logger.info("Generating heatmap images...") for w in widths: for h in heights: for l in unique_levels: - out_file = Path(outImages).joinpath(str(w) + '_' + str(h) + '_' + str(l) + '.ome.tif') + out_file = Path(outImages).joinpath( + str(w) + "_" + str(h) + "_" + str(l) + ".ome.tif", + ) if not out_file.exists(): - image = np.ones((h,w,1,1,1),dtype=np.uint8)*l - bw = BioWriter(str(Path(outImages).joinpath(str(w) + '_' + str(h) + '_' + str(l) + '.ome.tif').absolute()),X=w,Y=h,Z=1,C=1,T=1) + image = np.ones((h, w, 1, 1, 1), dtype=np.uint8) * l + bw = BioWriter( + str( + Path(outImages) + .joinpath(str(w) + "_" + str(h) + "_" + str(l) + ".ome.tif") + .absolute(), + 
), + X=w, + Y=h, + Z=1, + C=1, + T=1, + ) bw.write_image(image) bw.close_image() # Close the javabridge - logger.info('Closing the javabridge...') + logger.info("Closing the javabridge...") jutil.kill_vm() # Build the output stitching vector - logger.info('Generating the heatmap...') - file_name = '{}_{}_{}.ome.tif' - for num,feat in enumerate(feature_list): - fpath = str(Path(outVectors).joinpath('img-global-positions-' + str(num+1) + '.txt').absolute()) - with open(fpath,'w') as fw: + logger.info("Generating the heatmap...") + file_name = "{}_{}_{}.ome.tif" + for num, feat in enumerate(feature_list): + fpath = str( + Path(outVectors) + .joinpath("img-global-positions-" + str(num + 1) + ".txt") + .absolute(), + ) + with open(fpath, "w") as fw: line = 0 while True: for f in fp.iterate(): - if 'line' in f and f['line'] == line: + if "line" in f and f["line"] == line: break - if 'line' in f and f['line'] == line: - fw.write("file: {}; corr: {}; position: ({}, {}); grid: ({}, {});\n".format(file_name.format(f['width'],f['height'],f[feat]), - f['correlation'], - f['posX'], - f['posY'], - f['gridX'], - f['gridY'])) + if "line" in f and f["line"] == line: + fw.write( + "file: {}; corr: {}; position: ({}, {}); grid: ({}, {});\n".format( + file_name.format(f["width"], f["height"], f[feat]), + f["correlation"], + f["posX"], + f["posY"], + f["gridX"], + f["gridY"], + ), + ) line += 1 else: - break \ No newline at end of file + break diff --git a/visualization/polus-image-cluster-annotation-plugin/Dockerfile b/visualization/polus-image-cluster-annotation-plugin/Dockerfile index ef21c451b..5547d57b2 100644 --- a/visualization/polus-image-cluster-annotation-plugin/Dockerfile +++ b/visualization/polus-image-cluster-annotation-plugin/Dockerfile @@ -2,7 +2,7 @@ FROM polusai/bfio:2.1.9 COPY VERSION / - + ARG EXEC_DIR="/opt/executables" ARG DATA_DIR="/data" @@ -18,4 +18,4 @@ RUN pip3 install -r ${EXEC_DIR}/requirements.txt --no-cache-dir WORKDIR ${EXEC_DIR} -ENTRYPOINT ["python3", 
"/opt/executables/main.py"] \ No newline at end of file +ENTRYPOINT ["python3", "/opt/executables/main.py"] diff --git a/visualization/polus-image-cluster-annotation-plugin/README.md b/visualization/polus-image-cluster-annotation-plugin/README.md index c06fdcdfc..0b1e00087 100644 --- a/visualization/polus-image-cluster-annotation-plugin/README.md +++ b/visualization/polus-image-cluster-annotation-plugin/README.md @@ -35,5 +35,3 @@ This plugin takes four input argument if methods other than 'Manual' is selected | `--csvdir` | Input csv collection | Input | csvCollection | | `--borderwidth` | Enter border width | Input | integer | | `--outdir` | Output collection | Output | imageCollection | - - diff --git a/visualization/polus-image-cluster-annotation-plugin/build-docker.sh b/visualization/polus-image-cluster-annotation-plugin/build-docker.sh index 71e7a904d..fe263f4d0 100644 --- a/visualization/polus-image-cluster-annotation-plugin/build-docker.sh +++ b/visualization/polus-image-cluster-annotation-plugin/build-docker.sh @@ -1,4 +1,4 @@ #!/bin/bash version=$(=1.2.2 \ No newline at end of file +pandas>=1.2.2 diff --git a/visualization/polus-precompute-volume-plugin/Dockerfile b/visualization/polus-precompute-volume-plugin/Dockerfile index c1bee335a..6a6a3eea4 100644 --- a/visualization/polus-precompute-volume-plugin/Dockerfile +++ b/visualization/polus-precompute-volume-plugin/Dockerfile @@ -34,4 +34,4 @@ COPY --from=0 /opt/executables ${EXEC_DIR}/ WORKDIR ${EXEC_DIR} RUN pip3 install -r ${EXEC_DIR}/requirements.txt --no-cache-dir -ENTRYPOINT ["python3", "/opt/executables/main.py"] \ No newline at end of file +ENTRYPOINT ["python3", "/opt/executables/main.py"] diff --git a/visualization/polus-precompute-volume-plugin/README.md b/visualization/polus-precompute-volume-plugin/README.md index 25b1604d0..8ec5af751 100644 --- a/visualization/polus-precompute-volume-plugin/README.md +++ b/visualization/polus-precompute-volume-plugin/README.md @@ -1,6 +1,6 @@ # Polus 
Precompute Volume Plugin -This WIPP plugin turns all tiled tiff images in an image collection into a [Neuroglancer precomputed format](https://github.com/google/neuroglancer/tree/master/src/neuroglancer/datasource/precomputed). The tiled tiff format and associated metadata is accessed using bfio and uses the third party library neurogen. +This WIPP plugin turns all tiled tiff images in an image collection into a [Neuroglancer precomputed format](https://github.com/google/neuroglancer/tree/master/src/neuroglancer/datasource/precomputed). The tiled tiff format and associated metadata is accessed using bfio and uses the third party library neurogen. This plugin can also creates meshes if the imagetype is 'segmentation' @@ -26,4 +26,3 @@ This plugin takes one input argument and one output argument: | `imageType` | Image/Segmentation | Input | String | | `mesh` | Generate Mesh for Labelled Data | Input | Boolean | | `imagepattern` | Image pattern | Input | String | - diff --git a/visualization/polus-precompute-volume-plugin/VERSION b/visualization/polus-precompute-volume-plugin/VERSION index c650d5af2..cb498ab2c 100644 --- a/visualization/polus-precompute-volume-plugin/VERSION +++ b/visualization/polus-precompute-volume-plugin/VERSION @@ -1 +1 @@ -0.4.8 \ No newline at end of file +0.4.8 diff --git a/visualization/polus-precompute-volume-plugin/plugin.json b/visualization/polus-precompute-volume-plugin/plugin.json index bb97cadcc..8566737ec 100644 --- a/visualization/polus-precompute-volume-plugin/plugin.json +++ b/visualization/polus-precompute-volume-plugin/plugin.json @@ -75,4 +75,4 @@ "condition": "inputs.imageType=='segmentation'" } ] -} \ No newline at end of file +} diff --git a/visualization/polus-precompute-volume-plugin/run-plugin.sh b/visualization/polus-precompute-volume-plugin/run-plugin.sh index e78793354..e047cb4b0 100755 --- a/visualization/polus-precompute-volume-plugin/run-plugin.sh +++ b/visualization/polus-precompute-volume-plugin/run-plugin.sh @@ 
-1,7 +1,7 @@ version=$(=1.20.3 scikit_image==0.18.2 trimesh==3.8.19 open3d==0.13.0 -bfio[all]==2.1.9 \ No newline at end of file +bfio[all]==2.1.9 diff --git a/visualization/polus-precompute-volume-plugin/src/utils.py b/visualization/polus-precompute-volume-plugin/src/utils.py index f65e3d575..72f2f86e3 100644 --- a/visualization/polus-precompute-volume-plugin/src/utils.py +++ b/visualization/polus-precompute-volume-plugin/src/utils.py @@ -1,27 +1,24 @@ -import numpy as np -import os +import logging import math - -import logging, traceback -from concurrent.futures import ThreadPoolExecutor +import os import tempfile +import traceback +from concurrent.futures import ThreadPoolExecutor +from itertools import repeat +import bfio +import numpy as np import trimesh -from skimage import measure - -from neurogen import mesh as ngmesh from neurogen import info as nginfo +from neurogen import mesh as ngmesh from neurogen import volume as ngvol - -from itertools import repeat - -import bfio +from skimage import measure # Import environment variables, if POLUS_LOG empty then automatically sets to INFO -POLUS_LOG = getattr(logging,os.environ.get('POLUS_LOG','INFO')) +POLUS_LOG = getattr(logging, os.environ.get("POLUS_LOG", "INFO")) CHUNK_SIZE = 64 -chunk_size = [CHUNK_SIZE,CHUNK_SIZE,CHUNK_SIZE] +chunk_size = [CHUNK_SIZE, CHUNK_SIZE, CHUNK_SIZE] MESH_CHUNK_SIZE = 512 mesh_chunk_size = [MESH_CHUNK_SIZE, MESH_CHUNK_SIZE, MESH_CHUNK_SIZE] @@ -29,60 +26,51 @@ bit_depth = 10 -get_dim1dim2 = lambda dimension1, dimension_size, rng_size: \ - (int(dimension1), int(min(dimension1+rng_size, dimension_size))) +def get_dim1dim2(dimension1, dimension_size, rng_size): + return int(dimension1), int(min(dimension1 + rng_size, dimension_size)) -# Initialize the logger -logging.basicConfig(format='%(asctime)s - %(name)s - %(levelname)s - %(message)s', - datefmt='%d-%b-%y %H:%M:%S') +# Initialize the logger +logging.basicConfig( + format="%(asctime)s - %(name)s - %(levelname)s - %(message)s", + 
datefmt="%d-%b-%y %H:%M:%S", +) logger = logging.getLogger("utils") logger.setLevel(POLUS_LOG) -def get_resolution(phys_y : tuple, - phys_x : tuple, - phys_z : tuple): - - """ This function generates a resolution in nanometers (nm) - +def get_resolution(phys_y: tuple, phys_x: tuple, phys_z: tuple): + """This function generates a resolution in nanometers (nm). + Args: phys_y : Actual y dimension of input phys_x : Actual x dimension of input phys_z : Actual z dimension of input - - Returns: + + Returns: resolution : A list of integer values of resolution in nanometers in [Y, X, Z] order If Y and X resolutions are none, then default to 325 nm If Z resolution is none, then defaults to the average of Y and X """ # Conversion factors to nm, these are based off of supported Bioformats length units - UNITS = {'m': 10**9, - 'cm': 10**7, - 'mm': 10**6, - 'µm': 10**3, - 'nm': 1, - 'Å': 10**-1} - - if None in phys_y: - phys_y = 325 - else: - phys_y = phys_y[0] * UNITS[phys_y[1]] - if None in phys_x: - phys_x = 325 - else: - phys_x = phys_x[0] * UNITS[phys_x[1]] - if None in phys_z: - phys_z = (phys_x + phys_y)/2 - else: - phys_z = phys_z[0] * UNITS[phys_z[1]] - + UNITS = { + "m": 10**9, + "cm": 10**7, + "mm": 10**6, + "µm": 10**3, + "nm": 1, + "Å": 10**-1, + } + + phys_y = 325 if None in phys_y else phys_y[0] * UNITS[phys_y[1]] + phys_x = 325 if None in phys_x else phys_x[0] * UNITS[phys_x[1]] + phys_z = (phys_x + phys_y) / 2 if None in phys_z else phys_z[0] * UNITS[phys_z[1]] + return [phys_y, phys_x, phys_z] -def save_resolution(output_directory: str, - xyz_volume: tuple): +def save_resolution(output_directory: str, xyz_volume: tuple): """This function encodes a chunked volume. 
- + Args: output_directory : where the encoded image gets saved to xyz_volume : (xyz, volume) @@ -93,202 +81,265 @@ def save_resolution(output_directory: str, xyz, volume = xyz_volume x1_chunk, x2_chunk = xyz[0] y1_chunk, y2_chunk = xyz[1] - z1_chunk, z2_chunk = xyz[2] + z1_chunk, z2_chunk = xyz[2] volume = np.reshape(volume, volume.shape[:3]) - logger.debug("Global Index of Chunk being Saved: " + \ - "({0:0>4}, {0:0>4}), ".format(x1_chunk, x2_chunk) + \ - "({0:0>4}, {0:0>4}), ".format(y1_chunk, y2_chunk) + \ - "({0:0>4}, {0:0>4})".format(z1_chunk, z2_chunk)) + logger.debug( + "Global Index of Chunk being Saved: " + + "({0:0>4}, {0:0>4}), ".format(x1_chunk ) + + "({0:0>4}, {0:0>4}), ".format(y1_chunk ) + + "({0:0>4}, {0:0>4})".format(z1_chunk ), + ) volume_encoded = ngvol.encode_volume(volume) - ngvol.write_image(image=volume_encoded, volume_directory=output_directory, - y=(y1_chunk, y2_chunk), - x=(x1_chunk, x2_chunk), - z=(z1_chunk, z2_chunk)) - except Exception as e: - raise ValueError(f"Something Went Wrong!: {traceback.print_exc()}") - -def iterate_chunk_tiles(cached_image: bfio.bfio.BioReader, - x_dimensions: tuple, - y_dimensions: tuple, - z_dimensions: tuple, - chunk_tile_size: tuple): - - """ This iterator takes in a cached tile and returns chunks of the cached tile. - It is set up this way so that executor.map() can run on it. + ngvol.write_image( + image=volume_encoded, + volume_directory=output_directory, + y=(y1_chunk, y2_chunk), + x=(x1_chunk, x2_chunk), + z=(z1_chunk, z2_chunk), + ) + except Exception: + msg = f"Something Went Wrong!: {traceback.print_exc()}" + raise ValueError(msg) + + +def iterate_chunk_tiles( + cached_image: bfio.bfio.BioReader, + x_dimensions: tuple, + y_dimensions: tuple, + z_dimensions: tuple, + chunk_tile_size: tuple, +): + """This iterator takes in a cached tile and returns chunks of the cached tile. + It is set up this way so that executor.map() can run on it. 
+ Args: cached_image: this is local cached tile that is split into chunks x_dimensions: global x dimensions of the cached tile y_dimensions: global y dimensions of the cached tile z_dimensions: global z dimensions of the cached tile chunk_tile_size: size of the chunks that we want to split the cached tile into - Returns: + Returns: (x1_chunk, x2_chunk): global x dimensions of the chunked tile (y1_chunk, y2_chunk): global y dimensions of the chunked tile (z1_chunk, z2_chunk): global z dimensions of the chunked tile - volume: the chunked volume + volume: the chunked volume. """ - for x1_chunk in range(x_dimensions[0], x_dimensions[1], chunk_tile_size[0]): for y1_chunk in range(y_dimensions[0], y_dimensions[1], chunk_tile_size[1]): for z1_chunk in range(z_dimensions[0], z_dimensions[1], chunk_tile_size[2]): - - # sometimes output is four dimensional or two dimensional, need to make sure - # thats its only three dimensional + # sometimes output is four dimensional or two dimensional, need to make sure + # thats its only three dimensional cached_image_shape = list(cached_image.shape) if len(cached_image_shape) != 3: if len(cached_image_shape) > 3: - cached_image = np.reshape(cached_image, (cached_image_shape[:3])) + cached_image = np.reshape( + cached_image, (cached_image_shape[:3]), + ) elif len(cached_image_shape) == 2: - cached_image = np.reshape(cached_image, cached_image_shape.append(1)) + cached_image = np.reshape( + cached_image, cached_image_shape.append(1), + ) else: - raise ValueError("Input Image is only 1 dimensional") + msg = "Input Image is only 1 dimensional" + raise ValueError(msg) + + x1_chunk, x2_chunk = get_dim1dim2( + x1_chunk, x_dimensions[1], chunk_tile_size[0], + ) + y1_chunk, y2_chunk = get_dim1dim2( + y1_chunk, y_dimensions[1], chunk_tile_size[1], + ) + z1_chunk, z2_chunk = get_dim1dim2( + z1_chunk, z_dimensions[1], chunk_tile_size[2], + ) + yield (x1_chunk, x2_chunk), (y1_chunk, y2_chunk), ( + z1_chunk, + z2_chunk, + ), cached_image[ + x1_chunk 
- x_dimensions[0] : x2_chunk - x_dimensions[0], + y1_chunk - y_dimensions[0] : y2_chunk - y_dimensions[0], + z1_chunk - z_dimensions[0] : z2_chunk - z_dimensions[0], + ] - x1_chunk, x2_chunk = get_dim1dim2(x1_chunk, x_dimensions[1], chunk_tile_size[0]) - y1_chunk, y2_chunk = get_dim1dim2(y1_chunk, y_dimensions[1], chunk_tile_size[1]) - z1_chunk, z2_chunk = get_dim1dim2(z1_chunk, z_dimensions[1], chunk_tile_size[2]) - yield (x1_chunk, x2_chunk), \ - (y1_chunk, y2_chunk), \ - (z1_chunk, z2_chunk), cached_image[x1_chunk-x_dimensions[0]:x2_chunk-x_dimensions[0], - y1_chunk-y_dimensions[0]:y2_chunk-y_dimensions[0], - z1_chunk-z_dimensions[0]:z2_chunk-z_dimensions[0]] def iterate_cache_tiles(bf_image: bfio.bfio.BioReader): + """This function iterates through the bfio object + tiles and caches the information for easy access. - """ This function iterates through the bfio object - tiles and caches the information for easy access. Args: bf_image: input image (YXZ) + Returns: - Transposed cache image with dimension order of (XYZ) - + Transposed cache image with dimension order of (XYZ). 
+ """ - cache_tile = bf_image._TILE_SIZE for x1_cache in range(0, bf_image.X, cache_tile): for y1_cache in range(0, bf_image.Y, cache_tile): for z1_cache in range(0, bf_image.Z, cache_tile): for c1_cache in range(0, bf_image.C, cache_tile): for t1_cache in range(0, bf_image.T, cache_tile): + y1_cache, y2_cache = get_dim1dim2( + y1_cache, bf_image.Y, cache_tile, + ) + x1_cache, x2_cache = get_dim1dim2( + x1_cache, bf_image.X, cache_tile, + ) + z1_cache, z2_cache = get_dim1dim2( + z1_cache, bf_image.Z, cache_tile, + ) + c1_cache, c2_cache = get_dim1dim2( + c1_cache, bf_image.C, cache_tile, + ) + t1_cache, t2_cache = get_dim1dim2( + t1_cache, bf_image.T, cache_tile, + ) + + bf_image.cache = bf_image[ + y1_cache:y2_cache, + x1_cache:x2_cache, + z1_cache:z2_cache, + c1_cache:c2_cache, + t1_cache:t2_cache, + ] + + logger.info( + "Caching: " + + f"X ({x1_cache:0>4}-{x2_cache:0>4}), " + + f"Y ({y1_cache:0>4}-{y2_cache:0>4}), " + + f"Z ({z1_cache:0>4}-{z2_cache:0>4}), " + + f"C ({c1_cache:0>4}-{c2_cache:0>4}), " + + f"T ({t1_cache:0>4}-{t2_cache:0>4}) " + + f"has shape {bf_image.cache.shape}", + ) - y1_cache, y2_cache = get_dim1dim2(y1_cache, bf_image.Y, cache_tile) - x1_cache, x2_cache = get_dim1dim2(x1_cache, bf_image.X, cache_tile) - z1_cache, z2_cache = get_dim1dim2(z1_cache, bf_image.Z, cache_tile) - c1_cache, c2_cache = get_dim1dim2(c1_cache, bf_image.C, cache_tile) - t1_cache, t2_cache = get_dim1dim2(t1_cache, bf_image.T, cache_tile) - - bf_image.cache = bf_image[y1_cache:y2_cache, - x1_cache:x2_cache, - z1_cache:z2_cache, - c1_cache:c2_cache, - t1_cache:t2_cache] - - logger.info("Caching: " + \ - "X ({0:0>4}-{1:0>4}), ".format(x1_cache, x2_cache) + \ - "Y ({0:0>4}-{1:0>4}), ".format(y1_cache, y2_cache) + \ - "Z ({0:0>4}-{1:0>4}), ".format(z1_cache, z2_cache) + \ - "C ({0:0>4}-{1:0>4}), ".format(c1_cache, c2_cache) + \ - "T ({0:0>4}-{1:0>4}) ".format(t1_cache, t2_cache) + \ - "has shape {}".format(bf_image.cache.shape)) - # The order the image is now XYZ 
bf_image.cache = np.moveaxis(bf_image.cache, 0, 1) - yield (x1_cache, x2_cache, \ - y1_cache, y2_cache, \ - z1_cache, z2_cache, \ - c1_cache, c2_cache, \ - z1_cache, z2_cache, bf_image.cache) - -def get_highest_resolution_volumes(bf_image: bfio.bfio.BioReader, - resolution_directory: str): - """ This function gets the most detailed pyramid and saves it in encoded + yield ( + x1_cache, + x2_cache, + y1_cache, + y2_cache, + z1_cache, + z2_cache, + c1_cache, + c2_cache, + z1_cache, + z2_cache, + bf_image.cache, + ) + + +def get_highest_resolution_volumes( + bf_image: bfio.bfio.BioReader, resolution_directory: str, +): + """This function gets the most detailed pyramid and saves it in encoded chunks that can be processed by Neuroglancer. Args: bf_image: the image that gets read - resolution_directory: the directory that the images get saved into + resolution_directory: the directory that the images get saved into """ # get tiles of 1024 - for x1_cache, x2_cache, \ - y1_cache, y2_cache, \ - z1_cache, z2_cache, \ - c1_cache, c2_cache, \ - t1_cache, t2_cache, bf_image_cache in iterate_cache_tiles(bf_image = bf_image): - + for ( + x1_cache, + x2_cache, + y1_cache, + y2_cache, + z1_cache, + z2_cache, + c1_cache, + c2_cache, + t1_cache, + t2_cache, + bf_image_cache, + ) in iterate_cache_tiles(bf_image=bf_image): bf_image_cache_shape = bf_image_cache.shape bf_image_cache = np.reshape(bf_image_cache, bf_image_cache_shape[:3]) # use multiprocessing to encode every chunk - with ThreadPoolExecutor(max_workers = os.cpu_count()-1) as executor: - executor.map(save_resolution, - repeat(resolution_directory), - (((x_dim, y_dim, z_dim), - volume) for x_dim, \ - y_dim, \ - z_dim, \ - volume in iterate_chunk_tiles(cached_image = bf_image_cache, - x_dimensions = (x1_cache, x2_cache), - y_dimensions = (y1_cache, y2_cache), - z_dimensions = (z1_cache, z2_cache), - chunk_tile_size = chunk_size))) - -def create_plyfiles(subvolume : np.ndarray, - ids : list, - temp_dir : str, - start_y : int, - 
start_x : int, - start_z : int): - """ - This function generates temporary ply files of labelled segments found - in the subvolume. - + with ThreadPoolExecutor(max_workers=os.cpu_count() - 1) as executor: + executor.map( + save_resolution, + repeat(resolution_directory), + ( + ((x_dim, y_dim, z_dim), volume) + for x_dim, y_dim, z_dim, volume in iterate_chunk_tiles( + cached_image=bf_image_cache, + x_dimensions=(x1_cache, x2_cache), + y_dimensions=(y1_cache, y2_cache), + z_dimensions=(z1_cache, z2_cache), + chunk_tile_size=chunk_size, + ) + ), + ) + + +def create_plyfiles( + subvolume: np.ndarray, + ids: list, + temp_dir: str, + start_y: int, + start_x: int, + start_z: int, +): + """This function generates temporary ply files of labelled segments found + in the subvolume. + Args: subvolume : A chunk of the total volume ids : A list of labeled segments found in the subvolume temp_dir : temporary directory where outputs get saved to - start_y : The start y index of the subvolume + start_y : The start y index of the subvolume start_x : The start x index of the subvolume start_z : The start z index of the subvolume Returns: None, saves subvolumes into temporary directory """ - for iden in ids: - vertices,faces,_,_ = measure.marching_cubes((subvolume==iden).astype("uint8"), step_size=1) - root_mesh = trimesh.Trimesh(vertices=vertices, faces=faces) # creates mesh - chunk_filename = '{}_{}_{}_{}.ply'.format(iden, start_x, start_y, start_z) - export_to = os.path.join(temp_dir, chunk_filename) # saves mesh in temp directory + vertices, faces, _, _ = measure.marching_cubes( + (subvolume == iden).astype("uint8"), step_size=1, + ) + root_mesh = trimesh.Trimesh(vertices=vertices, faces=faces) # creates mesh + chunk_filename = f"{iden}_{start_x}_{start_y}_{start_z}.ply" + export_to = os.path.join( + temp_dir, chunk_filename, + ) # saves mesh in temp directory root_mesh.export(export_to) - logger.debug("Saved Segment {} as {}".format(iden, chunk_filename)) - -def 
concatenate_and_generate_meshes(iden : int, - temp_dir : str, - output_image : str, - bit_depth : int, - mesh_chunk_size : list): - """ This function concatenates the appropriate polygons in the temporary directory + logger.debug(f"Saved Segment {iden} as {chunk_filename}") + + +def concatenate_and_generate_meshes( + iden: int, temp_dir: str, output_image: str, bit_depth: int, mesh_chunk_size: list, +): + """This function concatenates the appropriate polygons in the temporary directory and generates progressive meshes as defined in neurogen. - - Args: - iden : The labeled segment that we are concatenating + + Args: + iden : The labeled segment that we are concatenating temp_dir : The directory where all the polygon files are located output_image : The output directory where all of Neuroglancer's files are stored - bit_depth : Number of bits for mesh vertex quantization. Can only be 10 or 16. + bit_depth : Number of bits for mesh vertex quantization. Can only be 10 or 16. chunk_size : Size of chunks in temporary file Returns: None, concatenates and saves the progressive meshes into the appropriate directory """ try: # Get the files that are relevent to the segment iden - chunkfiles = [f for f in os.listdir(temp_dir) if os.path.isfile(os.path.join(temp_dir, f))] - logger.info('Starting Progressive Meshes for ID {}'.format(iden)) - idenfiles = [str(f) for f in chunkfiles if f.split('_')[0] == str(iden)] + chunkfiles = [ + f for f in os.listdir(temp_dir) if os.path.isfile(os.path.join(temp_dir, f)) + ] + logger.info(f"Starting Progressive Meshes for ID {iden}") + idenfiles = [str(f) for f in chunkfiles if f.split("_")[0] == str(iden)] len_files = len(idenfiles) - logger.info('ID {} is scattered amoung {} chunk(s)'.format(str(iden), len_files)) + logger.info( + f"ID {iden!s} is scattered amoung {len_files} chunk(s)", + ) starts = [] - stripped_files = [i.strip('.ply').split('_')[1:] for i in idenfiles] + stripped_files = [i.strip(".ply").split("_")[1:] for i in 
idenfiles] for fil in range(len_files): start = [int(trans) for trans in stripped_files[fil]] starts.append(start) @@ -298,193 +349,270 @@ def concatenate_and_generate_meshes(iden : int, # Get the first mesh (upper left) mesh1_path = os.path.join(temp_dir, mesh_fileobj) - mesh1 = trimesh.load_mesh(file_obj=mesh1_path, file_type='ply') - translate_start = ([1, 0, 0, start_mesh[0]], - [0, 1, 0, start_mesh[1]], - [0, 0, 1, start_mesh[2]], - [0, 0, 0, 1]) + mesh1 = trimesh.load_mesh(file_obj=mesh1_path, file_type="ply") + translate_start = ( + [1, 0, 0, start_mesh[0]], + [0, 1, 0, start_mesh[1]], + [0, 0, 1, start_mesh[2]], + [0, 0, 0, 1], + ) mesh1.apply_transform(translate_start) - mesh1bounds = mesh1.bounds - logger.debug('** Loaded chunk #1: {} ---- {} bytes'.format(mesh_fileobj, os.path.getsize(mesh1_path))) + logger.debug( + "** Loaded chunk #1: {} ---- {} bytes".format( + mesh_fileobj, os.path.getsize(mesh1_path), + ), + ) # if there is only one mesh, then decompose if len_files == 1: - num_lods = math.ceil(math.log(len(mesh1.vertices),1024)) - ngmesh.fulloctree_decomposition_mesh(mesh1, num_lods=num_lods, - segment_id=iden, directory=output_image, quantization_bits=bit_depth) + num_lods = math.ceil(math.log(len(mesh1.vertices), 1024)) + ngmesh.fulloctree_decomposition_mesh( + mesh1, + num_lods=num_lods, + segment_id=iden, + directory=output_image, + quantization_bits=bit_depth, + ) # else concatenate the meshes else: - stripped_files_middle = [idy.strip('.ply').split('_')[1:] for idy in idenfiles] - for i in range(len_files-1): + stripped_files_middle = [ + idy.strip(".ply").split("_")[1:] for idy in idenfiles + ] + for i in range(len_files - 1): mesh2_path = os.path.join(temp_dir, idenfiles[i]) - mesh2 = trimesh.load_mesh(file_obj=mesh2_path, file_type='ply') - logger.debug('** Loaded chunk #{}: {} ---- {} bytes'.format(i+2, idenfiles[i], os.path.getsize(mesh2_path))) - transformationmatrix = [int(trans) for trans in stripped_files_middle[i]] - offset = 
[transformationmatrix[i]/mesh_chunk_size[i] for i in range(3)] + mesh2 = trimesh.load_mesh(file_obj=mesh2_path, file_type="ply") + logger.debug( + "** Loaded chunk #{}: {} ---- {} bytes".format( + i + 2, idenfiles[i], os.path.getsize(mesh2_path), + ), + ) + transformationmatrix = [ + int(trans) for trans in stripped_files_middle[i] + ] + offset = [ + transformationmatrix[i] / mesh_chunk_size[i] for i in range(3) + ] middle_mesh = transformationmatrix - translate_middle = ([1, 0, 0, middle_mesh[0] - offset[0]], - [0, 1, 0, middle_mesh[1] - offset[1]], - [0, 0, 1, middle_mesh[2] - offset[2]], - [0, 0, 0, 1]) + translate_middle = ( + [1, 0, 0, middle_mesh[0] - offset[0]], + [0, 1, 0, middle_mesh[1] - offset[1]], + [0, 0, 1, middle_mesh[2] - offset[2]], + [0, 0, 0, 1], + ) mesh2.apply_transform(translate_middle) mesh1 = trimesh.util.concatenate(mesh1, mesh2) - num_lods = math.ceil(math.log(len(mesh1.vertices),1024)) - ngmesh.fulloctree_decomposition_mesh(mesh1, num_lods=num_lods, - segment_id=iden, directory=output_image, quantization_bits=bit_depth) - except Exception as e: + num_lods = math.ceil(math.log(len(mesh1.vertices), 1024)) + ngmesh.fulloctree_decomposition_mesh( + mesh1, + num_lods=num_lods, + segment_id=iden, + directory=output_image, + quantization_bits=bit_depth, + ) + except Exception: traceback.print_exc() -def build_pyramid(input_image : str, - output_image : str, - imagetype : str, - mesh : bool): - """ - This function builds the pyramids for Volume Generation and Meshes (if specified) +def build_pyramid(input_image: str, output_image: str, imagetype: str, mesh: bool): + """This function builds the pyramids for Volume Generation and Meshes (if specified). 
Args: input_image : Where the input directory is located output_image : Where the output directory is located - imagetype : Specifying whether we are averaging or taking the mode of the images + imagetype : Specifying whether we are averaging or taking the mode of the images when blurring the images for the pyramids mesh : Whether or not meshes are generated with segmented volumes Returns: None, generates pyramids or volumes of input data - + Raises: ValueError: If imagetype is not properly specified """ - try: with bfio.BioReader(input_image) as bf: bfshape = (bf.X, bf.Y, bf.Z, bf.C, bf.T) datatype = np.dtype(bf.dtype) - logger.info("Image Shape (XYZCT) {}".format(bfshape)) + logger.info(f"Image Shape (XYZCT) {bfshape}") - logger.info("Image Datatype {}".format(datatype)) + logger.info(f"Image Datatype {datatype}") - num_scales = np.floor(np.log2(max(bfshape[:3]))).astype('int')+1 + num_scales = np.floor(np.log2(max(bfshape[:3]))).astype("int") + 1 highest_res_directory = os.path.join(output_image, f"{num_scales}") if not os.path.exists(highest_res_directory): os.makedirs(highest_res_directory) - - # info file specifications - resolution = get_resolution(phys_y=bf.physical_size_y, - phys_x=bf.physical_size_x, - phys_z=bf.physical_size_z) - + resolution = get_resolution( + phys_y=bf.physical_size_y, + phys_x=bf.physical_size_x, + phys_z=bf.physical_size_z, + ) if imagetype == "segmentation": - if mesh == False: + if mesh is False: logger.info("\n Creating info file for segmentations ...") - file_info = nginfo.info_segmentation(directory=output_image, - dtype=datatype, - chunk_size = chunk_size, - size=(bf.X, bf.Y, bf.Z), - resolution=resolution) - - else: # if generating meshes - + file_info = nginfo.info_segmentation( + directory=output_image, + dtype=datatype, + chunk_size=chunk_size, + size=(bf.X, bf.Y, bf.Z), + resolution=resolution, + ) + + else: # if generating meshes # Creating a temporary files for the polygon meshes -- will later be converted to Draco with 
tempfile.TemporaryDirectory() as temp_dir: - # keep track of labelled segments all_identities = [] cache_tile = bf._TILE_SIZE - - - logger.info("\n Starting to Cache Section Sizes of {} for Meshes".format(cache_tile)) - # cache tiles of 1024 - for x1_cache, x2_cache, \ - y1_cache, y2_cache, \ - z1_cache, z2_cache, \ - c1_cache, c2_cache, \ - t1_cache, t2_cache, bf.cache in iterate_cache_tiles(bf_image = bf): + logger.info( + "\n Starting to Cache Section Sizes of {} for Meshes".format( + cache_tile, + ), + ) + # cache tiles of 1024 + for ( + x1_cache, + x2_cache, + y1_cache, + y2_cache, + z1_cache, + z2_cache, + c1_cache, + c2_cache, + t1_cache, + t2_cache, + bf.cache, + ) in iterate_cache_tiles(bf_image=bf): cached_shape = bf.cache.shape - bf.cache = np.reshape(bf.cache, cached_shape[:3]) - - for x_dim, y_dim, z_dim, volume in iterate_chunk_tiles(cached_image = bf.cache, - x_dimensions = (x1_cache, x2_cache), - y_dimensions = (y1_cache, y2_cache), - z_dimensions = (z1_cache, z2_cache), - chunk_tile_size = mesh_chunk_size): - + bf.cache = np.reshape(bf.cache, cached_shape[:3]) + + for x_dim, y_dim, z_dim, volume in iterate_chunk_tiles( + cached_image=bf.cache, + x_dimensions=(x1_cache, x2_cache), + y_dimensions=(y1_cache, y2_cache), + z_dimensions=(z1_cache, z2_cache), + chunk_tile_size=mesh_chunk_size, + ): # iterate through mesh chunks in cached tile - ids = np.unique(volume[volume>0]) + ids = np.unique(volume[volume > 0]) len_ids = len(ids) - logger.debug("({0:0>4}, {0:0>4}), ".format(x_dim[0], x_dim[1]) + \ - "({0:0>4}, {0:0>4}), ".format(y_dim[0], y_dim[1]) + \ - "({0:0>4}, {0:0>4}) ".format(z_dim[0], z_dim[1]) + \ - "has {0:0>2} IDS".format(len_ids)) - - all_identities = np.unique(np.append(all_identities, ids)) + logger.debug( + "({0:0>4}, {0:0>4}), ".format(x_dim[0] ) + + "({0:0>4}, {0:0>4}), ".format(y_dim[0] ) + + "({0:0>4}, {0:0>4}) ".format(z_dim[0] ) + + f"has {len_ids:0>2} IDS", + ) + + all_identities = np.unique( + np.append(all_identities, ids), + ) 
if len_ids > 0: - with ThreadPoolExecutor(max_workers=max([os.cpu_count()-1,2])) as executor: - executor.submit(create_plyfiles(subvolume = volume, - ids=ids, - temp_dir=temp_dir, - start_y=y_dim[0], - start_x=x_dim[0], - start_z=z_dim[0])) + with ThreadPoolExecutor( + max_workers=max([os.cpu_count() - 1, 2]), + ) as executor: + executor.submit( + create_plyfiles( + subvolume=volume, + ids=ids, + temp_dir=temp_dir, + start_y=y_dim[0], + start_x=x_dim[0], + start_z=z_dim[0], + ), + ) # concatenate and decompose the meshes in the temporary file for all segments logger.info("\n Generate Progressive Meshes for segments ...") - all_identities = np.unique(all_identities).astype('int') - with ThreadPoolExecutor(max_workers=max([os.cpu_count()-1,2])) as executor: - executor.map(concatenate_and_generate_meshes, - all_identities, repeat(temp_dir), repeat(output_image), repeat(bit_depth), repeat(mesh_chunk_size)) + all_identities = np.unique(all_identities).astype("int") + with ThreadPoolExecutor( + max_workers=max([os.cpu_count() - 1, 2]), + ) as executor: + executor.map( + concatenate_and_generate_meshes, + all_identities, + repeat(temp_dir), + repeat(output_image), + repeat(bit_depth), + repeat(mesh_chunk_size), + ) # Once you have all the labelled segments, then create segment_properties file - logger.info("\n Creating info file for segmentations and meshes ...") - file_info = nginfo.info_mesh(directory=output_image, - chunk_size=chunk_size, - size=(bf.X, bf.Y, bf.Z), - dtype=np.dtype(bf.dtype).name, - ids=all_identities, - resolution=resolution, - segmentation_subdirectory="segment_properties", - bit_depth=bit_depth, - order="XYZ") + logger.info( + "\n Creating info file for segmentations and meshes ...", + ) + file_info = nginfo.info_mesh( + directory=output_image, + chunk_size=chunk_size, + size=(bf.X, bf.Y, bf.Z), + dtype=np.dtype(bf.dtype).name, + ids=all_identities, + resolution=resolution, + segmentation_subdirectory="segment_properties", + bit_depth=bit_depth, + 
order="XYZ", + ) if imagetype == "image": - file_info = nginfo.info_image(directory=output_image, - dtype=datatype, - chunk_size = chunk_size, - size=(bf.X, bf.Y, bf.Z), - resolution=resolution) - - logger.info(f"\n Creating chunked volumes of {chunk_size} based on the info file ...") - get_highest_resolution_volumes(bf_image = bf, - resolution_directory = highest_res_directory) + file_info = nginfo.info_image( + directory=output_image, + dtype=datatype, + chunk_size=chunk_size, + size=(bf.X, bf.Y, bf.Z), + resolution=resolution, + ) + + logger.info( + f"\n Creating chunked volumes of {chunk_size} based on the info file ...", + ) + get_highest_resolution_volumes( + bf_image=bf, resolution_directory=highest_res_directory, + ) logger.info("\n Getting the Rest of the Pyramid ...") for higher_scale in reversed(range(0, num_scales)): # bfshape is XYZ, look at line 357 - inputshape = np.ceil(np.array(bfshape[:3])/(2**(num_scales-higher_scale-1))).astype('int') + inputshape = np.ceil( + np.array(bfshape[:3]) / (2 ** (num_scales - higher_scale - 1)), + ).astype("int") - scale_directory = os.path.join(output_image, str(higher_scale+1)) #images are read from this directory + scale_directory = os.path.join( + output_image, str(higher_scale + 1), + ) # images are read from this directory if not os.path.exists(scale_directory): os.makedirs(scale_directory) - assert os.path.exists(scale_directory), f"Key Directory {scale_directory} does not exist" - + assert os.path.exists( + scale_directory, + ), f"Key Directory {scale_directory} does not exist" + if imagetype == "image": - ngvol.get_rest_of_the_pyramid(directory=scale_directory, input_shape=inputshape, chunk_size=chunk_size, - datatype=datatype, blurring_method='average') + ngvol.get_rest_of_the_pyramid( + directory=scale_directory, + input_shape=inputshape, + chunk_size=chunk_size, + datatype=datatype, + blurring_method="average", + ) else: - ngvol.get_rest_of_the_pyramid(directory=scale_directory, input_shape=inputshape, 
chunk_size=chunk_size, - datatype=datatype, blurring_method='mode') - logger.info(f"Saved Encoded Volumes for Scale {higher_scale} from Key Directory {os.path.basename(scale_directory)}") + ngvol.get_rest_of_the_pyramid( + directory=scale_directory, + input_shape=inputshape, + chunk_size=chunk_size, + datatype=datatype, + blurring_method="mode", + ) + logger.info( + f"Saved Encoded Volumes for Scale {higher_scale} from Key Directory {os.path.basename(scale_directory)}", + ) logger.info("\n Info basesd on Info File ...") - logger.info("Data Type: {}".format(file_info['data_type'])) - logger.info("Number of Channels: {}".format(file_info['num_channels'])) - logger.info("Number of Scales: {}".format(len(file_info['scales']))) - logger.info("Image Type: {}".format(file_info['type'])) - - except Exception as e: - raise ValueError(f"Something Went Wrong!: {traceback.print_exc()}") - \ No newline at end of file + logger.info("Data Type: {}".format(file_info["data_type"])) + logger.info("Number of Channels: {}".format(file_info["num_channels"])) + logger.info("Number of Scales: {}".format(len(file_info["scales"]))) + logger.info("Image Type: {}".format(file_info["type"])) + + except Exception: + msg = f"Something Went Wrong!: {traceback.print_exc()}" + raise ValueError(msg) diff --git a/visualization/tabular-to-microjson-tool/src/polus/images/visualization/tabular_to_microjson/microjson_overlay.py b/visualization/tabular-to-microjson-tool/src/polus/images/visualization/tabular_to_microjson/microjson_overlay.py index 6e249c1d3..c0353e162 100644 --- a/visualization/tabular-to-microjson-tool/src/polus/images/visualization/tabular_to_microjson/microjson_overlay.py +++ b/visualization/tabular-to-microjson-tool/src/polus/images/visualization/tabular_to_microjson/microjson_overlay.py @@ -232,7 +232,7 @@ class ValidatedProperties(mj.Properties): def validate_str( cls, v: Union[str, None], - ) -> str: # noqa: N805 + ) -> str: """Validate string.""" if v is None: return "" @@ -242,7 
+242,7 @@ def validate_str( def validate_num( cls, v: Union[int, None], - ) -> Union[int, None]: # noqa: N805 + ) -> Union[int, None]: """Validate numeric.""" if v is None: return np.nan From e95887f77d03711ec78ba59ef95f5169f4918120 Mon Sep 17 00:00:00 2001 From: Jane Van Lam <75lam@cua.edu> Date: Tue, 10 Mar 2026 15:56:24 -0400 Subject: [PATCH 2/3] update all changes --- .../Shared-Memory-OpenMP/Examples/Plotting MNIST Data/plot.py | 4 ---- 1 file changed, 4 deletions(-) diff --git a/dimension_reduction/UMAP/Shared-Memory-OpenMP/Examples/Plotting MNIST Data/plot.py b/dimension_reduction/UMAP/Shared-Memory-OpenMP/Examples/Plotting MNIST Data/plot.py index 1041308be..7636149c3 100644 --- a/dimension_reduction/UMAP/Shared-Memory-OpenMP/Examples/Plotting MNIST Data/plot.py +++ b/dimension_reduction/UMAP/Shared-Memory-OpenMP/Examples/Plotting MNIST Data/plot.py @@ -47,7 +47,3 @@ # colors = ['#1f77b4', '#ff7f0e', '#2ca02c', '#d62728', '#9467bd', '#8c564b', '#e377c2', '#7f7f7f', '#bcbd22', '#17becf'] - - - - From 579318bcf09a392ab85d05233ea92fd8509e98fd Mon Sep 17 00:00:00 2001 From: Jane Van Lam <75lam@cua.edu> Date: Tue, 10 Mar 2026 16:00:40 -0400 Subject: [PATCH 3/3] update packages cp313, bump 2.0.4-dev0 --- .../apply-flatfield-tool/.bumpversion.cfg | 2 +- .../images/apply-flatfield-tool/.gitignore | 1 + .../images/apply-flatfield-tool/Dockerfile | 13 ++++++++--- .../images/apply-flatfield-tool/README.md | 2 +- .../images/apply-flatfield-tool/VERSION | 2 +- .../apply-flatfield-tool/build-docker.sh | 22 ------------------- .../images/apply-flatfield-tool/plugin.json | 4 ++-- .../apply-flatfield-tool/pyproject.toml | 12 +++++----- .../images/apply_flatfield/__init__.py | 2 +- 9 files changed, 23 insertions(+), 37 deletions(-) delete mode 100644 transforms/images/apply-flatfield-tool/build-docker.sh diff --git a/transforms/images/apply-flatfield-tool/.bumpversion.cfg b/transforms/images/apply-flatfield-tool/.bumpversion.cfg index a2f2fd1cf..d32b9aa2a 100644 --- 
a/transforms/images/apply-flatfield-tool/.bumpversion.cfg +++ b/transforms/images/apply-flatfield-tool/.bumpversion.cfg @@ -1,5 +1,5 @@ [bumpversion] -current_version = 2.0.1 +current_version = 2.0.4-dev0 commit = False tag = False parse = (?P\d+)\.(?P\d+)\.(?P\d+)(\-(?P[a-z]+)(?P\d+))? diff --git a/transforms/images/apply-flatfield-tool/.gitignore b/transforms/images/apply-flatfield-tool/.gitignore index 5b8869373..c77f23c31 100644 --- a/transforms/images/apply-flatfield-tool/.gitignore +++ b/transforms/images/apply-flatfield-tool/.gitignore @@ -35,3 +35,4 @@ build/ dist/ *.egg-info/ +PUSH_AND_PR.md diff --git a/transforms/images/apply-flatfield-tool/Dockerfile b/transforms/images/apply-flatfield-tool/Dockerfile index bf8d52999..ebb8187f4 100644 --- a/transforms/images/apply-flatfield-tool/Dockerfile +++ b/transforms/images/apply-flatfield-tool/Dockerfile @@ -1,4 +1,6 @@ -FROM polusai/bfio:2.3.3 +# bfiocpp (bfio dependency) has no wheel for arm64 + Python 3.13. Build with: +# docker build --platform linux/amd64 -f transforms/images/apply-flatfield-tool/Dockerfile -t polusai/apply-flatfield-tool:2.0.2-dev0 . +FROM python:3.13-slim ENV EXEC_DIR="/opt/executables" ENV POLUS_IMG_EXT=".ome.tif" @@ -7,10 +9,15 @@ ENV POLUS_LOG="INFO" WORKDIR ${EXEC_DIR} -# Build context = repo root (PolusAI-image-tools). Path is transforms/images/apply-flatfield-tool +# bfio/Bioformats needs a JVM (Debian Trixie has openjdk-21, not 17) +RUN apt-get update && apt-get install -y --no-install-recommends \ + openjdk-21-jre-headless \ + && rm -rf /var/lib/apt/lists/* + +# Build context = repo root. Copy only this tool. 
COPY transforms/images/apply-flatfield-tool /opt/executables/app -RUN pip3 install --no-cache-dir /opt/executables/app +RUN pip install --no-cache-dir /opt/executables/app ENTRYPOINT ["python3", "-m", "polus.images.transforms.images.apply_flatfield"] CMD ["--help"] diff --git a/transforms/images/apply-flatfield-tool/README.md b/transforms/images/apply-flatfield-tool/README.md index c2ce5d8b9..98d4f356e 100644 --- a/transforms/images/apply-flatfield-tool/README.md +++ b/transforms/images/apply-flatfield-tool/README.md @@ -1,4 +1,4 @@ -# Apply Flatfield Plugin (v2.0.1) +# Apply Flatfield Plugin (v2.0.4-dev0) This WIPP plugin applies a flatfield operation on every image in a collection. The algorithm used to apply the flatfield is as follows: diff --git a/transforms/images/apply-flatfield-tool/VERSION b/transforms/images/apply-flatfield-tool/VERSION index 38f77a65b..e33ee18b1 100644 --- a/transforms/images/apply-flatfield-tool/VERSION +++ b/transforms/images/apply-flatfield-tool/VERSION @@ -1 +1 @@ -2.0.1 +2.0.4-dev0 diff --git a/transforms/images/apply-flatfield-tool/build-docker.sh b/transforms/images/apply-flatfield-tool/build-docker.sh deleted file mode 100644 index 6bc521942..000000000 --- a/transforms/images/apply-flatfield-tool/build-docker.sh +++ /dev/null @@ -1,22 +0,0 @@ -#!/bin/bash - -# Change the name of the tool here -tool_dir="transforms/images" -tool_name="apply-flatfield-tool" - -# The version is read from the VERSION file -version=$(", @@ -10,12 +10,12 @@ readme = "README.md" packages = [{include = "polus", from = "src"}] [tool.poetry.dependencies] -python = ">=3.9,<3.12" -bfio = { version = "^2.3.3", extras = ["all"] } +python = ">=3.9" +bfio = ">=2.5.0" filepattern = ">=2.0.4" -typer = ">=0.23.0,<0.24.0" -numpy = "^1.24.3" -tqdm = "^4.65.0" +typer = ">=0.23.0" +numpy = ">=1.24.3" +tqdm = ">=4.65.0" [tool.poetry.group.dev.dependencies] bump2version = "^1.0.1" diff --git 
a/transforms/images/apply-flatfield-tool/src/polus/images/transforms/images/apply_flatfield/__init__.py b/transforms/images/apply-flatfield-tool/src/polus/images/transforms/images/apply_flatfield/__init__.py index ab03b2528..d6182b84c 100644 --- a/transforms/images/apply-flatfield-tool/src/polus/images/transforms/images/apply_flatfield/__init__.py +++ b/transforms/images/apply-flatfield-tool/src/polus/images/transforms/images/apply_flatfield/__init__.py @@ -3,4 +3,4 @@ from . import utils from .apply_flatfield import apply -__version__ = "2.0.1" +__version__ = "2.0.4-dev0"