Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
6 changes: 3 additions & 3 deletions server/Dockerfile
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
FROM nvidia/cuda:12.1.0-devel-ubuntu20.04
FROM ubuntu:20.04

RUN apt-get update && apt-get install -y wget
RUN apt-get update && apt-get install -y wget python3.12 python3.12-pip

RUN useradd -m user

Expand All @@ -25,7 +25,7 @@ RUN /bin/bash -c "\

RUN echo "Conda env nnInteractive created"

RUN /opt/conda/envs/nnInteractive/bin/python3.12 -m pip install torch torchvision --index-url https://download.pytorch.org/whl/cu126
RUN /opt/conda/envs/nnInteractive/bin/python3.12 -m pip install torch torchvision --index-url https://download.pytorch.org/whl/cpu

WORKDIR /opt/server

Expand Down
16 changes: 14 additions & 2 deletions server/nninteractive_slicer_server/main.py
Original file line number Diff line number Diff line change
Expand Up @@ -139,14 +139,26 @@ def download_weights(self):
def make_session(self):
"""
Creates an nnInteractiveInferenceSession, points it at the downloaded model.
Automatically detects the best available device, preferring MPS, then CUDA, then CPU.
"""
# Automatically detect the best available device
if torch.backends.mps.is_available() and torch.backends.mps.is_built():
device = torch.device("mps")
print("Using MPS (Metal Performance Shaders) device for inference")
elif torch.cuda.is_available():
device = torch.device("cuda:0")
print("Using CUDA device for inference")
else:
device = torch.device("cpu")
print("Using CPU device for inference")

session = nnInteractiveInferenceSession(
device=torch.device("cuda:0"), # Set inference device
device=device, # Set inference device automatically
use_torch_compile=False, # Experimental: Not tested yet
verbose=True,
torch_n_threads=os.cpu_count(), # Use available CPU cores
do_autozoom=True, # Enables AutoZoom for better patching
use_pinned_memory=True, # Optimizes GPU memory transfers
use_pinned_memory=(device.type == 'cuda'), # Only use pinned memory for CUDA
)

# Load the trained model
Expand Down
2 changes: 1 addition & 1 deletion server/pyproject.toml
Original file line number Diff line number Diff line change
Expand Up @@ -16,7 +16,7 @@ dependencies = [
"nninteractive==1.0.1",
"fastapi==0.111.0",
"numpy==2.2.3",
"torch==2.6.0",
"torch>=2.9.0",
"Pillow==11.1.0",
"transformers==4.49.0",
"xxhash==3.5.0"
Expand Down
2 changes: 1 addition & 1 deletion server/requirements.txt
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
nninteractive==1.0.1
fastapi==0.111.0
numpy==2.2.3
torch==2.6.0
torch>=2.9.0
Pillow==11.1.0
transformers==4.49.0
xxhash==3.5.0