Polysome/test_container_configurable.sh at main · computationalpathologygroup/Polysome · GitHub

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
#!/bin/bash

# Polysome container test runner with configurable settings.
# Covers both the GPU image and the ARM64 CPU image; every host directory that
# is mounted into the container can be replaced through a command-line option.

set -e # Stop at the first failing command

# Host-side mount sources. The defaults live under the directory the script is
# started from; the command-line options can override each of them.
start_dir="$(pwd)"
MODEL_PATH="${start_dir}/test/model"
DATA_PATH="${start_dir}/test/data"
OUTPUT_PATH="${start_dir}/test/output"
WORKFLOWS_PATH="${start_dir}/test/workflows"
PROMPTS_PATH="${start_dir}/prompts"
unset start_dir

# Run settings
CONTAINER_TYPE="arm64" # arm64 unless -t/--type selects gpu
WORKFLOW_NAME=""       # empty: the per-type default workflow is chosen later
DOCKER_ARGS=""         # extra `docker run` flags, filled in per container type

# Print the help text (options plus example invocations) to stdout.
# Takes no arguments; $0 is interpolated so the examples show the real script name.
show_usage() {
  printf '%s\n' \
    "Usage: $0 [OPTIONS]" \
    "" \
    "Options:" \
    "  -t, --type TYPE           Container type: 'gpu' or 'arm64' (default: arm64)" \
    "  -w, --workflow NAME       Workflow name (default: auto-detected based on type)" \
    "  -m, --model-path PATH     Path to model directory (default: ./test/model)" \
    "  -d, --data-path PATH      Path to data directory (default: ./test/data)" \
    "  -o, --output-path PATH    Path to output directory (default: ./test/output)" \
    "  --workflows-path PATH     Path to workflows directory (default: ./test/workflows)" \
    "  --prompts-path PATH       Path to prompts directory (default: ./prompts)" \
    "  -h, --help               Show this help message" \
    "" \
    "Examples:" \
    "  $0                        # Test ARM64 container with default paths" \
    "  $0 -t gpu                 # Test GPU container with default paths" \
    "  $0 -m /path/to/models/gemma-3-27b-it-quantized.w4a16 \\" \
    "     -d /path/to/data/input \\" \
    "     -o /path/to/data/output \\" \
    "     --prompts-path /path/to/prompts" \
    ""
}

# Exit with a usage error unless the option in $1 is followed by a value.
# Arguments: $1 - option name as typed, $2 - number of arguments still unparsed
# Without this guard a trailing option such as `-t` makes `shift 2` fail, and
# under `set -e` the script would then stop without printing anything.
require_option_value() {
  if [[ $2 -lt 2 ]]; then
    echo "ERROR: Option '$1' requires a value"
    show_usage
    exit 1
  fi
}

# Parse command-line options into the global configuration variables
# (CONTAINER_TYPE, WORKFLOW_NAME and the *_PATH mount sources).
# Arguments: the script's own arguments ("$@")
# Exits 0 after printing help for -h/--help, and 1 on an unknown option or an
# option that is missing its value.
parse_args() {
  while [[ $# -gt 0 ]]; do
    case $1 in
    -t | --type)
      require_option_value "$1" $#
      CONTAINER_TYPE="$2"
      shift 2
      ;;
    -w | --workflow)
      require_option_value "$1" $#
      WORKFLOW_NAME="$2"
      shift 2
      ;;
    -m | --model-path)
      require_option_value "$1" $#
      MODEL_PATH="$2"
      shift 2
      ;;
    -d | --data-path)
      require_option_value "$1" $#
      DATA_PATH="$2"
      shift 2
      ;;
    -o | --output-path)
      require_option_value "$1" $#
      OUTPUT_PATH="$2"
      shift 2
      ;;
    --workflows-path)
      require_option_value "$1" $#
      WORKFLOWS_PATH="$2"
      shift 2
      ;;
    --prompts-path)
      require_option_value "$1" $#
      PROMPTS_PATH="$2"
      shift 2
      ;;
    -h | --help)
      show_usage
      exit 0
      ;;
    *)
      echo "Unknown option: $1"
      show_usage
      exit 1
      ;;
    esac
  done
}

parse_args "$@"

# Map the requested container type to its image name, Dockerfile, extra
# `docker run` flags, default workflow and display label. Anything other than
# 'gpu' or 'arm64' is rejected with the usage text.
case "$CONTAINER_TYPE" in
gpu)
  CONTAINER_NAME="polysome-runner"
  DOCKERFILE="Dockerfile"
  DOCKER_ARGS="--gpus all"
  DEFAULT_WORKFLOW="gpu_test"
  CONTAINER_DESC="GPU-enabled"
  ;;
arm64)
  CONTAINER_NAME="polysome-runner-arm64"
  DOCKERFILE="Dockerfile.arm64"
  DOCKER_ARGS=""
  DEFAULT_WORKFLOW="arm64_test"
  CONTAINER_DESC="ARM64 CPU-only"
  ;;
*)
  echo "ERROR: Container type must be 'gpu' or 'arm64'"
  show_usage
  exit 1
  ;;
esac

# A workflow named on the command line wins; otherwise use the type's default
WORKFLOW_NAME="${WORKFLOW_NAME:-$DEFAULT_WORKFLOW}"

# Per-container-type log file, placed in the directory the script runs from
LOG_FILE="$(pwd)/test_${CONTAINER_TYPE}_output.log"

# Banner identifying this run
printf '%s\n' \
  "=== Polysome ${CONTAINER_DESC} Container Test ===" \
  "Timestamp: $(date)" \
  "Container: ${CONTAINER_NAME}" \
  "Workflow: ${WORKFLOW_NAME}.json" \
  ""

# Ensure the output mount source exists (no-op when it is already there)
mkdir -p "${OUTPUT_PATH}"

# Check if container image exists
# `docker image inspect` resolves the exact image reference. Grepping the
# `docker images` table instead accepts any line that merely contains the name,
# so "polysome-runner" would also be satisfied by "polysome-runner-arm64".
echo "1. Checking if ${CONTAINER_DESC} container image exists..."
if ! docker image inspect "${CONTAINER_NAME}" >/dev/null 2>&1; then
  echo "   ERROR: Container image '${CONTAINER_NAME}' not found!"
  echo "   Please build it first with: docker build -f ${DOCKERFILE} -t ${CONTAINER_NAME} ."
  exit 1
fi
echo "   ✓ Container image found"

# Check if required directories and files exist
# Every mount source and the selected workflow file must be present before the
# container is started; the first missing one aborts the run.
echo ""
echo "2. Checking test setup..."
required_paths=(
  "${DATA_PATH}"
  "${WORKFLOWS_PATH}/${WORKFLOW_NAME}.json"
  "${MODEL_PATH}"
  "${PROMPTS_PATH}"
)

for path in "${required_paths[@]}"; do
  if [[ ! -e "$path" ]]; then
    echo "   ERROR: Required path missing: $path"
    if [[ "$path" == *"${WORKFLOW_NAME}.json" ]]; then
      # Help the user pick a valid workflow name. A glob loop is used rather
      # than `ls | sed || echo ...`: in that pipeline the status is sed's, so
      # the "No workflows found" fallback could never run.
      echo "   Available workflows:"
      workflows_listed=0
      for workflow_file in "${WORKFLOWS_PATH}"/*.json; do
        # An unmatched glob stays literal, so skip names that do not exist
        [[ -e "$workflow_file" ]] || continue
        echo "     ${workflow_file##*/}"
        workflows_listed=1
      done
      if [[ $workflows_listed -eq 0 ]]; then
        echo "     No workflows found"
      fi
    fi
    exit 1
  fi
  if [[ -d "$path" ]]; then
    echo "   ✓ Found directory: $(basename "$path")"
  else
    echo "   ✓ Found file: $(basename "$path")"
  fi
done

# Echo the effective settings so a run can be reproduced from its output
printf '%s\n' \
  "" \
  "3. Test configuration:" \
  "   Container type: ${CONTAINER_DESC}" \
  "   Container name: ${CONTAINER_NAME}" \
  "   Workflow: ${WORKFLOW_NAME}.json" \
  "   Model path: ${MODEL_PATH}" \
  "   Data path: ${DATA_PATH}" \
  "   Output path: ${OUTPUT_PATH}" \
  "   Workflows path: ${WORKFLOWS_PATH}" \
  "   Prompts path: ${PROMPTS_PATH}" \
  "   Log file: ${LOG_FILE}"
# The extra docker flags are only mentioned when there are any
[[ -z "$DOCKER_ARGS" ]] || echo "   Docker args: ${DOCKER_ARGS}"

# Build docker command
#######################################
# Assemble the `docker run` invocation as an array so that every mount path
# stays a single argument. The previous string + `eval` approach broke on paths
# containing spaces and would execute shell metacharacters found in a path.
# Globals read:    DOCKER_ARGS, MODEL_PATH, DATA_PATH, OUTPUT_PATH,
#                  WORKFLOWS_PATH, PROMPTS_PATH, WORKFLOW_NAME, CONTAINER_NAME,
#                  VLLM_CACHE_PATH (optional), CUDA_VISIBLE_DEVICES (optional)
# Globals written: docker_cmd (array to execute),
#                  DOCKER_CMD (shell-quoted rendering, for display only)
#######################################
build_docker_cmd() {
  local -a extra_args=()
  # DOCKER_ARGS is a whitespace-separated flag string such as "--gpus all"
  read -r -a extra_args <<<"${DOCKER_ARGS}"

  docker_cmd=(docker run --rm)
  if [[ ${#extra_args[@]} -gt 0 ]]; then
    docker_cmd+=("${extra_args[@]}")
  fi
  docker_cmd+=(
    --user "$(id -u):$(id -g)"
    -v "${MODEL_PATH}:/models"
    -v "${DATA_PATH}:/data"
    -v "${OUTPUT_PATH}:/output"
    -v "${WORKFLOWS_PATH}:/workflows"
    -v "${PROMPTS_PATH}:/prompts"
    # Default made absolute like the other mount defaults; an absolute host
    # path is accepted by every Docker version.
    -v "${VLLM_CACHE_PATH:-$(pwd)/vllm_cache}:/root/.cache"
    -e "WORKFLOW_PATH=/workflows/${WORKFLOW_NAME}.json"
    -e "CUDA_VISIBLE_DEVICES=${CUDA_VISIBLE_DEVICES:-0}"
    "${CONTAINER_NAME}"
  )

  DOCKER_CMD="$(printf '%q ' "${docker_cmd[@]}")"
  DOCKER_CMD="${DOCKER_CMD% }"
}

build_docker_cmd

# Run the container
echo ""
echo "5. Running ${CONTAINER_DESC} container..."
echo "   Command: ${DOCKER_CMD}"
echo ""
echo "   Starting container (this may take several minutes for model loading and inference)..."
echo "   Logs will be saved to: ${LOG_FILE}"

# Run container and capture all output. The pipeline's own status is tee's, so
# a failing container does not trip `set -e` here.
"${docker_cmd[@]}" 2>&1 | tee "${LOG_FILE}"

# Capture the exit code from docker run (first stage of the pipeline above);
# this must stay the very next command, since any command resets PIPESTATUS
DOCKER_EXIT_CODE=${PIPESTATUS[0]}

echo ""
echo "6. Analyzing results..."

# Check exit code
if [[ $DOCKER_EXIT_CODE -eq 0 ]]; then
  echo "   ✓ Container completed successfully (exit code: 0)"
else
  echo "   ✗ Container failed (exit code: $DOCKER_EXIT_CODE)"
fi

#######################################
# List the .json/.jsonl files found under a directory with their line counts,
# plus a preview (first 80 characters of line one) for non-empty .jsonl files.
# Arguments: $1 - directory to scan (recursively)
# Outputs:   one report line per file on stdout, or a notice when the
#            directory is missing or holds no matching regular file
# Returns:   0
#######################################
report_output_files() {
  local dir=$1 file size listed=0

  if [[ ! -d "$dir" ]]; then
    echo "   ✗ Output directory not found"
    return 0
  fi

  while IFS= read -r file; do
    [[ -f "$file" ]] || continue
    listed=1
    # grep -c '' prints a bare number and counts a final line that lacks a
    # newline; `wc -l <file` is space-padded on BSD/macOS and misses that line.
    # grep exits non-zero for an empty or unreadable file, hence the fallback.
    size=$(grep -c '' "$file" 2>/dev/null) || size=${size:-0}
    echo "   ✓ $(basename "$file") ($size lines)"

    # Show the start of the first record for verification
    if [[ "$size" -gt 0 && "$file" == *.jsonl ]]; then
      echo "     Preview: $(head -1 "$file" 2>/dev/null | cut -c1-80)..."
    fi
  done < <(find "$dir" \( -name "*.jsonl" -o -name "*.json" \) 2>/dev/null)

  if [[ $listed -eq 0 ]]; then
    echo "   ✗ No output files found"
  fi
}

# Check output files
echo ""
echo "7. Output files generated:"
report_output_files "${OUTPUT_PATH}"

#######################################
# Summarise a captured container log: its length, whether the "Model loaded"
# and "Workflow completed" markers appear, and how many lines carry an
# ERROR/CRITICAL or WARNING log level.
# Arguments: $1 - path to the log file
# Outputs:   the summary on stdout, or a notice when the file does not exist
# Returns:   0
#######################################
summarize_log() {
  local log=$1 log_lines error_count warning_count

  if [[ ! -f "$log" ]]; then
    echo "   ✗ Log file not created"
    return 0
  fi

  # Arithmetic expansion strips the padding BSD/macOS wc puts before the number
  log_lines=$(($(wc -l <"$log")))
  echo "   Log file size: $log_lines lines"

  # Show performance info if available
  if grep -q "Model loaded" "$log" 2>/dev/null; then
    echo "   ✓ Model loaded successfully"
  fi
  if grep -q "Workflow completed" "$log" 2>/dev/null; then
    echo "   ✓ Workflow completed"
  fi

  # Count errors and warnings (look for actual log levels, not just words).
  # grep -c prints "0" AND exits 1 when nothing matches, so `|| echo 0` would
  # yield "0<newline>0" and break the numeric tests below; fall back only when
  # grep printed nothing. Two -e patterns replace the non-portable `\|`.
  error_count=$(grep -ic -e " - ERROR - " -e " - CRITICAL - " "$log" 2>/dev/null) ||
    error_count=${error_count:-0}
  warning_count=$(grep -ic -e " - WARNING - " "$log" 2>/dev/null) ||
    warning_count=${warning_count:-0}

  if [[ $error_count -gt 0 ]]; then
    echo "   ⚠ Errors found: $error_count"
  fi
  if [[ $warning_count -gt 0 ]]; then
    echo "   ⚠ Warnings found: $warning_count"
  fi

  echo ""
  echo "   To view full logs: cat ${log}"
  echo "   To view errors only: grep -i error ${log}"
  echo "   To view warnings: grep -i warning ${log}"
}

# Show log summary
echo ""
echo "8. Log summary:"
summarize_log "${LOG_FILE}"

# Overall verdict, driven by the container's exit status
printf '%s\n' "" "=== Test Summary ==="
if [[ $DOCKER_EXIT_CODE -eq 0 ]]; then
  printf '%s\n' \
    "Status: SUCCESS ✓" \
    "The ${CONTAINER_DESC} container completed the test workflow successfully."

  # CPU-only runs get a reminder about expected speed
  if [[ "$CONTAINER_TYPE" == "arm64" ]]; then
    printf '%s\n' \
      "" \
      "Note: ARM64 CPU inference is expected to be slower than GPU inference." \
      "For production workloads, consider using the GPU container or smaller models."
  fi
else
  printf '%s\n' \
    "Status: FAILED ✗" \
    "The ${CONTAINER_DESC} container encountered an error. Check the logs above."
fi

# The comparison hint always points at the container type that was not used
if [[ "$CONTAINER_TYPE" == "arm64" ]]; then
  compare_label="GPU"
  compare_type="gpu"
else
  compare_label="ARM64"
  compare_type="arm64"
fi

printf '%s\n' \
  "" \
  "Next steps:" \
  "  - Review logs: cat ${LOG_FILE}" \
  "  - Check output: ls -la ${OUTPUT_PATH}/" \
  "  - Examine workflow results in the output JSONL files" \
  "  - Compare with ${compare_label} performance: $0 -t ${compare_type} [same mount options]"

# Hand the container's exit status back to the caller
exit $DOCKER_EXIT_CODE