-
Notifications
You must be signed in to change notification settings - Fork 0
Expand file tree
/
Copy pathtest_container_configurable.sh
More file actions
executable file
·306 lines (272 loc) · 8.58 KB
/
test_container_configurable.sh
File metadata and controls
executable file
·306 lines (272 loc) · 8.58 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
#!/bin/bash
# Configurable test script for Polysome containers
# Supports both GPU and ARM64 CPU containers with configurable mount points
# Abort the script as soon as any command returns a non-zero status.
set -e

# ---- Built-in defaults; the command-line options below may replace them ----
CONTAINER_TYPE="arm64" # container flavour used when -t/--type is not given
WORKFLOW_NAME=""       # left empty until a workflow name is chosen
DOCKER_ARGS=""         # extra options for `docker run`; none by default

# Host-side mount locations, all rooted at the directory the script is run from.
invocation_dir="$(pwd)"
MODEL_PATH="${invocation_dir}/test/model"
DATA_PATH="${invocation_dir}/test/data"
OUTPUT_PATH="${invocation_dir}/test/output"
WORKFLOWS_PATH="${invocation_dir}/test/workflows"
PROMPTS_PATH="${invocation_dir}/prompts"
#######################################
# Print the command-line help text.
# Arguments: none ($0 is interpolated as the script name)
# Outputs:   usage, option list and examples on stdout
#######################################
show_usage() {
  # One printf call emits every argument on its own line.
  printf '%s\n' \
    "Usage: $0 [OPTIONS]" \
    "" \
    "Options:" \
    " -t, --type TYPE Container type: 'gpu' or 'arm64' (default: arm64)" \
    " -w, --workflow NAME Workflow name (default: auto-detected based on type)" \
    " -m, --model-path PATH Path to model directory (default: ./test/model)" \
    " -d, --data-path PATH Path to data directory (default: ./test/data)" \
    " -o, --output-path PATH Path to output directory (default: ./test/output)" \
    " --workflows-path PATH Path to workflows directory (default: ./test/workflows)" \
    " --prompts-path PATH Path to prompts directory (default: ./prompts)" \
    " -h, --help Show this help message" \
    "" \
    "Examples:" \
    " $0 # Test ARM64 container with default paths" \
    " $0 -t gpu # Test GPU container with default paths" \
    " $0 -m /path/to/models/gemma-3-27b-it-quantized.w4a16 \\" \
    " -d /path/to/data/input \\" \
    " -o /path/to/data/output \\" \
    " --prompts-path /path/to/prompts" \
    ""
}
# Parse arguments
#######################################
# Parse command-line options into the script's configuration variables.
# Globals (written): CONTAINER_TYPE, WORKFLOW_NAME, MODEL_PATH, DATA_PATH,
#                    OUTPUT_PATH, WORKFLOWS_PATH, PROMPTS_PATH
# Arguments: the script's positional parameters ("$@")
# Outputs:   error text and the usage message on stdout for bad input
# Exits:     0 after -h/--help; 1 on an unknown option or a missing value
#######################################
parse_args() {
  while [[ $# -gt 0 ]]; do
    # Every value-taking option needs a following argument. Without this
    # check `shift 2` fails and, under `set -e`, the script would terminate
    # without telling the user what went wrong.
    case "$1" in
      -t | --type | -w | --workflow | -m | --model-path | -d | --data-path | \
        -o | --output-path | --workflows-path | --prompts-path)
        if [[ $# -lt 2 ]]; then
          echo "ERROR: Option $1 requires an argument"
          show_usage
          exit 1
        fi
        ;;
    esac

    case "$1" in
      -t | --type)
        CONTAINER_TYPE="$2"
        shift 2
        ;;
      -w | --workflow)
        WORKFLOW_NAME="$2"
        shift 2
        ;;
      -m | --model-path)
        MODEL_PATH="$2"
        shift 2
        ;;
      -d | --data-path)
        DATA_PATH="$2"
        shift 2
        ;;
      -o | --output-path)
        OUTPUT_PATH="$2"
        shift 2
        ;;
      --workflows-path)
        WORKFLOWS_PATH="$2"
        shift 2
        ;;
      --prompts-path)
        PROMPTS_PATH="$2"
        shift 2
        ;;
      -h | --help)
        show_usage
        exit 0
        ;;
      *)
        echo "Unknown option: $1"
        show_usage
        exit 1
        ;;
    esac
  done
}
parse_args "$@"
# Validate the requested container type and derive its settings in one place:
# image name, Dockerfile, extra docker options, default workflow, description.
case "$CONTAINER_TYPE" in
  gpu)
    CONTAINER_NAME="polysome-runner"
    DOCKERFILE="Dockerfile"
    DOCKER_ARGS="--gpus all"
    DEFAULT_WORKFLOW="gpu_test"
    CONTAINER_DESC="GPU-enabled"
    ;;
  arm64)
    CONTAINER_NAME="polysome-runner-arm64"
    DOCKERFILE="Dockerfile.arm64"
    DOCKER_ARGS=""
    DEFAULT_WORKFLOW="arm64_test"
    CONTAINER_DESC="ARM64 CPU-only"
    ;;
  *)
    # Anything other than the two supported types is rejected.
    echo "ERROR: Container type must be 'gpu' or 'arm64'"
    show_usage
    exit 1
    ;;
esac

# Use the type's default workflow when no workflow name has been set.
WORKFLOW_NAME="${WORKFLOW_NAME:-$DEFAULT_WORKFLOW}"
# The log file is written to the current directory and named after the container type.
LOG_FILE="$(pwd)/test_${CONTAINER_TYPE}_output.log"

# Banner identifying this test run.
printf '%s\n' \
  "=== Polysome ${CONTAINER_DESC} Container Test ===" \
  "Timestamp: $(date)" \
  "Container: ${CONTAINER_NAME}" \
  "Workflow: ${WORKFLOW_NAME}.json" \
  ""

# Make sure the output directory exists (no-op when it already does).
mkdir -p "${OUTPUT_PATH}"
# Check if container image exists
# `docker image inspect` looks up exactly the reference given. Grepping the
# `docker images` table is a substring match, so a name such as
# "polysome-runner" would be reported as present when only
# "polysome-runner-arm64" had been built.
echo "1. Checking if ${CONTAINER_DESC} container image exists..."
if ! docker image inspect "${CONTAINER_NAME}" >/dev/null 2>&1; then
  echo " ERROR: Container image '${CONTAINER_NAME}' not found!"
  echo " Please build it first with: docker build -f ${DOCKERFILE} -t ${CONTAINER_NAME} ."
  exit 1
fi
echo " ✓ Container image found"
# Check if required directories and files exist
echo ""
echo "2. Checking test setup..."
# Host paths that must exist before the container is started: the data,
# model and prompts paths plus the selected workflow JSON file.
required_paths=(
  "${DATA_PATH}"
  "${WORKFLOWS_PATH}/${WORKFLOW_NAME}.json"
  "${MODEL_PATH}"
  "${PROMPTS_PATH}"
)
for path in "${required_paths[@]}"; do
  if [[ ! -e "$path" ]]; then
    echo " ERROR: Required path missing: $path"
    # When the workflow file is the missing piece, list the alternatives.
    if [[ "$path" == *"${WORKFLOW_NAME}.json" ]]; then
      echo " Available workflows:"
      workflows_listed=0
      # Glob instead of parsing `ls`: the previous `ls | sed || echo` never
      # reached its fallback because the pipeline status was sed's (always 0).
      for workflow_file in "${WORKFLOWS_PATH}/"*.json; do
        # An unmatched glob is left as the literal pattern; skip it.
        [[ -e "$workflow_file" ]] || continue
        echo " ${workflow_file##*/}"
        workflows_listed=1
      done
      if [[ "$workflows_listed" -eq 0 ]]; then
        echo " No workflows found"
      fi
    fi
    exit 1
  fi
  if [[ -d "$path" ]]; then
    echo " ✓ Found directory: $(basename "$path")"
  else
    echo " ✓ Found file: $(basename "$path")"
  fi
done
# Print the effective configuration for this run, one setting per line.
printf '%s\n' \
  "" \
  "3. Test configuration:" \
  " Container type: ${CONTAINER_DESC}" \
  " Container name: ${CONTAINER_NAME}" \
  " Workflow: ${WORKFLOW_NAME}.json" \
  " Model path: ${MODEL_PATH}" \
  " Data path: ${DATA_PATH}" \
  " Output path: ${OUTPUT_PATH}" \
  " Workflows path: ${WORKFLOWS_PATH}" \
  " Prompts path: ${PROMPTS_PATH}" \
  " Log file: ${LOG_FILE}"
# The extra docker options are only mentioned when there are any.
[[ -z "$DOCKER_ARGS" ]] || echo " Docker args: ${DOCKER_ARGS}"
# Build docker command
#######################################
# Assemble the `docker run` invocation as an argument array so that paths
# containing spaces or shell metacharacters reach docker as single, literal
# arguments (a string passed through `eval` would re-split and re-evaluate them).
# Globals (read):    DOCKER_ARGS, MODEL_PATH, DATA_PATH, OUTPUT_PATH,
#                    WORKFLOWS_PATH, PROMPTS_PATH, WORKFLOW_NAME,
#                    CONTAINER_NAME, VLLM_CACHE_PATH (default ./vllm_cache),
#                    CUDA_VISIBLE_DEVICES (default 0)
# Globals (written): docker_cmd (array)
#######################################
build_docker_cmd() {
  local -a extra_args=()
  docker_cmd=(docker run --rm)
  # DOCKER_ARGS is a whitespace-separated option string (e.g. "--gpus all").
  if [[ -n "${DOCKER_ARGS:-}" ]]; then
    read -r -a extra_args <<<"$DOCKER_ARGS"
    docker_cmd+=("${extra_args[@]}")
  fi
  docker_cmd+=(
    --user "$(id -u):$(id -g)"
    -v "${MODEL_PATH}:/models"
    -v "${DATA_PATH}:/data"
    -v "${OUTPUT_PATH}:/output"
    -v "${WORKFLOWS_PATH}:/workflows"
    -v "${PROMPTS_PATH}:/prompts"
    -v "${VLLM_CACHE_PATH:-./vllm_cache}:/root/.cache"
    -e "WORKFLOW_PATH=/workflows/${WORKFLOW_NAME}.json"
    -e "CUDA_VISIBLE_DEVICES=${CUDA_VISIBLE_DEVICES:-0}"
    "${CONTAINER_NAME}"
  )
}
build_docker_cmd
# Shell-quoted, copy-pasteable rendering of the command for display only.
printf -v DOCKER_CMD '%q ' "${docker_cmd[@]}"
DOCKER_CMD="${DOCKER_CMD% }"

# Run the container
echo ""
echo "5. Running ${CONTAINER_DESC} container..."
echo " Command: ${DOCKER_CMD}"
echo ""
echo " Starting container (this may take several minutes for model loading and inference)..."
echo " Logs will be saved to: ${LOG_FILE}"
# Run container and capture all output (stdout and stderr) in the log file.
# The pipeline's own status is tee's, so a failing container does not trip
# `set -e` here; its status is read from PIPESTATUS instead.
"${docker_cmd[@]}" 2>&1 | tee "${LOG_FILE}"
# Capture the exit code from docker run
DOCKER_EXIT_CODE=${PIPESTATUS[0]}
echo ""
echo "6. Analyzing results..."
# Turn the container's exit status into a one-line verdict, then print it.
if [[ $DOCKER_EXIT_CODE -eq 0 ]]; then
  exit_report=" ✓ Container completed successfully (exit code: 0)"
else
  exit_report=" ✗ Container failed (exit code: $DOCKER_EXIT_CODE)"
fi
echo "$exit_report"
# List the JSON / JSONL files found under the output directory, with a line
# count for each and a one-line preview for non-empty JSONL files.
echo ""
echo "7. Output files generated:"
if [[ -d "${OUTPUT_PATH}" ]]; then
  # `|| true` keeps a failing find from aborting the script under `set -e`.
  output_files=$(find "${OUTPUT_PATH}" -name "*.jsonl" -o -name "*.json" 2>/dev/null || true)
  if [[ -n "$output_files" ]]; then
    printf '%s\n' "$output_files" | while IFS= read -r file; do
      # Only regular files are reported.
      if [[ -f "$file" ]]; then
        file_name=$(basename "$file")
        size=$(wc -l <"$file" 2>/dev/null || echo "0")
        echo " ✓ ${file_name} ($size lines)"
        # Show the first 80 characters of the first record for verification.
        if [[ "$size" -gt 0 && "$file_name" == *.jsonl ]]; then
          echo " Preview: $(head -n 1 "$file" 2>/dev/null | cut -c1-80)..."
        fi
      fi
    done
  else
    echo " ✗ No output files found"
  fi
else
  echo " ✗ Output directory not found"
fi
# Show log summary
#######################################
# Count the lines of a log file that match any of the given patterns
# (case-insensitive, basic regular expressions).
# `grep -c` already prints "0" when nothing matches while exiting with
# status 1, so a `|| echo 0` fallback would yield two lines ("0" twice) and
# break later numeric comparisons. Here the status is ignored and an empty
# result (e.g. unreadable file) is reported as 0.
# Arguments: $1 - log file; $2... - one or more patterns
# Outputs:   a single integer on stdout
#######################################
count_log_lines() {
  local log_file=$1
  shift
  local -a grep_args=()
  local pattern count
  # Separate -e options are portable; `\|` alternation is a GNU extension.
  for pattern in "$@"; do
    grep_args+=(-e "$pattern")
  done
  count=$(grep -ic "${grep_args[@]}" -- "$log_file" 2>/dev/null) || true
  echo "${count:-0}"
}

echo ""
echo "8. Log summary:"
if [[ -f "${LOG_FILE}" ]]; then
  log_lines=$(wc -l <"${LOG_FILE}")
  echo " Log file size: $log_lines lines"
  # Show performance info if available
  if grep -q "Model loaded" "${LOG_FILE}" 2>/dev/null; then
    echo " ✓ Model loaded successfully"
  fi
  if grep -q "Workflow completed" "${LOG_FILE}" 2>/dev/null; then
    echo " ✓ Workflow completed"
  fi
  # Count errors and warnings (look for actual log levels, not just words)
  error_count=$(count_log_lines "${LOG_FILE}" " - ERROR - " " - CRITICAL - ")
  warning_count=$(count_log_lines "${LOG_FILE}" " - WARNING - ")
  if [[ $error_count -gt 0 ]]; then
    echo " ⚠ Errors found: $error_count"
  fi
  if [[ $warning_count -gt 0 ]]; then
    echo " ⚠ Warnings found: $warning_count"
  fi
  echo ""
  echo " To view full logs: cat ${LOG_FILE}"
  echo " To view errors only: grep -i error ${LOG_FILE}"
  echo " To view warnings: grep -i warning ${LOG_FILE}"
else
  echo " ✗ Log file not created"
fi
# Closing summary: overall verdict, suggested follow-ups, then exit with the
# container's own exit status.
printf '%s\n' "" "=== Test Summary ==="
if [[ $DOCKER_EXIT_CODE -eq 0 ]]; then
  printf '%s\n' \
    "Status: SUCCESS ✓" \
    "The ${CONTAINER_DESC} container completed the test workflow successfully."
  # The performance note is only printed for the ARM64 container.
  case "$CONTAINER_TYPE" in
    arm64)
      printf '%s\n' \
        "" \
        "Note: ARM64 CPU inference is expected to be slower than GPU inference." \
        "For production workloads, consider using the GPU container or smaller models."
      ;;
  esac
else
  printf '%s\n' \
    "Status: FAILED ✗" \
    "The ${CONTAINER_DESC} container encountered an error. Check the logs above."
fi

printf '%s\n' \
  "" \
  "Next steps:" \
  " - Review logs: cat ${LOG_FILE}" \
  " - Check output: ls -la ${OUTPUT_PATH}/" \
  " - Examine workflow results in the output JSONL files"
# Suggest re-running against the other container flavour.
case "$CONTAINER_TYPE" in
  arm64)
    echo " - Compare with GPU performance: $0 -t gpu [same mount options]"
    ;;
  *)
    echo " - Compare with ARM64 performance: $0 -t arm64 [same mount options]"
    ;;
esac
exit $DOCKER_EXIT_CODE