FastDeploy/.github/workflows/_base_test.yml at develop · StareAtYou/FastDeploy · GitHub

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
# Reusable workflow (invoked through `workflow_call`) that runs the FastDeploy
# base tests inside a Docker container on a self-hosted GPU runner.
name: Base Test
# NOTE(review): `description` is not among the documented top-level workflow
# keys - confirm GitHub accepts it here.
description: "Run Base Tests"

on:
  workflow_call:
    inputs:
      # Image used both for workspace cleanup and for running the tests.
      # NOTE(review): with `required: true` the caller must always pass a value,
      # so `default` looks unused - confirm which of the two is intended.
      DOCKER_IMAGE:
        description: "Docker image in which the workspace is cleaned and the tests are run."
        required: true
        type: string
        default: "ccr-2vdh3abv-pub.cnc.bj.baidubce.com/paddlepaddle/paddleqa:cuda126-py310"
      # Downloaded with wget and unpacked into ./FastDeploy by the first step.
      FASTDEPLOY_ARCHIVE_URL:
        description: "URL of the compressed FastDeploy code archive."
        required: true
        type: string
      # Installed with pip inside the test container.
      FASTDEPLOY_WHEEL_URL:
        description: "URL of the FastDeploy Wheel."
        required: true
        type: string
      # When empty, the test step falls back to the directory two levels above
      # the workspace.
      CACHE_DIR:
        description: "Host directory holding gitconfig, .cache and ConfigDir, which are mounted into the test container."
        required: false
        type: string
        default: ""
      # The test step aborts when this is not an existing directory, so the
      # empty default only works as a placeholder.
      MODEL_CACHE_DIR:
        description: "Host directory holding the model files; mounted into the test container at /MODELDATA."
        required: false
        type: string
        default: ""

# The workflow consists of a single job, `base_tests`.
jobs:
  base_tests:
    # Requires a self-hosted runner that carries the GPU-h20-1Cards label.
    runs-on: [self-hosted, GPU-h20-1Cards]
    # Upper bound for the whole job (both steps), in minutes.
    timeout-minutes: 60
    steps:
      # Prepare the workspace: remove leftovers of earlier runs, then download,
      # validate and unpack the FastDeploy source archive into ./FastDeploy.
      - name: Code Prepare
        shell: bash
        env:
          docker_image: ${{ inputs.DOCKER_IMAGE }}
          fd_archive_url: ${{ inputs.FASTDEPLOY_ARCHIVE_URL }}
        run: |
          set -x
          # GITHUB_REPOSITORY has the form "owner/name"; keep only the name part.
          REPO_NAME="${GITHUB_REPOSITORY##*/}"
          # An empty name would turn the cleanup glob below into a bare "*".
          : "${REPO_NAME:?REPO_NAME could not be derived from GITHUB_REPOSITORY}"
          docker pull "${docker_image}"
          # Clean the repository directory before starting. The removal runs
          # inside the container - presumably because earlier runs left files
          # owned by the container user; confirm.
          docker run --rm --net=host -v "$(pwd):/workspace" -w /workspace \
          -e "REPO_NAME=${REPO_NAME}" \
          "${docker_image}" /bin/bash -c '
              CLEAN_RETRIES=3
              CLEAN_COUNT=0

              while [ $CLEAN_COUNT -lt $CLEAN_RETRIES ]; do
                echo "Attempt $((CLEAN_COUNT+1)) to remove ${REPO_NAME}* ..."
                rm -rf "${REPO_NAME}"* || true
                sleep 2

                # Check if anything matching ${REPO_NAME}* still exists
                if ! ls "${REPO_NAME}"* >/dev/null 2>&1; then
                  echo "All ${REPO_NAME}* removed successfully"
                  break
                fi

                CLEAN_COUNT=$((CLEAN_COUNT + 1))
              done

              # Give up when something still matches after all attempts.
              if ls "${REPO_NAME}"* >/dev/null 2>&1; then
                echo "ERROR: Failed to clean ${REPO_NAME}* after multiple attempts"
                ls -ld "${REPO_NAME}"*
                exit 1
              fi
            '

          # Download with retry and validation. -O pins the local file name so
          # the checks below do not depend on the basename of the URL.
          MAX_RETRIES=3
          RETRY_COUNT=0
          while [ $RETRY_COUNT -lt $MAX_RETRIES ]; do
            if wget -q --no-proxy -O FastDeploy.tar.gz "${fd_archive_url}" && [ -f FastDeploy.tar.gz ] && [ -s FastDeploy.tar.gz ]; then
              echo "Download successful, file size: $(stat -c%s FastDeploy.tar.gz) bytes"
              break
            else
              RETRY_COUNT=$((RETRY_COUNT + 1))
              echo "Download failed or file is empty, retry $RETRY_COUNT/$MAX_RETRIES..."
              # Drop any partial or empty file so the final check cannot pass on it.
              rm -f FastDeploy.tar.gz
              sleep 2
            fi
          done

          if [ ! -f FastDeploy.tar.gz ] || [ ! -s FastDeploy.tar.gz ]; then
            echo "ERROR: Failed to download FastDeploy.tar.gz after $MAX_RETRIES attempts"
            exit 1
          fi

          # Verify tar.gz integrity before extraction
          if ! tar -tzf FastDeploy.tar.gz > /dev/null 2>&1; then
            echo "ERROR: FastDeploy.tar.gz is corrupted or incomplete"
            exit 1
          fi

          tar -xf FastDeploy.tar.gz
          rm -rf FastDeploy.tar.gz
          cd FastDeploy
          git config --global user.name "FastDeployCI"
          git config --global user.email "fastdeploy_ci@example.com"
          # Log the most recent commits of the unpacked tree for traceability.
          git log -n 3 --oneline

      # Run the base tests: derive GPU ids and ports from the runner name, free
      # those ports, then start a container that installs FastDeploy, launches
      # the service through the deploy helper and runs the pytest suites.
      # The step exits with the TEST_EXIT_CODE recorded by the container.
      - name: Run FastDeploy Base Tests
        shell: bash
        env:
          docker_image: ${{ inputs.DOCKER_IMAGE }}
          fastdeploy_wheel_url: ${{ inputs.FASTDEPLOY_WHEEL_URL }}
          CACHE_DIR: ${{ inputs.CACHE_DIR }}
          MODEL_CACHE_DIR: ${{ inputs.MODEL_CACHE_DIR }}
        run: |
          runner_name="${{ runner.name }}"
          # The text after the last "-" of the runner name is read as a string of
          # single-digit GPU indices, e.g. "01" becomes DEVICES="0,1".
          CARD_ID=$(echo "${runner_name}" | awk -F'-' '{print $NF}')
          if ! [[ "${CARD_ID}" =~ ^[0-9]+$ ]]; then
            echo "ERROR: runner name '${runner_name}' does not end in a numeric card id"
            exit 1
          fi
          DEVICES=$(echo "$CARD_ID" | fold -w1 | paste -sd,)
          # The first GPU index selects a port block, 100 ports apart per index.
          DEVICE_PORT=$(echo "$DEVICES" | cut -d',' -f1)

          FLASK_PORT=$((8068 + DEVICE_PORT * 100))
          FD_API_PORT=$((8088 + DEVICE_PORT * 100))
          FD_ENGINE_QUEUE_PORT=$((8058 + DEVICE_PORT * 100))
          FD_METRICS_PORT=$((8078 + DEVICE_PORT * 100))
          FD_CACHE_QUEUE_PORT=$((8098 + DEVICE_PORT * 100))
          echo "Test ENV Parameter:"
          echo "========================================================="
          echo "FLASK_PORT=${FLASK_PORT}"
          echo "FD_API_PORT=${FD_API_PORT}"
          echo "FD_ENGINE_QUEUE_PORT=${FD_ENGINE_QUEUE_PORT}"
          echo "FD_METRICS_PORT=${FD_METRICS_PORT}"
          echo "FD_CACHE_QUEUE_PORT=${FD_CACHE_QUEUE_PORT}"
          echo "DEVICES=${DEVICES}"
          echo "========================================================="

          # Default CACHE_DIR to the directory two levels above the workspace.
          CACHE_DIR="${CACHE_DIR:-$(dirname "$(dirname "${{ github.workspace }}")")}"
          echo "CACHE_DIR is set to ${CACHE_DIR}"
          # The gitconfig is bind-mounted as a file below, so it has to exist.
          if [ ! -f "${CACHE_DIR}/gitconfig" ]; then
            touch "${CACHE_DIR}/gitconfig"
          fi
          if [ ! -d "${MODEL_CACHE_DIR}" ]; then
            echo "Error: MODEL_CACHE_DIR '${MODEL_CACHE_DIR}' does not exist."
            exit 1
          fi

          PORTS=($FLASK_PORT $FD_API_PORT $FD_ENGINE_QUEUE_PORT $FD_METRICS_PORT $FD_CACHE_QUEUE_PORT)
          LOG_FILE="./port_cleanup_$(date +%Y%m%d_%H%M%S).log"
          echo "==== LOG_FILE is ${LOG_FILE} ===="

          echo "==== PORT CLEAN BEFORE TASK RUN ====" | tee -a "$LOG_FILE"

          # Kill whatever still listens on the ports this run is going to use.
          for port in "${PORTS[@]}"; do
              PIDS=$(lsof -t -i :$port || true)
              if [ -n "$PIDS" ]; then
                  echo "Port $port is occupied by PID(s): $PIDS" | tee -a "$LOG_FILE"
                  echo "$PIDS" | xargs -r kill -9
                  echo "Port $port cleared" | tee -a "$LOG_FILE"
              else
                  echo "Port $port is free" | tee -a "$LOG_FILE"
              fi
          done

          echo "==== PORT CLEAN COMPLETE ====" | tee -a "$LOG_FILE"

          echo "========================================================="
          echo "Ensuring no stale container named ${runner_name} ..."
          if [ -n "$(docker ps -a -q -f "name=${runner_name}")" ]; then
            echo "Removing stale container: ${runner_name}"
            docker rm -f "${runner_name}" || true
          fi

          # Everything between the single quotes below is the script executed
          # inside the container; it must not contain a single quote itself.
          docker run --rm --ipc=host --pid=host --net=host \
          --name "${runner_name}" \
          -v "$(pwd):/workspace" \
          -w /workspace \
          -e "fastdeploy_wheel_url=${fastdeploy_wheel_url}" \
          -e "FD_API_PORT=${FD_API_PORT}" \
          -e "FD_ENGINE_QUEUE_PORT=${FD_ENGINE_QUEUE_PORT}" \
          -e "FD_METRICS_PORT=${FD_METRICS_PORT}" \
          -e "FLASK_PORT=${FLASK_PORT}" \
          -e "FD_CACHE_QUEUE_PORT=${FD_CACHE_QUEUE_PORT}" \
          -v "${MODEL_CACHE_DIR}:/MODELDATA" \
          -v "${CACHE_DIR}/gitconfig:/etc/gitconfig:ro" \
          -v "${CACHE_DIR}/.cache:/root/.cache" \
          -v "${CACHE_DIR}/ConfigDir:/root/.config" \
          -e TZ="Asia/Shanghai" \
          --gpus '"device='"${DEVICES}"'"' "${docker_image}" /bin/bash -xc '
          python -m pip install --pre paddlepaddle-gpu -i https://www.paddlepaddle.org.cn/packages/nightly/cu126/

          pip config set global.index-url https://mirrors.tuna.tsinghua.edu.cn/pypi/web/simple

          python -m pip install ${fastdeploy_wheel_url}
          python -m pip install pytest

          wget --no-proxy https://paddle-qa.bj.bcebos.com/zhengtianyu/tools/llm-deploy-linux-amd64
          chmod +x ./llm-deploy-linux-amd64
          ./llm-deploy-linux-amd64 -python python3.10 \
          -model_name ERNIE-4.5-0.3B-Paddle \
          -model_path /MODELDATA \
          --skip install,model

          git config --global --add safe.directory /workspace/FastDeploy
          cd FastDeploy
          pushd tests/ce/deploy
          # Kill processes whose command line mentions the queue ports.
          ps -ef | grep "${FD_CACHE_QUEUE_PORT}" | grep -v grep | awk "{print \$2}" | xargs -r kill -9
          ps -ef | grep "${FD_ENGINE_QUEUE_PORT}" | grep -v grep | awk "{print \$2}" | xargs -r kill -9
          # Start the deploy helper in the background; it listens on FLASK_PORT.
          python3.10 deploy.py > dd.log 2>&1 &
          sleep 3
          curl -X POST http://0.0.0.0:${FLASK_PORT}/start \
            -H "Content-Type: application/json" \
            -d "{\"--model\": \"/MODELDATA/ERNIE-4.5-0.3B-Paddle\"}"

          # check_service [timeout]: ask the deploy helper to wait until the
          # service can serve requests. Exits the whole script with status 8
          # when the request fails or the helper reports a startup timeout.
          check_service() {
            local timeout=${1:-90}
            local url="http://localhost:${FLASK_PORT}/wait_for_infer?timeout=${timeout}"
            local resp

            # A failed request means readiness was never confirmed.
            if ! resp=$(curl -s -X POST "$url"); then
              echo "ERROR: request to ${url} failed"
              exit 8
            fi

            # The matched text is the helper message for a service startup timeout.
            if echo "$resp" | grep -q "服务启动超时"; then
              echo "ERROR: service did not become ready within ${timeout}s"
              exit 8
            fi
          }

          check_service 90
          popd

          pushd tests/ce/server
          export URL=http://localhost:${FD_API_PORT}/v1/chat/completions
          export TEMPLATE=TOKEN_LOGPROB
          # Failing suites only flip TEST_EXIT_CODE; the remaining suites still run.
          TEST_EXIT_CODE=0
          python -m pytest -sv test_base_chat.py test_compare_top_logprobs.py test_logprobs.py test_params_boundary.py test_seed_usage.py test_stream.py test_evil_cases.py test_completions.py test_return_token_ids.py test_update_weight.py || TEST_EXIT_CODE=1
          curl -X POST http://0.0.0.0:${FLASK_PORT}/switch \
            -H "Content-Type: application/json" \
            -d "{\"--model\": \"/MODELDATA/ERNIE-4.5-0.3B-Paddle\", \"--early-stop-config\": \"{\\\"enable_early_stop\\\":true, \\\"window_size\\\":6, \\\"threshold\\\":0.93}\"}"
          check_service 90
          python -m pytest -sv test_repetition_early_stop.py || TEST_EXIT_CODE=1

          curl -X POST http://0.0.0.0:${FLASK_PORT}/switch \
            -H "Content-Type: application/json" \
            -d "{ \"--model\": \"/MODELDATA/ERNIE-4.5-0.3B-Paddle\", \"--max-concurrency\": 5, \"--max-waiting-time\": 1 }"
          check_service 90
          python -m pytest -sv test_max_concurrency.py || TEST_EXIT_CODE=1

          curl -X POST http://0.0.0.0:${FLASK_PORT}/switch \
            -H "Content-Type: application/json" \
            -d "{ \"--model\": \"/MODELDATA/ERNIE-4.5-0.3B-Paddle\", \"--max-concurrency\": 5000, \"--max-waiting-time\": 1 }"
          check_service 90
          python -m pytest -sv test_max_waiting_time.py || TEST_EXIT_CODE=1

          curl -X POST http://0.0.0.0:${FLASK_PORT}/switch \
            -H "Content-Type: application/json" \
            -d "{\"--model\": \"/MODELDATA/ernie-4_5-21b-a3b-bf16-paddle\", \"--config\": \"ernie45t_21b_sot_wint4.yaml\", \"--enable-logprob\": \"False\"}"
          check_service 360
          export TEMPLATE=TOKEN_NORMAL
          python -m pytest -sv test_seed_usage.py -k "not test_seed_stream" || TEST_EXIT_CODE=1

          curl -X POST http://0.0.0.0:${FLASK_PORT}/switch \
            -H "Content-Type: application/json" \
            -d "{\"--model\": \"/MODELDATA/ernie-4_5-21b-a3b-bf16-paddle\", \"--config\": \"ernie45t_21b_cinn_wint4.yaml\", \"--enable-logprob\": \"False\"}"
          check_service 360
          export TEMPLATE=TOKEN_NORMAL
          python -m pytest -sv test_seed_usage.py -k "not test_seed_stream" || TEST_EXIT_CODE=1

          export TEMPLATE=TOKEN_NORMAL
          curl -X POST http://0.0.0.0:${FLASK_PORT}/switch \
            -H "Content-Type: application/json" \
            -d "{\"--model\": \"/MODELDATA/ERNIE-4.5-VL-28B-A3B-Thinking\", \"--reasoning-parser\": \"ernie-45-vl-thinking\", \"--tool-call-parser\": \"ernie-45-vl-thinking\", \"--tensor-parallel-size\": 1, \"--quantization\": \"wint4\", \"--max-model-len\": 131072, \"--max-num-seqs\": 32, \"--no-enable-prefix-caching\": true}"
          check_service 180
          python -m pytest -sv test_prompt_ids.py || TEST_EXIT_CODE=1

          popd
          # Hand the aggregated result to the host through the shared workspace.
          echo "TEST_EXIT_CODE=${TEST_EXIT_CODE}" >> /workspace/FastDeploy/exit_code.env
          '
          if [ -f ./FastDeploy/exit_code.env ]; then
            source ./FastDeploy/exit_code.env
            cat ./FastDeploy/exit_code.env >> "$GITHUB_ENV"
          else
            # Without the result file a bare "exit" would report success.
            echo "ERROR: ./FastDeploy/exit_code.env was not produced; treating the run as failed"
            TEST_EXIT_CODE=1
          fi
          echo "TEST_EXIT_CODE=${TEST_EXIT_CODE}"
          exit "${TEST_EXIT_CODE:-1}"