# Source: graph-compiler-playground/.github/workflows/execute-test-script.yml (main branch, intel/graph-compiler-playground on GitHub)

name: Run mlp test on requested platforms

# Reusable workflow: its only trigger is `workflow_call`, so the calling
# workflow supplies every input and secret declared below.
on:
    workflow_call:
        inputs:
            # --- What to benchmark -------------------------------------------
            compiler:
                description: Type of JIT to use
                type: string
                required: true
            device:
                description: Type of engine to use
                type: string
                required: true
            tag:
                description: Tag to label this result in DB
                type: string
                required: true

            # --- Where the Torch-MLIR sources come from ----------------------
            # Both values are forwarded to the local `initial_setup` action.
            torch_mlir_repo:
                description: Torch-MLIR repository on github
                type: string
                required: false
                default: intel-ai/torch-mlir
            torch_mlir_branch:
                description: Torch-MLIR branch to checkout
                type: string
                required: false
                default: cpu-proto

            # --- Where and what to run ---------------------------------------
            # Values recognised by the `set_up_vars` job:
            # spr, amd32c, amd64c, genoa60c, a100, v100.
            # NOTE(review): `required: true` together with a `default` looks
            # contradictory -- confirm which of the two is intended.
            runner_type:
                description: Type of runner to use
                type: string
                required: true
                default: spr
            # File name looked up under ./bench_suit/ by the `mlp_test` job.
            test_script:
                description: Test script to run
                type: string
                required: false
                default: 'mlp.sh'

        secrets:
            # Passed as the `--url` argument when results are written to the DB.
            DB_URL:
                required: true
            # Exported as HF_TOKEN so the benchmark can load gated models.
            HF_TOKEN:
                required: true


jobs:
    print_inputs:
        # Debug helper: writes the inputs of the triggering event to the job log.
        runs-on: Linux
        steps:
            - name: Print Inputs
              env:
                  # The JSON is handed over through the environment instead of
                  # being pasted into the script text. Pasted inline, the double
                  # quotes inside the JSON terminate the surrounding shell string,
                  # which garbles the output and lets input text be interpreted
                  # as shell syntax.
                  # NOTE(review): in a called workflow `github.event` belongs to
                  # the caller's trigger; if the values of this workflow's own
                  # `inputs` are what should be logged, switch to toJSON(inputs).
                  EVENT_INPUTS_JSON: ${{ toJSON(github.event.inputs) }}
              run: echo "${EVENT_INPUTS_JSON}"

    set_up_vars:
        # Derives two values that the later jobs consume:
        #   runner_labels - JSON array of runner labels, fed to fromJSON() in
        #                   the `runs-on` of `mlp_test`
        #   results_name  - artifact name used to hand results.db from
        #                   `mlp_test` over to `parse_results`
        runs-on: Linux
        outputs:
            runner_labels: ${{ steps.set_up_vars.outputs.runner_labels }}
            results_name: ${{ steps.set_up_vars.outputs.results_name }}
        steps:
            - name: Get conda env and results name for cloud runners
              id: set_up_vars
              shell: bash -el {0}
              env:
                  # Inputs reach the script as environment variables so their
                  # content is never parsed as shell code.
                  WF_RUNNER_TYPE: ${{ inputs.runner_type }}
                  WF_DEVICE: ${{ inputs.device }}
                  WF_COMPILER: ${{ inputs.compiler }}
                  WF_TEST_SCRIPT: ${{ inputs.test_script }}
              run: |
                # Map the short runner type onto the label set of the matching
                # self-hosted runner pool.
                case "${WF_RUNNER_TYPE}" in
                    spr)
                        labels='["self-hosted", "spr", "glados"]';;
                    amd32c)
                        labels='["self-hosted", "amd32c", "aws"]';;
                    amd64c)
                        labels='["self-hosted", "amd64c", "aws"]';;
                    genoa60c)
                        labels='["self-hosted", "genoa", "60c", "gcp"]';;
                    a100)
                        labels='["self-hosted", "nvidia", "a100"]';;
                    v100)
                        labels='["self-hosted", "nvidia", "v100"]';;
                    *)
                        # Stop here with a readable message. Any non-JSON value
                        # written to the output would only surface later as a
                        # fromJSON() parse failure in the dependent job.
                        echo "::error::Unknown runner_type '${WF_RUNNER_TYPE}'; expected one of: spr, amd32c, amd64c, genoa60c, a100, v100"
                        exit 1;;
                esac
                echo "runner_labels=${labels}" >> "${GITHUB_OUTPUT}"

                # Artifact name: "<device>-<compiler>-<test_script>-results" with
                # everything except alphanumerics and '-' removed, plus ".db".
                echo "results_name=$(echo "${WF_DEVICE}-${WF_COMPILER}-${WF_TEST_SCRIPT}-results" | tr -cd '[:alnum:]-').db" >> "${GITHUB_OUTPUT}"

    mlp_test:
        # Runs the requested benchmark script on the runner selected by
        # `set_up_vars`. On runners carrying the `glados` label the DB URL is
        # passed to the benchmark; on all other runner types results.db is
        # uploaded as an artifact for the `parse_results` job.
        needs: set_up_vars
        runs-on: ${{ fromJSON(needs.set_up_vars.outputs.runner_labels) }}
        steps:
            - name: Guess conda env from device and compiler parameters
              id: guess_conda_env
              shell: bash -el {0}
              env:
                  # Inputs are passed through the environment so their content
                  # is treated as data and can be quoted inside the script.
                  WF_COMPILER: ${{ inputs.compiler }}
                  WF_DEVICE: ${{ inputs.device }}
              run: |
                # Compiler-specific environments win; otherwise choose by device.
                case "${WF_COMPILER}" in
                    torch_mlir*)
                        echo conda_env=mlir-dev >> "${GITHUB_OUTPUT}";;
                    ipex*)
                        echo conda_env=ipex >> "${GITHUB_OUTPUT}";;
                    *)
                        if [[ "${WF_DEVICE}" = "cuda" ]]; then
                            echo conda_env=cuda >> "${GITHUB_OUTPUT}"
                        else
                            echo conda_env=cpu >> "${GITHUB_OUTPUT}"
                        fi;;
                esac

            - uses: actions/checkout@v4
            - uses: ./.github/actions/initial_setup
              with:
                  conda_env: ${{ steps.guess_conda_env.outputs.conda_env }}
                  compiler: ${{ inputs.compiler }}
                  torch_mlir_repo: ${{ inputs.torch_mlir_repo }}
                  torch_mlir_branch: ${{ inputs.torch_mlir_branch }}

            - name: Run MLP test on specific compiler
              shell: bash -el {0}
              env:
                  # Inputs, step outputs and secrets all arrive as environment
                  # variables instead of being substituted into the script text.
                  WF_COMPILER: ${{ inputs.compiler }}
                  WF_DEVICE: ${{ inputs.device }}
                  WF_RUNNER_TYPE: ${{ inputs.runner_type }}
                  WF_TAG: ${{ inputs.tag }}
                  WF_TEST_SCRIPT: ${{ inputs.test_script }}
                  WF_CONDA_ENV: ${{ steps.guess_conda_env.outputs.conda_env }}
                  WF_RUNNER_LABELS: ${{ needs.set_up_vars.outputs.runner_labels }}
                  WF_DB_URL: ${{ secrets.DB_URL }}
                  # We need token to load llama2 from huggingface repo, which is closed
                  HF_TOKEN: ${{ secrets.HF_TOKEN }}
              run: |
                  source ${CONDA}/bin/activate "${WF_CONDA_ENV}"

                  export ONEDNN_VERBOSE=0

                  # Only runners labelled `glados` are given the DB URL.
                  LABELS="${WF_RUNNER_LABELS}"
                  URL=""
                  if [[ "${LABELS}" = *glados* ]]; then
                      URL="--url ${WF_DB_URL}"
                  fi

                  # Variables exported for the benchmark script.
                  export COMPILER="${WF_COMPILER}"
                  export DEVICE="${WF_DEVICE}"
                  export OTHER_ARGS="--host ${WF_RUNNER_TYPE} --tag ${WF_TAG} ${URL}"

                  # We mainly want to verify our own backend
                  if [[ "${WF_COMPILER}" != *torch_mlir* ]]; then
                    OTHER_ARGS="${OTHER_ARGS} --skip_verification"
                  fi

                  # HOST CONFIG
                  export KMP_AFFINITY=respect,noreset,granularity=fine,balanced
                  # This parameter is incredibly important once we use numactl to pick one socket, performance difference was 10x for resnet50 bs=1 torch-inductor
                  # The value is the fourth field of the "cpu cores" line in /proc/cpuinfo.
                  export OMP_NUM_THREADS=$(grep ^cpu\\scores /proc/cpuinfo | uniq | awk '{print $4}')
                  echo "CPU cores configured: $OMP_NUM_THREADS"
                  if [[ "${LABELS}" = *glados* ]]; then
                      export HF_HOME="/cache/torchmlir/huggingface_cache"

                      # Bind memory to NUMA node 0 and execution to CPUs 0-31.
                      numactl -m 0 --physcpubind=0-31 "./bench_suit/${WF_TEST_SCRIPT}"
                  else
                      export HF_HOME="/data/torchmlir/huggingface_cache"
                      source "./bench_suit/${WF_TEST_SCRIPT}"
                  fi

            # NOTE(review): actions/upload-artifact@v3 is deprecated on github.com.
            # An upgrade has to be made together with the download-artifact step
            # of `parse_results`, since both must use the same major version.
            - name: Upload results.db to artifacts when running in the cloud
              if: ${{ inputs.runner_type != 'spr' }}
              uses: actions/upload-artifact@v3
              with:
                  name: ${{ needs.set_up_vars.outputs.results_name }}
                  path: results.db

    parse_results:
        # For every runner type except `spr`: downloads the results.db artifact
        # produced by `mlp_test` and pushes its content to the results database.
        # NOTE(review): pinned to the glados runner, presumably because only that
        # machine can reach the database -- confirm.
        runs-on: [self-hosted, glados, spr]
        needs:
            - set_up_vars
            - mlp_test
        if: ${{ inputs.runner_type != 'spr' }}
        steps:
            - uses: actions/checkout@v4
            - name: Download results.db from artifacts
              uses: actions/download-artifact@v3
              with:
                  name: ${{ needs.set_up_vars.outputs.results_name }}
                  path: .

            - name: Upload results into DB
              shell: bash -el {0}
              env:
                  # The secret is passed through the environment and quoted at
                  # its point of use. Substituted unquoted into the script text,
                  # characters such as '&', '?', ';' or '$' in the URL would be
                  # interpreted by the shell and corrupt or split the command.
                  WF_DB_URL: ${{ secrets.DB_URL }}
              run: |
                # Show the workspace content in the log (should list results.db).
                ls -l

                sudo apt update && sudo apt install -y sqlite3
                # Presumably converts results.db into results.csv, which is the
                # file read below -- verify against the script.
                ./db_tools/export_sqlite2csv.sh

                source ${CONDA}/bin/activate
                pip install sqlalchemy pandas mysql-connector-python
                pip install --no-deps -e .
                python ./db_tools/export_csv2url.py results.csv --url "${WF_DB_URL}"