Skip to content

Commit b39cc40

Browse files
committed
update
1 parent e001fd6 commit b39cc40

File tree

1 file changed

+21
-69
lines changed

1 file changed

+21
-69
lines changed

.github/workflows/e2e-smoke-test.yml

Lines changed: 21 additions & 69 deletions
Original file line numberDiff line numberDiff line change
@@ -71,17 +71,31 @@ jobs:
7171
7272
echo "test_exit_code=$TEST_EXIT_CODE" >> $GITHUB_OUTPUT
7373
74+
# List generated files for debugging
75+
echo "📁 Generated files:"
76+
ls -la *.json 2>/dev/null || echo "No JSON files found"
77+
ls -la ep_summary* 2>/dev/null || echo "No ep_summary files found"
78+
7479
# Parse evaluation protocol summary if it exists
75-
if [ -f ep_summary.json ]; then
76-
echo "EP Summary found, parsing..."
80+
# EP might generate files with different names, check for common patterns
81+
EP_SUMMARY_FILE=""
82+
for file in ep_summary*.json; do
83+
if [ -f "$file" ]; then
84+
EP_SUMMARY_FILE="$file"
85+
break
86+
fi
87+
done
88+
89+
if [ -n "$EP_SUMMARY_FILE" ] && [ -f "$EP_SUMMARY_FILE" ]; then
90+
echo "EP Summary found: $EP_SUMMARY_FILE, parsing..."
7791
7892
# Log the full summary for debugging
7993
echo "EP Summary contents:"
80-
cat ep_summary.json | jq . 2>/dev/null || cat ep_summary.json
94+
cat "$EP_SUMMARY_FILE" | jq . 2>/dev/null || cat "$EP_SUMMARY_FILE"
8195
8296
# Extract success rate from EP summary (this contains the actual accuracy/success rate)
8397
# The EP summary uses 'agg_score' for the aggregated success rate
84-
SUCCESS_RATE=$(jq -r '.agg_score // 0' ep_summary.json 2>/dev/null || echo "0")
98+
SUCCESS_RATE=$(jq -r '.agg_score // 0' "$EP_SUMMARY_FILE" 2>/dev/null || echo "0")
8599
86100
echo "success_rate=$SUCCESS_RATE" >> $GITHUB_OUTPUT
87101
@@ -97,8 +111,8 @@ jobs:
97111
echo "threshold_met=$THRESHOLD_MET" >> $GITHUB_OUTPUT
98112
99113
# Extract additional info for display
100-
NUM_ROWS=$(jq -r '.rows // 0' ep_summary.json 2>/dev/null || echo "0")
101-
NUM_RUNS=$(jq -r '.num_runs // 0' ep_summary.json 2>/dev/null || echo "0")
114+
NUM_ROWS=$(jq -r '.rows // 0' "$EP_SUMMARY_FILE" 2>/dev/null || echo "0")
115+
NUM_RUNS=$(jq -r '.num_runs // 0' "$EP_SUMMARY_FILE" 2>/dev/null || echo "0")
102116
103117
echo "📊 Evaluation Summary:"
104118
echo " - Success rate (agg_score): $(echo "$SUCCESS_RATE * 100" | bc -l)%"
@@ -119,7 +133,7 @@ jobs:
119133
with:
120134
name: e2e-smoke-test-results-${{ github.run_number }}
121135
path: |
122-
ep_summary.json
136+
ep_summary*.json
123137
*.log
124138
retention-days: 7
125139

@@ -178,65 +192,3 @@ jobs:
178192
echo " - Success rate: ${SUCCESS_RATE:-unknown}"
179193
echo " - Within acceptable range: 40%-90%"
180194
fi
181-
182-
- name: Create GitHub issue on failure
183-
if: failure()
184-
uses: actions/github-script@v7
185-
with:
186-
script: |
187-
const testResults = {
188-
exitCode: '${{ steps.run_test.outputs.test_exit_code }}',
189-
successRate: '${{ steps.run_test.outputs.success_rate }}',
190-
thresholdMet: '${{ steps.run_test.outputs.threshold_met }}',
191-
lowerBoundMet: '${{ steps.run_test.outputs.lower_bound_met }}',
192-
upperBoundMet: '${{ steps.run_test.outputs.upper_bound_met }}'
193-
};
194-
195-
const title = `🚨 E2E Smoke Test Failed (${new Date().toISOString().split('T')[0]})`;
196-
197-
const body = `
198-
## E2E Smoke Test Failure Report
199-
200-
**Test:** E2E Smoke Test
201-
**Date:** ${new Date().toISOString()}
202-
**Workflow Run:** [#${{ github.run_number }}](${{ github.server_url }}/${{ github.repository }}/actions/runs/${{ github.run_id }})
203-
204-
### Test Results
205-
- **Success Rate:** ${testResults.successRate ? (parseFloat(testResults.successRate) * 100).toFixed(1) + '%' : 'Unknown'}
206-
- **Lower Bound Met (≥40%):** ${testResults.lowerBoundMet === '1' ? '✅ Yes' : '❌ No'}
207-
- **Upper Bound Met (≤90%):** ${testResults.upperBoundMet === '1' ? '✅ Yes' : '❌ No'}
208-
- **Within Range (40%-90%):** ${testResults.thresholdMet === '1' ? '✅ Yes' : '❌ No'}
209-
- **Test Exit Code:** ${testResults.exitCode || 'Unknown'}
210-
211-
### Required Actions
212-
213-
${ testResults.thresholdMet !== '1' ?
214-
(testResults.lowerBoundMet !== '1' ?
215-
'🔍 **Performance Issue:** The success rate is below the required 40% minimum threshold. This indicates potential issues with model performance or test environment.' :
216-
testResults.upperBoundMet !== '1' ?
217-
'⚠️ **Suspiciously High Performance:** The success rate exceeds 90%, which may indicate test issues, data leakage, or unrealistic performance.' :
218-
'🔍 **Performance Issue:** The success rate is outside the acceptable 40%-90% range.'
219-
) :
220-
'🔧 **Infrastructure Issue:** Tests failed to execute properly despite potentially meeting performance thresholds.'
221-
}
222-
223-
### Next Steps
224-
1. Review the [workflow logs](${{ github.server_url }}/${{ github.repository }}/actions/runs/${{ github.run_id }}) for detailed error information
225-
2. Check if this is a temporary issue by re-running the workflow manually
226-
3. If persistent, investigate potential causes:
227-
- Model performance degradation
228-
- Test environment configuration
229-
- API key or service availability issues
230-
231-
### Auto-generated
232-
This issue was automatically created by the E2E smoke test workflow.
233-
`;
234-
235-
// Create the issue
236-
await github.rest.issues.create({
237-
owner: context.repo.owner,
238-
repo: context.repo.repo,
239-
title: title,
240-
body: body,
241-
labels: ['bug', 'e2e-test', 'automated', 'smoke-test']
242-
});

0 commit comments

Comments
 (0)