From 2d543a4be229c9c635c6350df794ee21997b1fb1 Mon Sep 17 00:00:00 2001 From: csoceanu Date: Thu, 18 Sep 2025 16:36:43 +0300 Subject: [PATCH 1/5] updates for testing --- upstream/pkg/pipelinerunmetrics/metrics.go | 134 +++++++++++++++++++++ 1 file changed, 134 insertions(+) diff --git a/upstream/pkg/pipelinerunmetrics/metrics.go b/upstream/pkg/pipelinerunmetrics/metrics.go index cdc8bfa36c..d3b855969e 100644 --- a/upstream/pkg/pipelinerunmetrics/metrics.go +++ b/upstream/pkg/pipelinerunmetrics/metrics.go @@ -18,7 +18,9 @@ package pipelinerunmetrics import ( "context" + "fmt" "sync" + "time" "github.com/tektoncd/chains/pkg/chains" "go.opencensus.io/stats" @@ -51,6 +53,20 @@ var ( stats.UnitDimensionless) mrCountView *view.View + + // New error metrics for better observability + errorCount = stats.Float64("pipelinerun_error_count", + "Number of errors encountered during pipeline run processing", + stats.UnitDimensionless) + + errorCountView *view.View + + // Duration metrics for performance monitoring + processTimeDuration = stats.Float64("pipelinerun_process_duration_seconds", + "Time taken to process pipeline runs in seconds", + stats.UnitSeconds) + + processTimeDurationView *view.View ) // Recorder holds keys for Tekton metrics @@ -110,11 +126,26 @@ func viewRegister() error { Measure: mrCount, Aggregation: view.Count(), } + + errorCountView = &view.View{ + Description: errorCount.Description(), + Measure: errorCount, + Aggregation: view.Count(), + } + + processTimeDurationView = &view.View{ + Description: processTimeDuration.Description(), + Measure: processTimeDuration, + Aggregation: view.Distribution(0.1, 0.5, 1.0, 5.0, 10.0, 30.0, 60.0), + } + return view.Register( sgCountView, plCountView, stCountView, mrCountView, + errorCountView, + processTimeDurationView, ) } @@ -142,3 +173,106 @@ func (r *Recorder) RecordCountMetrics(ctx context.Context, metricType string) { func (r *Recorder) countMetrics(ctx context.Context, measure *stats.Float64Measure) { metrics.Record(ctx, measure.M(1)) } + +// RecordErrorMetrics records error occurrences with optional error type classification +func (r *Recorder) RecordErrorMetrics(ctx context.Context, errorType string) { + logger := logging.FromContext(ctx) + if !r.initialized { + logger.Errorf("Ignoring the error metrics recording as recorder not initialized") + return + } + // Record error count with optional tags for error classification + metrics.Record(ctx, errorCount.M(1)) + logger.Debugf("Recorded error metric for type: %s", errorType) +} + +// RecordDurationMetrics records the time taken for pipeline processing +func (r *Recorder) RecordDurationMetrics(ctx context.Context, durationSeconds float64) { + logger := logging.FromContext(ctx) + if !r.initialized { + logger.Errorf("Ignoring the duration metrics recording as recorder not initialized") + return + } + if durationSeconds < 0 { + logger.Errorf("Invalid duration provided: %f seconds, must be non-negative", durationSeconds) + return + } + metrics.Record(ctx, processTimeDuration.M(durationSeconds)) + logger.Debugf("Recorded processing duration: %f seconds", durationSeconds) +} + +// GetMetricsStatus returns a summary of the current metrics configuration +// This is useful for debugging and monitoring the health of metrics collection +func (r *Recorder) GetMetricsStatus(ctx context.Context) map[string]interface{} { + logger := logging.FromContext(ctx) + + status := map[string]interface{}{ + "initialized": r.initialized, + "timestamp": time.Now().Format(time.RFC3339), + } + + if !r.initialized { + logger.Warn("Metrics recorder not properly initialized") + status["error"] = "recorder not initialized" + return status + } + + // List all available metrics views + availableViews := []string{ + sgCountView.Name, + plCountView.Name, + stCountView.Name, + mrCountView.Name, + errorCountView.Name, + processTimeDurationView.Name, + } + + status["available_views"] = availableViews + status["total_views"] = len(availableViews) + + logger.Debugf("Metrics status: %d views available, initialized: %v", len(availableViews), r.initialized) + + return status +} + +// RecordBatchMetrics allows recording multiple metrics in a single operation +// This is more efficient when multiple metrics need to be updated simultaneously +func (r *Recorder) RecordBatchMetrics(ctx context.Context, operations []MetricOperation) error { + logger := logging.FromContext(ctx) + + if !r.initialized { + return fmt.Errorf("metrics recorder not initialized") + } + + if len(operations) == 0 { + logger.Warn("No metric operations provided for batch recording") + return nil + } + + for i, op := range operations { + switch op.Type { + case "count": + r.RecordCountMetrics(ctx, op.MetricType) + case "error": + r.RecordErrorMetrics(ctx, op.MetricType) + case "duration": + if op.Value <= 0 { + logger.Errorf("Invalid duration value for operation %d: %f", i, op.Value) + continue + } + r.RecordDurationMetrics(ctx, op.Value) + default: + logger.Errorf("Unknown metric operation type: %s", op.Type) + } + } + + logger.Debugf("Successfully processed %d metric operations", len(operations)) + return nil +} + +// MetricOperation represents a single metrics operation for batch processing +type MetricOperation struct { + Type string // "count", "error", or "duration" + MetricType string // specific metric type identifier + Value float64 // value for duration metrics (ignored for count/error) +} From 468fb69428871ee42998eec3673197d08ad1e355 Mon Sep 17 00:00:00 2001 From: csoceanu Date: Thu, 18 Sep 2025 16:42:07 +0300 Subject: [PATCH 2/5] Remove all workflow files except suggest_docs.yml --- .github/workflows/.placeholder | 0 .github/workflows/auto-merge.main.yaml | 39 ------------ .github/workflows/auto-merge.next.yaml | 39 ------------ .github/workflows/update-sources.main.yaml | 73 ---------------------- .github/workflows/update-sources.next.yaml | 73 ---------------------- 5 files changed, 224 deletions(-) delete mode 100644 .github/workflows/.placeholder delete mode 100644 .github/workflows/auto-merge.main.yaml delete mode 100644 .github/workflows/auto-merge.next.yaml delete mode 100644 .github/workflows/update-sources.main.yaml delete mode 100644 .github/workflows/update-sources.next.yaml diff --git a/.github/workflows/.placeholder b/.github/workflows/.placeholder deleted file mode 100644 index e69de29bb2..0000000000 diff --git a/.github/workflows/auto-merge.main.yaml b/.github/workflows/auto-merge.main.yaml deleted file mode 100644 index 8eb95ae42c..0000000000 --- a/.github/workflows/auto-merge.main.yaml +++ /dev/null @@ -1,39 +0,0 @@ -name: auto-merge-main - -on: - workflow_dispatch: {} - schedule: - - cron: "*/30 * * * *" # At every 30 minutes - -jobs: - auto-approve: - runs-on: ubuntu-latest - if: github.repository_owner == 'openshift-pipelines' # do not run this elsewhere - permissions: - pull-requests: write - steps: - - name: Checkout the current repo - uses: actions/checkout@v4 - - name: auto-merge-update-references - run: | - gh auth status - git config user.name openshift-pipelines-bot - git config user.email pipelines-extcomm@redhat.com - # Approve and merge pull-request with no reviews - for p in $(gh pr list --search "author:app/red-hat-konflux head:konflux/references/main" --json "number" | jq ".[].number"); do - gh pr merge --rebase --delete-branch --auto $p - done - env: - GH_TOKEN: ${{ secrets.OPENSHIFT_PIPELINES_ROBOT }} - - name: auto-merge-upstream-main - run: | - gh auth status - git config user.name openshift-pipelines-bot - git config user.email pipelines-extcomm@redhat.com - # Approve and merge pull-request with no reviews - for p in $(gh pr list --search "head:actions/update/sources-main" --json "number" | jq ".[].number"); do - gh pr merge --rebase --delete-branch --auto $p - done - env: - GH_TOKEN: ${{ secrets.OPENSHIFT_PIPELINES_ROBOT }} - diff --git a/.github/workflows/auto-merge.next.yaml b/.github/workflows/auto-merge.next.yaml deleted file mode 100644 index 83cab7248b..0000000000 --- a/.github/workflows/auto-merge.next.yaml +++ /dev/null @@ -1,39 +0,0 @@ -name: auto-merge-next - -on: - workflow_dispatch: {} - schedule: - - cron: "*/30 * * * *" # At every 30 minutes - -jobs: - auto-approve: - runs-on: ubuntu-latest - if: github.repository_owner == 'openshift-pipelines' # do not run this elsewhere - permissions: - pull-requests: write - steps: - - name: Checkout the current repo - uses: actions/checkout@v4 - - name: auto-merge-update-references - run: | - gh auth status - git config user.name openshift-pipelines-bot - git config user.email pipelines-extcomm@redhat.com - # Approve and merge pull-request with no reviews - for p in $(gh pr list --search "author:app/red-hat-konflux head:konflux/references/next" --json "number" | jq ".[].number"); do - gh pr merge --rebase --delete-branch --auto $p - done - env: - GH_TOKEN: ${{ secrets.OPENSHIFT_PIPELINES_ROBOT }} - - name: auto-merge-upstream-next - run: | - gh auth status - git config user.name openshift-pipelines-bot - git config user.email pipelines-extcomm@redhat.com - # Approve and merge pull-request with no reviews - for p in $(gh pr list --search "head:actions/update/sources-next" --json "number" | jq ".[].number"); do - gh pr merge --rebase --delete-branch --auto $p - done - env: - GH_TOKEN: ${{ secrets.OPENSHIFT_PIPELINES_ROBOT }} - diff --git a/.github/workflows/update-sources.main.yaml b/.github/workflows/update-sources.main.yaml deleted file mode 100644 index 155b83f9ff..0000000000 --- a/.github/workflows/update-sources.main.yaml +++ /dev/null @@ -1,73 +0,0 @@ -# Generated by openshift-pipelines/hack. DO NOT EDIT. -name: update-sources-main - -on: - workflow_dispatch: {} - schedule: - - cron: "30 */12 * * *" # At minute 30 past every 12th hour. - -jobs: - - update-sources: - runs-on: ubuntu-latest - if: github.repository_owner == 'openshift-pipelines' # do not run this elsewhere - permissions: - contents: write - pull-requests: write - steps: - - name: Checkout the current repo - uses: actions/checkout@v4 - with: - ref: main - - - name: Clone tektoncd/chains - run: | - rm -fR upstream - git clone https://github.com/tektoncd/chains upstream - pushd upstream - git checkout -B main origin/main - popd - - - name: Commit new changes - run: | - git config user.name openshift-pipelines-bot - git config user.email pipelines-extcomm@redhat.com - git checkout -b actions/update/sources-main - pushd upstream - OLD_COMMIT=$(cat ../head) - NEW_COMMIT=$(git rev-parse HEAD) - echo Previous commit: ${OLD_COMMIT} - git show --stat ${OLD_COMMIT} - echo New commit: ${NEW_COMMIT} - git show --stat ${NEW_COMMIT} - git diff --stat ${NEW_COMMIT}..${OLD_COMMIT} > /tmp/diff.txt - git rev-parse HEAD > ../head - popd - rm -rf upstream/.git - git add upstream head .konflux - - if [[ -z $(git status --porcelain --untracked-files=no) ]]; then - echo "No change, exiting" - exit 0 - fi - - git commit -F- < /tmp/diff.txt - git rev-parse HEAD > ../head - popd - rm -rf upstream/.git - git add upstream head .konflux - - if [[ -z $(git status --porcelain --untracked-files=no) ]]; then - echo "No change, exiting" - exit 0 - fi - - git commit -F- < Date: Thu, 18 Sep 2025 17:14:32 +0300 Subject: [PATCH 3/5] Fix markdown code fence issue in AI prompts --- scripts/suggest_docs.py | 2 ++ 1 file changed, 2 insertions(+) diff --git a/scripts/suggest_docs.py b/scripts/suggest_docs.py index 5b3d5b42cc..56c492819e 100644 --- a/scripts/suggest_docs.py +++ b/scripts/suggest_docs.py @@ -192,6 +192,8 @@ def ask_gemini_for_updated_content(diff, file_path, current_content): if is_markdown: format_instructions = """ CRITICAL FORMATTING REQUIREMENTS FOR MARKDOWN FILES: +- NEVER use markdown code fences like ```markdown or ``` to wrap the entire file content +- Markdown files start directly with content (comments, headers, or text) - Use standard Markdown syntax: # for headers, ``` for code blocks, | for tables - Maintain proper table structures with correct column alignment - Keep all links and references intact and properly formatted From 0e039a713c34a8592e24d7ee942fb6664ee0d769 Mon Sep 17 00:00:00 2001 From: csoceanu Date: Thu, 18 Sep 2025 17:27:52 +0300 Subject: [PATCH 4/5] Enhance markdown prompt to prevent formatting issues - Add rule for simple table separators: |---|---|---| (no backslashes) - Prevent extra backticks at end of file - Target specific issues seen in generated documentation --- scripts/suggest_docs.py | 2 ++ 1 file changed, 2 insertions(+) diff --git a/scripts/suggest_docs.py b/scripts/suggest_docs.py index 56c492819e..73874272a0 100644 --- a/scripts/suggest_docs.py +++ b/scripts/suggest_docs.py @@ -195,6 +195,8 @@ def ask_gemini_for_updated_content(diff, file_path, current_content): - NEVER use markdown code fences like ```markdown or ``` to wrap the entire file content - Markdown files start directly with content (comments, headers, or text) - Use standard Markdown syntax: # for headers, ``` for code blocks, | for tables +- Table separators must be simple: |---|---|---| (no backslashes, no extra characters) +- Do NOT add extra ``` backticks at the end of the file - Maintain proper table structures with correct column alignment - Keep all links and references intact and properly formatted - Use consistent indentation and spacing From 1253fbacf0360da33a522bae967c9c77b1efb4fd Mon Sep 17 00:00:00 2001 From: csoceanu Date: Thu, 18 Sep 2025 17:34:54 +0300 Subject: [PATCH 5/5] Add retry, queue depth, and attestation metrics --- upstream/pkg/pipelinerunmetrics/metrics.go | 167 ++++++++++++++++++++- 1 file changed, 165 insertions(+), 2 deletions(-) diff --git a/upstream/pkg/pipelinerunmetrics/metrics.go b/upstream/pkg/pipelinerunmetrics/metrics.go index d3b855969e..32db8977ad 100644 --- a/upstream/pkg/pipelinerunmetrics/metrics.go +++ b/upstream/pkg/pipelinerunmetrics/metrics.go @@ -67,11 +67,63 @@ var ( stats.UnitSeconds) processTimeDurationView *view.View + + // Retry and failure metrics for reliability monitoring + retryCount = stats.Float64("pipelinerun_retry_count", + "Number of retries attempted for failed pipeline runs", + stats.UnitDimensionless) + + retryCountView *view.View + + // Queue depth metrics for throughput monitoring + queueDepth = stats.Float64("pipelinerun_queue_depth", + "Current number of pipeline runs waiting in queue", + stats.UnitDimensionless) + + queueDepthView *view.View + + // Attestation generation metrics + attestationGeneratedCount = stats.Float64("pipelinerun_attestation_generated_total", + "Total number of attestations generated for pipeline runs", + stats.UnitDimensionless) + + attestationGeneratedCountView *view.View +) + +// Metric recording constants for different operation types +const ( + // Retry operation types + RetryTypePipelineExecution = "pipeline_execution" + RetryTypeSignatureGeneration = "signature_generation" + RetryTypeAttestationUpload = "attestation_upload" + + // Queue depth measurement intervals (in seconds) + QueueDepthMeasurementInterval = 30 + + // Maximum recommended queue depth before alerting + MaxRecommendedQueueDepth = 100 ) +// MetricsConfig holds configuration for metrics collection +type MetricsConfig struct { + EnableDetailedMetrics bool + MeasurementInterval int + MaxQueueDepth int +} + +// DefaultMetricsConfig returns the default configuration for metrics +func DefaultMetricsConfig() *MetricsConfig { + return &MetricsConfig{ + EnableDetailedMetrics: true, + MeasurementInterval: QueueDepthMeasurementInterval, + MaxQueueDepth: MaxRecommendedQueueDepth, + } +} + // Recorder holds keys for Tekton metrics type Recorder struct { initialized bool + config *MetricsConfig } // We cannot register the view multiple times, so NewRecorder lazily @@ -90,6 +142,7 @@ func NewRecorder(ctx context.Context) (*Recorder, error) { once.Do(func() { r = &Recorder{ initialized: true, + config: DefaultMetricsConfig(), } errRegistering = viewRegister() if errRegistering != nil { @@ -97,6 +150,9 @@ func NewRecorder(ctx context.Context) (*Recorder, error) { logger.Errorf("View Register Failed ", r.initialized) return } + + logger.Debugf("Initialized metrics recorder with config: detailed=%v, interval=%d", + r.config.EnableDetailedMetrics, r.config.MeasurementInterval) }) return r, errRegistering @@ -139,6 +195,24 @@ func viewRegister() error { Aggregation: view.Distribution(0.1, 0.5, 1.0, 5.0, 10.0, 30.0, 60.0), } + retryCountView = &view.View{ + Description: retryCount.Description(), + Measure: retryCount, + Aggregation: view.Count(), + } + + queueDepthView = &view.View{ + Description: queueDepth.Description(), + Measure: queueDepth, + Aggregation: view.LastValue(), + } + + attestationGeneratedCountView = &view.View{ + Description: attestationGeneratedCount.Description(), + Measure: attestationGeneratedCount, + Aggregation: view.Count(), + } + return view.Register( sgCountView, plCountView, @@ -146,6 +220,9 @@ func viewRegister() error { mrCountView, errorCountView, processTimeDurationView, + retryCountView, + queueDepthView, + attestationGeneratedCountView, ) } @@ -225,6 +302,9 @@ func (r *Recorder) GetMetricsStatus(ctx context.Context) map[string]interface{} mrCountView.Name, errorCountView.Name, processTimeDurationView.Name, + retryCountView.Name, + queueDepthView.Name, + attestationGeneratedCountView.Name, } status["available_views"] = availableViews @@ -261,6 +341,16 @@ func (r *Recorder) RecordBatchMetrics(ctx context.Context, operations []MetricOp continue } r.RecordDurationMetrics(ctx, op.Value) + case "retry": + r.RecordRetryMetrics(ctx, op.MetricType) + case "queue": + if op.Value < 0 { + logger.Errorf("Invalid queue depth for operation %d: %f", i, op.Value) + continue + } + r.RecordQueueDepthMetrics(ctx, op.Value) + case "attestation": + r.RecordAttestationGeneratedMetrics(ctx) default: logger.Errorf("Unknown metric operation type: %s", op.Type) } @@ -270,9 +360,82 @@ func (r *Recorder) RecordBatchMetrics(ctx context.Context, operations []MetricOp return nil } +// RecordRetryMetrics records retry attempts for different operation types +func (r *Recorder) RecordRetryMetrics(ctx context.Context, retryType string) { + logger := logging.FromContext(ctx) + if !r.initialized { + logger.Errorf("Ignoring retry metrics recording as recorder not initialized") + return + } + + // Validate retry type + validTypes := []string{RetryTypePipelineExecution, RetryTypeSignatureGeneration, RetryTypeAttestationUpload} + isValid := false + for _, validType := range validTypes { + if retryType == validType { + isValid = true + break + } + } + + if !isValid { + logger.Warnf("Unknown retry type: %s, recording anyway", retryType) + } + + metrics.Record(ctx, retryCount.M(1)) + logger.Debugf("Recorded retry metric for type: %s", retryType) +} + +// RecordQueueDepthMetrics records the current queue depth for monitoring throughput +func (r *Recorder) RecordQueueDepthMetrics(ctx context.Context, currentDepth float64) { + logger := logging.FromContext(ctx) + if !r.initialized { + logger.Errorf("Ignoring queue depth metrics recording as recorder not initialized") + return + } + + if currentDepth < 0 { + logger.Errorf("Invalid queue depth: %f, must be non-negative", currentDepth) + return + } + + // Alert if queue depth exceeds recommended maximum + if r.config != nil && currentDepth > float64(r.config.MaxQueueDepth) { + logger.Warnf("Queue depth (%f) exceeds recommended maximum (%d)", currentDepth, r.config.MaxQueueDepth) + } + + metrics.Record(ctx, queueDepth.M(currentDepth)) + logger.Debugf("Recorded queue depth: %f", currentDepth) +} + +// RecordAttestationGeneratedMetrics records successful attestation generation +func (r *Recorder) RecordAttestationGeneratedMetrics(ctx context.Context) { + logger := logging.FromContext(ctx) + if !r.initialized { + logger.Errorf("Ignoring attestation metrics recording as recorder not initialized") + return + } + + metrics.Record(ctx, attestationGeneratedCount.M(1)) + logger.Debugf("Recorded attestation generation metric") +} + +// UpdateMetricsConfig updates the recorder's configuration +func (r *Recorder) UpdateMetricsConfig(ctx context.Context, config *MetricsConfig) { + logger := logging.FromContext(ctx) + if config == nil { + logger.Warn("Attempted to update with nil config, using default") + config = DefaultMetricsConfig() + } + + r.config = config + logger.Debugf("Updated metrics config: detailed=%v, interval=%d, maxQueue=%d", + config.EnableDetailedMetrics, config.MeasurementInterval, config.MaxQueueDepth) +} + // MetricOperation represents a single metrics operation for batch processing type MetricOperation struct { - Type string // "count", "error", or "duration" + Type string // "count", "error", "duration", "retry", "queue", "attestation" MetricType string // specific metric type identifier - Value float64 // value for duration metrics (ignored for count/error) + Value float64 // value for duration/queue metrics (ignored for count/error/retry/attestation) }