GoogleCloudPlatform · akangsha7 · Jun 24, 2026 · Jun 25, 2026 · helloeve · Jun 25, 2026
@@ -0,0 +1,124 @@
+# MCP endpoints to check against the Data Cloud MCP style guide.
+#
+# Mirrors cloud/databases/mcp/readability/monitored_endpoints.textproto
+# (EndpointList). The proto `type` maps to `environment`; all live endpoints are
+# fetched via the `mcp` source over Streamable HTTP using ADC auth.
+#
+# `defaults` provides values shared by all endpoints; each endpoint may override
+# any of them. `tools_source.type` makes the tool listing pluggable:
+#   - mcp:  fetch tools/list from a live MCP server (official mcp SDK). Needs
+#           network access and (for these googleapis.com endpoints) ADC auth.
+#   - file: read a local YAML/JSON tools spec (offline / deterministic).
+
+defaults:
+  token_budget: 200000
+  endpoint_type: REMOTE       # EndpointType: REMOTE | LOCAL
+  tools_source:
+    type: mcp                 # mcp | file
+    auth: google_credentials  # ADC bearer token + x-goog-user-project header
+
+endpoints:
+  - product_name: "AlloyDB for PostgreSQL"
+    endpoint_url: "https://alloydb.us-central1.rep.googleapis.com/mcp"
+    environment: PROD
+
+  - product_name: "BigQuery"
+    endpoint_url: "https://bigquery.googleapis.com/mcp"
+    environment: PROD
+
+  - product_name: "BigQuery Data Transfer Service"
+    endpoint_url: "https://bigquerydatatransfer.googleapis.com/mcp"
+    environment: PROD
+
+  - product_name: "BigQuery Migration Service"
+    endpoint_url: "https://bigquerymigration.googleapis.com/mcp"
+    environment: PROD
+
+  - product_name: "Bigtable"
+    endpoint_url: "https://bigtableadmin.googleapis.com/mcp"
+    environment: PROD
+
+  - product_name: "Cloud SQL"
+    endpoint_url: "https://sqladmin.googleapis.com/mcp"
+    environment: PROD
+
+  - product_name: "Firestore"
+    endpoint_url: "https://firestore.googleapis.com/mcp"
+    environment: PROD
+
+  - product_name: "Spanner"
+    endpoint_url: "https://spanner.googleapis.com/mcp"
+    environment: PROD
+
+  - product_name: "Data Migration Service"
+    endpoint_url: "https://datamigration.googleapis.com/mcp"
+    environment: PROD
+
+  - product_name: "Datastream"
+    endpoint_url: "https://datastream.googleapis.com/mcp"
+    environment: PROD
+
+  - product_name: "Oracle Database"
+    endpoint_url: "https://oracledatabase.googleapis.com/mcp"
+    environment: PROD
+
+  - product_name: "Memorystore for Redis/Redis Cluster"
+    endpoint_url: "https://redis.googleapis.com/mcp"
+    environment: PROD
+
+  - product_name: "Memorystore for Valkey"
+    endpoint_url: "https://memorystore.googleapis.com/mcp"
+    environment: PROD
+
+  - product_name: "Pub/Sub"
+    endpoint_url: "https://pubsub.googleapis.com/mcp"
+    environment: PROD
+
+  - product_name: "Managed Kafka"
+    endpoint_url: "https://managedkafka.us-central1.rep.googleapis.com/mcp"
+    environment: PROD
+
+  - product_name: "Cloud Composer"
+    endpoint_url: "https://us-central1-staging-composer.sandbox.googleapis.com/mcp"
+    environment: STAGING
+  - product_name: "Cloud Composer"
+    endpoint_url: "https://composer.us-central1.rep.googleapis.com/mcp"
+    environment: PROD
+
+  - product_name: "Knowledge Catalog"
+    endpoint_url: "https://dataplex.googleapis.com/mcp"
+    environment: PROD
+
+  - product_name: "Knowledge Catalog Data Products"
+    endpoint_url: "https://staging-dataplex.sandbox.googleapis.com/mcp"
+    environment: STAGING
+
+  - product_name: "Database Insights (EGO for AlloyDB Tools)"
+    endpoint_url: "https://autopush-databaseinsights.sandbox.googleapis.com/mcp"
+    environment: AUTOPUSH
+
+  - product_name: "DB: AI Assisted Troubleshooting"
+    endpoint_url: "https://databaseinsights.googleapis.com/mcp"
+    environment: PROD
+
+  - product_name: "Cloud Storage MCP Server"
+    endpoint_url: "https://storage.googleapis.com/storage/mcp"
+    environment: PROD
+
+  - product_name: "Database Center API for MCP"
+    endpoint_url: "https://databasecenter.googleapis.com/mcp"
+    environment: PROD
+
+  - product_name: "Dataproc"
+    endpoint_url: "https://dataproc.googleapis.com/mcp"
+    environment: PROD
+
+  # --- Offline / deterministic sample (no network or auth) ------------------
+  # Uncomment to smoke-test the pipeline without hitting live endpoints.
+  # - product_name: "Sample Data Cloud MCP (local file)"
+  #   endpoint_url: "file://datasets/mcp_readability/sample_tools.yaml"
+  #   endpoint_type: LOCAL
+  #   environment: DEV
+  #   tools_source:
+  #     type: file
+  #     path: datasets/mcp_readability/sample_tools.yaml
@@ -0,0 +1,24 @@
+# Style-rule exceptions (waivers).
+#
+# Each entry waives one style `rule_id` for the endpoints it matches. A waived
+# rule is NOT counted toward p0/p1/p2 issues and is reported separately under
+# "waived" in the LLM feedback.
+#
+# Matchers (all that are present must match; an absent field or "*" = match-all):
+#   - endpoint_url
+#   - product_name
+#   - environment
+
+# `rule_id` should match the style guide's section heading text (the scorer uses
+# headings as rule IDs), e.g. "Tool Names", "Concise Descriptions",
+# "Use Enums", "Limit Options".
+exceptions:
+  # Example: waive the tool-naming rule for one specific endpoint.
+  - product_name: "Sample Data Cloud MCP (local file)"
+    rule_id: "Tool Names"
+    reason: "Sample fixture intentionally uses a non-compliant tool name (RunQuery)."
+
+  # Example (active, not commented out): waive the enum rule for all AUTOPUSH endpoints.
+  - environment: AUTOPUSH
+    rule_id: "Use Enums"
+    reason: "Autopush builds defer enum constraints until promotion to prod."
@@ -0,0 +1,4 @@
+generator: gcp_vertex_gemini
+vertex_model: gemini-2.5-pro
+base_prompt: ""
+execs_per_minute: 5
@@ -0,0 +1,43 @@
+############################################################
+### MCP Style-Guide Compliance (Readability) Run Config
+############################################################
+# Drives the `mcp_readability` orchestrator: fetch each endpoint's tools (man-page
+# markup), evaluate it against the style guide with an LLM, and write a compliance
+# CSV (and optionally append the rows to the eval BigQuery table).
+
+orchestrator: mcp_readability
+
+# Inputs
+endpoints_config: datasets/mcp_readability/endpoints.yaml
+style_guide: datasets/mcp_readability/style_guide.md
+exceptions_config: datasets/mcp_readability/exceptions.yaml          # optional
+tools_generator_config: datasets/mcp_readability/tools_generator.yaml
+
+# Default token budget for token_budget_used_percent (endpoints may override).
+token_budget: 200000
+
+# Optional: path to a prior compliance CSV; prior feedback is fed to the LLM to
+# keep findings consistent run-to-run.
+previous_results_csv: null
+
+# Optional environment allowlist, e.g. [PROD]; [] presumably means all (TODO confirm).
+environments: []
+
+# LLM used by the compliance scorer.
+scorers:
+  mcp_style_compliance:
+    model_config: datasets/mcp_readability/model_configs/gemini_2.5_pro_model.yaml
+
+# Endpoint check concurrency.
+runners:
+  endpoint_runners: 4
+
+# Output: results/<job_id>/mcp_readability_compliance.csv
+# Add a `bigquery` block to also append the compliance rows to the standard eval
+# table (<gcp_project_id>.evalbench.results); the schema auto-evolves to add the
+# compliance columns. No new dataset/table is created.
+reporting:
+  csv:
+    output_directory: 'results'
+  # bigquery:
+  #   gcp_project_id: senseai-team
@@ -0,0 +1,37 @@
+# Sample MCP tools spec for offline, deterministic testing of the readability
+# check (tools_source.type: file). Intentionally mixes good and bad style so the
+# LLM has something to flag.
+tools:
+  - name: list_datasets
+    description: "List all BigQuery datasets in the given project. Use this to discover available datasets before querying."
+    inputSchema:
+      type: object
+      properties:
+        project_id:
+          type: string
+          description: "The Google Cloud project ID to list datasets from."
+      required: ["project_id"]
+
+  - name: RunQuery
+    description: "runs sql"
+    inputSchema:
+      type: object
+      properties:
+        q:
+          type: string
+
+  - name: delete_table
+    description: "Deletes a table."
+    inputSchema:
+      type: object
+      properties:
+        project_id:
+          type: string
+          description: "Project ID."
+        dataset_id:
+          type: string
+          description: "Dataset ID."
+        table_id:
+          type: string
+          description: "Table ID."
+      required: ["project_id", "dataset_id", "table_id"]