Skip to content
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
124 changes: 124 additions & 0 deletions datasets/mcp_readability/endpoints.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,124 @@
# MCP endpoints to check against the Data Cloud MCP style guide.
#
# Mirrors cloud/databases/mcp/readability/monitored_endpoints.textproto
# (EndpointList). The proto `type` maps to `environment`; all live endpoints are
# fetched via the `mcp` source over Streamable HTTP using ADC auth.
#
# `defaults` provides values shared by all endpoints; each endpoint may override
# any of them. `tools_source.type` makes the tool listing pluggable:
# - mcp: fetch tools/list from a live MCP server (official mcp SDK). Needs
# network access and (for these googleapis.com endpoints) ADC auth.
# - file: read a local YAML/JSON tools spec (offline / deterministic).

# Values shared by every endpoint; an individual endpoint entry may override
# any of them.
defaults:
  token_budget: 200000
  endpoint_type: REMOTE  # EndpointType: REMOTE | LOCAL
  # NOTE(review): nesting restored from a flattened source. `type` sits under
  # `tools_source` per the `tools_source.type` wording in the file header;
  # confirm that `auth` belongs under `tools_source` as well.
  tools_source:
    type: mcp  # mcp | file
    auth: google_credentials  # ADC bearer + x-goog-user-project

# One list item per monitored MCP endpoint. Each item sets `product_name`,
# `endpoint_url` and `environment`; anything else falls back to `defaults`.
endpoints:
  - product_name: "AlloyDB for PostgreSQL"
    endpoint_url: "https://alloydb.us-central1.rep.googleapis.com/mcp"
    environment: PROD

  - product_name: "BigQuery"
    endpoint_url: "https://bigquery.googleapis.com/mcp"
    environment: PROD

  - product_name: "BigQuery Data Transfer Service"
    endpoint_url: "https://bigquerydatatransfer.googleapis.com/mcp"
    environment: PROD

  - product_name: "BigQuery Migration Service"
    endpoint_url: "https://bigquerymigration.googleapis.com/mcp"
    environment: PROD

  - product_name: "Bigtable"
    endpoint_url: "https://bigtableadmin.googleapis.com/mcp"
    environment: PROD

  - product_name: "Cloud SQL"
    endpoint_url: "https://sqladmin.googleapis.com/mcp"
    environment: PROD

  - product_name: "Firestore"
    endpoint_url: "https://firestore.googleapis.com/mcp"
    environment: PROD

  - product_name: "Spanner"
    endpoint_url: "https://spanner.googleapis.com/mcp"
    environment: PROD

  - product_name: "Data Migration Service"
    endpoint_url: "https://datamigration.googleapis.com/mcp"
    environment: PROD

  - product_name: "Datastream"
    endpoint_url: "https://datastream.googleapis.com/mcp"
    environment: PROD

  - product_name: "Oracle Database"
    endpoint_url: "https://oracledatabase.googleapis.com/mcp"
    environment: PROD

  # NOTE(review): the next two names spell "Memory store" as two words, while
  # the endpoint host below spells it "memorystore". The strings are left
  # unchanged because `product_name` is an exception matcher and mirrors the
  # textproto; confirm the intended spelling before renaming.
  - product_name: "Memory store for Redis/Redis Cluster"
    endpoint_url: "https://redis.googleapis.com/mcp"
    environment: PROD

  - product_name: "Memory store for Valkey"
    endpoint_url: "https://memorystore.googleapis.com/mcp"
    environment: PROD

  - product_name: "Pub/Sub"
    endpoint_url: "https://pubsub.googleapis.com/mcp"
    environment: PROD

  - product_name: "Managed Kafka"
    endpoint_url: "https://managedkafka.us-central1.rep.googleapis.com/mcp"
    environment: PROD

  # Cloud Composer is listed twice on purpose: once per environment.
  - product_name: "Cloud Composer"
    endpoint_url: "https://us-central1-staging-composer.sandbox.googleapis.com/mcp"
    environment: STAGING

  - product_name: "Cloud Composer"
    endpoint_url: "https://composer.us-central1.rep.googleapis.com/mcp"
    environment: PROD

  - product_name: "Knowledge Catalog"
    endpoint_url: "https://dataplex.googleapis.com/mcp"
    environment: PROD

  - product_name: "Knowledge Catalog Data Products"
    endpoint_url: "https://staging-dataplex.sandbox.googleapis.com/mcp"
    environment: STAGING

  - product_name: "Database Insights (EGO for AlloyDB Tools)"
    endpoint_url: "https://autopush-databaseinsights.sandbox.googleapis.com/mcp"
    environment: AUTOPUSH

  - product_name: "DB: AI Assisted Troubleshooting"
    endpoint_url: "https://databaseinsights.googleapis.com/mcp"
    environment: PROD

  - product_name: "Cloud Storage MCP Server"
    endpoint_url: "https://storage.googleapis.com/storage/mcp"
    environment: PROD

  - product_name: "Database Center API for MCP"
    endpoint_url: "https://databasecenter.googleapis.com/mcp"
    environment: PROD

  - product_name: "Dataproc"
    endpoint_url: "https://dataproc.googleapis.com/mcp"
    environment: PROD

  # --- Offline / deterministic sample (no network or auth) ------------------
  # Uncomment to smoke-test the pipeline without hitting live endpoints.
  # - product_name: "Sample Data Cloud MCP (local file)"
  #   endpoint_url: "file://datasets/mcp_readability/sample_tools.yaml"
  #   endpoint_type: LOCAL
  #   environment: DEV
  #   tools_source:
  #     type: file
  #     path: datasets/mcp_readability/sample_tools.yaml
24 changes: 24 additions & 0 deletions datasets/mcp_readability/exceptions.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,24 @@
# Style-rule exceptions (waivers).
#
# Each entry waives one style `rule_id` for the endpoints it matches. A waived
# rule is NOT counted toward p0/p1/p2 issues and is reported separately under
# "waived" in the LLM feedback.
#
# Matchers (all that are present must match; an absent field or "*" = match-all):
# - endpoint_url
# - product_name
# - environment

# `rule_id` should match the style guide's section heading text (the scorer uses
# headings as rule IDs), e.g. "Tool Names", "Concise Descriptions",
# "Use Enums", "Limit Options".
# List of waivers. Each item names the `rule_id` to waive, a human-readable
# `reason`, and one or more matcher fields (`endpoint_url`, `product_name`,
# `environment`) that select the endpoints it applies to.
exceptions:
  # Example: waive the tool-naming rule for one specific endpoint.
  - product_name: "Sample Data Cloud MCP (local file)"
    rule_id: "Tool Names"
    reason: "Sample fixture intentionally uses a non-compliant tool name (RunQuery)."

  # Example: waive the enum rule for every autopush endpoint.
  - environment: AUTOPUSH
    rule_id: "Use Enums"
    reason: "Autopush builds defer enum constraints until promotion to prod."
Original file line number Diff line number Diff line change
@@ -0,0 +1,4 @@
# Model configuration: four flat keys.
# NOTE(review): this file's header is missing here; it is presumably the
# `model_config` that run_config.yaml references for the compliance scorer.
# Confirm.
generator: gcp_vertex_gemini
vertex_model: gemini-2.5-pro
base_prompt: ''  # empty string
execs_per_minute: 5  # NOTE(review): presumably a request rate cap - confirm
43 changes: 43 additions & 0 deletions datasets/mcp_readability/run_config.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,43 @@
############################################################
### MCP Style-Guide Compliance (Readability) Run Config
############################################################
# Drives the `mcp_readability` orchestrator: fetch each endpoint's tools (man-page
# markup), evaluate it against the style guide with an LLM, and write a compliance
# CSV (and optionally append the rows to the eval BigQuery table).

# Selects the orchestrator that consumes the rest of this file.
orchestrator: mcp_readability

# Inputs
endpoints_config: datasets/mcp_readability/endpoints.yaml
style_guide: datasets/mcp_readability/style_guide.md
exceptions_config: datasets/mcp_readability/exceptions.yaml  # optional
tools_generator_config: datasets/mcp_readability/tools_generator.yaml

# Default token budget for token_budget_used_percent (endpoints may override).
token_budget: 200000

# Optional: path to a prior compliance CSV; prior feedback is fed to the LLM to
# keep findings consistent run-to-run.
previous_results_csv: null

# Optional: only check endpoints whose environment is in this list, e.g. [PROD].
environments: []

# LLM used by the compliance scorer.
scorers:
  mcp_style_compliance:
    model_config: datasets/mcp_readability/model_configs/gemini_2.5_pro_model.yaml

# Endpoint check concurrency.
runners:
  endpoint_runners: 4

# Output: results/<job_id>/mcp_readability_compliance.csv
# Add a `bigquery` block to also append the compliance rows to the standard eval
# table (<gcp_project_id>.evalbench.results); the schema auto-evolves to add the
# compliance columns. No new dataset/table is created.
reporting:
  csv:
    output_directory: 'results'
  # bigquery:
  #   gcp_project_id: senseai-team
37 changes: 37 additions & 0 deletions datasets/mcp_readability/sample_tools.yaml

Copy link
Copy Markdown
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

IIUC, this refers to the offline file-based approach; I think we should stick to raw json as input, otherwise it remains unclear how this file can be realistically constructed by a developer and aligned with the man page expected by the eval.

Original file line number Diff line number Diff line change
@@ -0,0 +1,37 @@
# Sample MCP tools spec for offline, deterministic testing of the readability
# check (tools_source.type: file). Intentionally mixes good and bad style so the
# LLM has something to flag.
# Each item is one tool: `name`, `description`, and an `inputSchema` object
# whose `properties` map holds one entry per argument.
tools:
  # Well-documented tool: descriptive name, purpose, and argument description.
  - name: list_datasets
    description: "List all BigQuery datasets in the given project. Use this to discover available datasets before querying."
    inputSchema:
      type: object
      properties:
        project_id:
          type: string
          description: "The Google Cloud project ID to list datasets from."
      required: ["project_id"]

  # Intentionally non-compliant: the exceptions file's "Tool Names" example
  # calls this name (RunQuery) non-compliant. The tool also has a terse
  # description, an undocumented `q` argument, and no `required` list.
  - name: RunQuery
    description: "runs sql"
    inputSchema:
      type: object
      properties:
        q:
          type: string

  # Minimal descriptions throughout; all three arguments are required.
  - name: delete_table
    description: "Deletes a table."
    inputSchema:
      type: object
      properties:
        project_id:
          type: string
          description: "Project ID."
        dataset_id:
          type: string
          description: "Dataset ID."
        table_id:
          type: string
          description: "Table ID."
      required: ["project_id", "dataset_id", "table_id"]
Loading
Loading