From a7cedaef23140b5064128de23166ee9beb71bd9a Mon Sep 17 00:00:00 2001 From: Michiel De Smet Date: Wed, 21 May 2025 23:53:19 +0800 Subject: [PATCH 01/26] feat: add mcp command group with create-mcp-proxy functionality --- src/datapilot/core/mcp_utils/mcp.py | 59 +++++++++++++++++++++++++++++ 1 file changed, 59 insertions(+) create mode 100644 src/datapilot/core/mcp_utils/mcp.py diff --git a/src/datapilot/core/mcp_utils/mcp.py b/src/datapilot/core/mcp_utils/mcp.py new file mode 100644 index 0000000..b520f7c --- /dev/null +++ b/src/datapilot/core/mcp_utils/mcp.py @@ -0,0 +1,59 @@ +import asyncio +import logging +import json +import shutil + +from mcp import ClientSession, StdioServerParameters +from mcp.client.stdio import stdio_client +import click + + +logging.basicConfig(level=logging.INFO) + +# New mcp group +@click.group() +def mcp(): + """mcp specific commands.""" + + +@mcp.command("create-mcp-proxy") +def create_mcp_proxy(): + content = click.edit() + if content is None: + click.echo("No input provided.") + + output = asyncio.run(list_tools()) + click.echo(json.dumps(output, indent=2)) + +async def list_tools(command: str, args: list[str], env: dict[str, str]) -> str: + command = shutil.which(command) + + # Create server parameters for stdio connection + server_params = StdioServerParameters( + command=command, # Executable + args=args, # Optional command line arguments + env=None, # Optional environment variables + ) + + async with stdio_client(server_params) as (read, write): + async with ClientSession( + read, write + ) as session: + # Initialize the connection + await session.initialize() + + # List available tools + tools = await session.list_tools() + + # print as json + tools_list = [ + { + "name": tool.name, + "description": tool.description, + "inputSchema": tool.inputSchema, + } + for tool in tools.tools + ] + + return tools_list + From f7a13561f0f4693e8bc0c508985303ac04d958f9 Mon Sep 17 00:00:00 2001 From: "Michiel De Smet (aider)" Date: Wed, 21 May 2025 23:54:52 +0800 Subject: [PATCH 02/26] feat: add MCP proxy config processing with inputs and servers --- src/datapilot/core/mcp_utils/mcp.py | 59 +++++++++++++++++++++++++++-- 1 file changed, 56 insertions(+), 3 deletions(-) diff --git a/src/datapilot/core/mcp_utils/mcp.py b/src/datapilot/core/mcp_utils/mcp.py index b520f7c..959fd54 100644 --- a/src/datapilot/core/mcp_utils/mcp.py +++ b/src/datapilot/core/mcp_utils/mcp.py @@ -21,9 +21,62 @@ def create_mcp_proxy(): content = click.edit() if content is None: click.echo("No input provided.") - - output = asyncio.run(list_tools()) - click.echo(json.dumps(output, indent=2)) + return + + try: + config_data = json.loads(content) + mcp_config = config_data.get("mcp", {}) + + # Process inputs + inputs = [] + for input_def in mcp_config.get("inputs", []): + inputs.append(InputParameter( + name=input_def.get("name", input_def["id"]), + type=input_def["type"], + required=input_def.get("required", False), + key=input_def["id"].lower(), + description=input_def["description"], + encrypted=input_def.get("password", False) + )) + + # Process servers + tool_config = [] + for server_name, server_def in mcp_config.get("servers", {}).items(): + # Command config + tool_config.append(IntegrationConfigItem( + key="command", + value=server_def["command"] + )) + + # Arguments config + tool_config.append(IntegrationConfigItem( + key="arguments", + value=server_def.get("args", []) + )) + + # Environment variables + env_items = [] + for var_name, var_value in server_def.get("env", {}).items(): + if var_value.startswith("${input:"): + _, input_id = var_value[2:-1].split(":") + var_value = f"${{{input_id}}}" + + env_items.append({"key": var_name, "value": var_value}) + + tool_config.append(IntegrationConfigItem( + key="env", + value=env_items + )) + + integration_model = CustomDatamateIntegrationModel( + config=inputs, + toolConfig=tool_config + ) + + click.echo(json.dumps(integration_model, cls=EnhancedJSONEncoder, indent=2)) + + except Exception as e: + click.echo(f"Error processing config: {str(e)}") async def list_tools(command: str, args: list[str], env: dict[str, str]) -> str: command = shutil.which(command) From 6e23a6a47641b551fa41f2aac257488918dcd08e Mon Sep 17 00:00:00 2001 From: Michiel De Smet Date: Wed, 21 May 2025 23:56:30 +0800 Subject: [PATCH 03/26] feat: add MCP integration and tool listing --- .gitignore | 1 + setup.py | 1 + src/datapilot/cli/main.py | 2 + src/datapilot/core/mcp_utils/__init__.py | 2 + src/datapilot/core/mcp_utils/mcp.py | 59 ++---------------------- 5 files changed, 9 insertions(+), 56 deletions(-) create mode 100644 src/datapilot/core/mcp_utils/__init__.py diff --git a/.gitignore b/.gitignore index 77973dd..23bb0a1 100644 --- a/.gitignore +++ b/.gitignore @@ -72,3 +72,4 @@ docs/_build # Mypy Cache .mypy_cache/ +.aider* diff --git a/setup.py b/setup.py index 28859c9..ff91f49 100644 --- a/setup.py +++ b/setup.py @@ -68,6 +68,7 @@ def read(*names, **kwargs): "tabulate~=0.9.0", "requests>=2.31", "sqlglot~=25.30.0", + "mcp~=1.9.0" ], extras_require={ # eg: diff --git a/src/datapilot/cli/main.py b/src/datapilot/cli/main.py index 9e0cfb3..f0f1796 100644 --- a/src/datapilot/cli/main.py +++ b/src/datapilot/cli/main.py @@ -1,5 +1,6 @@ import click +from datapilot.core.mcp_utils.mcp import mcp from datapilot.core.platforms.dbt.cli.cli import dbt @@ -9,3 +10,4 @@ def datapilot(): datapilot.add_command(dbt) +datapilot.add_command(mcp) diff --git a/src/datapilot/core/mcp_utils/__init__.py b/src/datapilot/core/mcp_utils/__init__.py new file mode 100644 index 0000000..5216d6d --- /dev/null +++ b/src/datapilot/core/mcp_utils/__init__.py @@ -0,0 +1,2 @@ +DBT = [] +SQL = [] diff --git a/src/datapilot/core/mcp_utils/mcp.py b/src/datapilot/core/mcp_utils/mcp.py index 959fd54..b520f7c 100644 --- a/src/datapilot/core/mcp_utils/mcp.py +++ b/src/datapilot/core/mcp_utils/mcp.py @@ -21,62 +21,9 @@ def create_mcp_proxy(): content = click.edit() if content is None: click.echo("No input provided.") - return - - try: - config_data = json.loads(content) - mcp_config = config_data.get("mcp", {}) - - # Process inputs - inputs = [] - for input_def in mcp_config.get("inputs", []): - inputs.append(InputParameter( - name=input_def.get("name", input_def["id"]), - type=input_def["type"], - required=input_def.get("required", False), - key=input_def["id"].lower(), - description=input_def["description"], - encrypted=input_def.get("password", False) - )) - - # Process servers - tool_config = [] - for server_name, server_def in mcp_config.get("servers", {}).items(): - # Command config - tool_config.append(IntegrationConfigItem( - key="command", - value=server_def["command"] - )) - - # Arguments config - tool_config.append(IntegrationConfigItem( - key="arguments", - value=server_def.get("args", []) - )) - - # Environment variables - env_items = [] - for var_name, var_value in server_def.get("env", {}).items(): - if var_value.startswith("${input:"): - _, input_id = var_value[2:-1].split(":") - var_value = f"${{{input_id}}}" - - env_items.append({"key": var_name, "value": var_value}) - - tool_config.append(IntegrationConfigItem( - key="env", - value=env_items - )) - - integration_model = CustomDatamateIntegrationModel( - config=inputs, - toolConfig=tool_config - ) - - click.echo(json.dumps(integration_model, cls=EnhancedJSONEncoder, indent=2)) - - except Exception as e: - click.echo(f"Error processing config: {str(e)}") + + output = asyncio.run(list_tools()) + click.echo(json.dumps(output, indent=2)) async def list_tools(command: str, args: list[str], env: dict[str, str]) -> str: command = shutil.which(command) From 812c1f81c2f4ae053cfa47dc48e14a3149c88c95 Mon Sep 17 00:00:00 2001 From: "Michiel De Smet (aider)" Date: Thu, 22 May 2025 00:00:36 +0800 Subject: [PATCH 04/26] feat: add JSON config parsing with input token replacement --- src/datapilot/core/mcp_utils/mcp.py | 71 +++++++++++++++++++++-------- 1 file changed, 51 insertions(+), 20 deletions(-) diff --git a/src/datapilot/core/mcp_utils/mcp.py b/src/datapilot/core/mcp_utils/mcp.py index b520f7c..f049df7 100644 --- a/src/datapilot/core/mcp_utils/mcp.py +++ b/src/datapilot/core/mcp_utils/mcp.py @@ -21,32 +21,65 @@ def create_mcp_proxy(): content = click.edit() if content is None: click.echo("No input provided.") + return - output = asyncio.run(list_tools()) - click.echo(json.dumps(output, indent=2)) + try: + config = json.loads(content) + except json.JSONDecodeError: + click.echo("Invalid JSON content.") + return -async def list_tools(command: str, args: list[str], env: dict[str, str]) -> str: - command = shutil.which(command) + inputs = {} + mcp_config = config.get("mcp", {}) + + # Process inputs first + for input_def in mcp_config.get("inputs", []): + input_id = input_def["id"] + inputs[input_id] = click.prompt( + input_def.get("description", input_id), + hide_input=input_def.get("password", False) + ) + + # Process servers + servers = mcp_config.get("servers", {}) + for server_name, server_config in servers.items(): + # Replace input tokens in args + processed_args = [ + inputs.get(arg[8:-1], arg) if isinstance(arg, str) and arg.startswith("${input:") else arg + for arg in server_config.get("args", []) + ] + + # Replace input tokens in environment variables + processed_env = { + k: inputs.get(v[8:-1], v) if isinstance(v, str) and v.startswith("${input:") else v + for k, v in server_config.get("env", {}).items() + } + + # Execute with processed parameters + output = asyncio.run(list_tools( + command=server_config["command"], + args=processed_args, + env=processed_env + )) + click.echo(f"\nServer: {server_name}") + click.echo(json.dumps(output, indent=2)) + +async def list_tools(command: str, args: list[str], env: dict[str, str]): + command_path = shutil.which(command) + if not command_path: + raise click.UsageError(f"Command not found: {command}") - # Create server parameters for stdio connection server_params = StdioServerParameters( - command=command, # Executable - args=args, # Optional command line arguments - env=None, # Optional environment variables + command=command_path, + args=args, + env=env, # Now using processed env ) - + async with stdio_client(server_params) as (read, write): - async with ClientSession( - read, write - ) as session: - # Initialize the connection + async with ClientSession(read, write) as session: await session.initialize() - - # List available tools tools = await session.list_tools() - - # print as json - tools_list = [ + return [ { "name": tool.name, "description": tool.description, @@ -55,5 +88,3 @@ async def list_tools(command: str, args: list[str], env: dict[str, str]) -> str: for tool in tools.tools ] - return tools_list - From 55056b67abcf7913a936b2b0c9a40e1dff9a9d1e Mon Sep 17 00:00:00 2001 From: Michiel De Smet Date: Thu, 22 May 2025 00:10:28 +0800 Subject: [PATCH 05/26] refactor: rename command and improve tool list structure --- src/datapilot/core/mcp_utils/mcp.py | 14 ++++++++------ 1 file changed, 8 insertions(+), 6 deletions(-) diff --git a/src/datapilot/core/mcp_utils/mcp.py b/src/datapilot/core/mcp_utils/mcp.py index f049df7..885121e 100644 --- a/src/datapilot/core/mcp_utils/mcp.py +++ b/src/datapilot/core/mcp_utils/mcp.py @@ -7,7 +7,6 @@ from mcp.client.stdio import stdio_client import click - logging.basicConfig(level=logging.INFO) # New mcp group @@ -16,7 +15,7 @@ def mcp(): """mcp specific commands.""" -@mcp.command("create-mcp-proxy") +@mcp.command("inspect-mcp-server") def create_mcp_proxy(): content = click.edit() if content is None: @@ -31,7 +30,7 @@ def create_mcp_proxy(): inputs = {} mcp_config = config.get("mcp", {}) - + # Process inputs first for input_def in mcp_config.get("inputs", []): input_id = input_def["id"] @@ -45,7 +44,7 @@ def create_mcp_proxy(): for server_name, server_config in servers.items(): # Replace input tokens in args processed_args = [ - inputs.get(arg[8:-1], arg) if isinstance(arg, str) and arg.startswith("${input:") else arg + inputs.get(arg[8:-1], arg) if isinstance(arg, str) and arg.startswith("${input:") else arg for arg in server_config.get("args", []) ] @@ -74,12 +73,12 @@ async def list_tools(command: str, args: list[str], env: dict[str, str]): args=args, env=env, # Now using processed env ) - + async with stdio_client(server_params) as (read, write): async with ClientSession(read, write) as session: await session.initialize() tools = await session.list_tools() - return [ + mcp_tools = [ { "name": tool.name, "description": tool.description, @@ -88,3 +87,6 @@ async def list_tools(command: str, args: list[str], env: dict[str, str]): for tool in tools.tools ] + return { + "tools": mcp_tools, + } From 7eb88f2851436de23786c4e324af1630edd3ced6 Mon Sep 17 00:00:00 2001 From: Michiel De Smet Date: Thu, 22 May 2025 00:11:23 +0800 Subject: [PATCH 06/26] chore: format and organize imports in mcp utils --- setup.py | 2 +- src/datapilot/core/mcp_utils/mcp.py | 20 ++++++++------------ 2 files changed, 9 insertions(+), 13 deletions(-) diff --git a/setup.py b/setup.py index ff91f49..895bf14 100644 --- a/setup.py +++ b/setup.py @@ -68,7 +68,7 @@ def read(*names, **kwargs): "tabulate~=0.9.0", "requests>=2.31", "sqlglot~=25.30.0", - "mcp~=1.9.0" + "mcp~=1.9.0", ], extras_require={ # eg: diff --git a/src/datapilot/core/mcp_utils/mcp.py b/src/datapilot/core/mcp_utils/mcp.py index 885121e..a43a98c 100644 --- a/src/datapilot/core/mcp_utils/mcp.py +++ b/src/datapilot/core/mcp_utils/mcp.py @@ -1,14 +1,16 @@ import asyncio -import logging import json +import logging import shutil -from mcp import ClientSession, StdioServerParameters -from mcp.client.stdio import stdio_client import click +from mcp import ClientSession +from mcp import StdioServerParameters +from mcp.client.stdio import stdio_client logging.basicConfig(level=logging.INFO) + # New mcp group @click.group() def mcp(): @@ -34,10 +36,7 @@ def create_mcp_proxy(): # Process inputs first for input_def in mcp_config.get("inputs", []): input_id = input_def["id"] - inputs[input_id] = click.prompt( - input_def.get("description", input_id), - hide_input=input_def.get("password", False) - ) + inputs[input_id] = click.prompt(input_def.get("description", input_id), hide_input=input_def.get("password", False)) # Process servers servers = mcp_config.get("servers", {}) @@ -55,14 +54,11 @@ def create_mcp_proxy(): } # Execute with processed parameters - output = asyncio.run(list_tools( - command=server_config["command"], - args=processed_args, - env=processed_env - )) + output = asyncio.run(list_tools(command=server_config["command"], args=processed_args, env=processed_env)) click.echo(f"\nServer: {server_name}") click.echo(json.dumps(output, indent=2)) + async def list_tools(command: str, args: list[str], env: dict[str, str]): command_path = shutil.which(command) if not command_path: From 9b3979fa04e53739e876e370199c54b9971aa4bb Mon Sep 17 00:00:00 2001 From: "Michiel De Smet (aider)" Date: Thu, 22 May 2025 09:53:18 +0800 Subject: [PATCH 07/26] feat: dynamically discover input tokens from config --- src/datapilot/core/mcp_utils/mcp.py | 28 ++++++++++++++++++++++++---- 1 file changed, 24 insertions(+), 4 deletions(-) diff --git a/src/datapilot/core/mcp_utils/mcp.py b/src/datapilot/core/mcp_utils/mcp.py index a43a98c..ea93c7e 100644 --- a/src/datapilot/core/mcp_utils/mcp.py +++ b/src/datapilot/core/mcp_utils/mcp.py @@ -10,6 +10,18 @@ logging.basicConfig(level=logging.INFO) +def find_input_tokens(data): + tokens = set() + if isinstance(data, list): + for item in data: + tokens.update(find_input_tokens(item)) + elif isinstance(data, dict): + for value in data.values(): + tokens.update(find_input_tokens(value)) + elif isinstance(data, str) and data.startswith("${input:"): + tokens.add(data[8:-1].strip()) + return tokens + # New mcp group @click.group() @@ -33,10 +45,18 @@ def create_mcp_proxy(): inputs = {} mcp_config = config.get("mcp", {}) - # Process inputs first - for input_def in mcp_config.get("inputs", []): - input_id = input_def["id"] - inputs[input_id] = click.prompt(input_def.get("description", input_id), hide_input=input_def.get("password", False)) + # Collect all input IDs from config and server templates + input_ids = set() + input_ids.update(find_input_tokens(mcp_config.get("servers", {}))) + input_ids.update(input_def["id"] for input_def in mcp_config.get("inputs", [])) + + # Create prompt definitions merging config and discovered inputs + for input_id in input_ids: + input_def = next((d for d in mcp_config.get("inputs", []) if d["id"] == input_id), {}) + inputs[input_id] = click.prompt( + input_def.get("description", input_id), + hide_input=input_def.get("password", False) + ) # Process servers servers = mcp_config.get("servers", {}) From 3aa8e71cbe2030a02cdd0c0d56aab05ed64cc6dd Mon Sep 17 00:00:00 2001 From: "Michiel De Smet (aider)" Date: Thu, 22 May 2025 09:58:50 +0800 Subject: [PATCH 08/26] feat: include server name in JSON output --- src/datapilot/core/mcp_utils/mcp.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/datapilot/core/mcp_utils/mcp.py b/src/datapilot/core/mcp_utils/mcp.py index ea93c7e..c32edd2 100644 --- a/src/datapilot/core/mcp_utils/mcp.py +++ b/src/datapilot/core/mcp_utils/mcp.py @@ -75,8 +75,8 @@ def create_mcp_proxy(): # Execute with processed parameters output = asyncio.run(list_tools(command=server_config["command"], args=processed_args, env=processed_env)) - click.echo(f"\nServer: {server_name}") - click.echo(json.dumps(output, indent=2)) + output_with_name = {"name": server_name, **output} + click.echo(json.dumps(output_with_name, indent=2)) async def list_tools(command: str, args: list[str], env: dict[str, str]): From 4cf1404f19d2ddf1ecfc25fb9420287c68829b0d Mon Sep 17 00:00:00 2001 From: "Michiel De Smet (aider)" Date: Thu, 22 May 2025 10:00:45 +0800 Subject: [PATCH 09/26] feat: prompt user to select server when multiple configured --- src/datapilot/core/mcp_utils/mcp.py | 19 +++++++++++++++++-- 1 file changed, 17 insertions(+), 2 deletions(-) diff --git a/src/datapilot/core/mcp_utils/mcp.py b/src/datapilot/core/mcp_utils/mcp.py index c32edd2..cd2d5aa 100644 --- a/src/datapilot/core/mcp_utils/mcp.py +++ b/src/datapilot/core/mcp_utils/mcp.py @@ -58,9 +58,24 @@ def create_mcp_proxy(): hide_input=input_def.get("password", False) ) - # Process servers + # Select server servers = mcp_config.get("servers", {}) - for server_name, server_config in servers.items(): + server_names = list(servers.keys()) + + if not server_names: + raise click.UsageError("No servers configured in mcp config") + + if len(server_names) > 1: + server_name = click.prompt( + "Choose a server", + type=click.Choice(server_names), + show_choices=True + ) + else: + server_name = server_names[0] + + if server_name in servers: + server_config = servers[server_name] # Replace input tokens in args processed_args = [ inputs.get(arg[8:-1], arg) if isinstance(arg, str) and arg.startswith("${input:") else arg From dc45a67673e536a0bffd251b5c6cbac9feec30f6 Mon Sep 17 00:00:00 2001 From: "Michiel De Smet (aider)" Date: Thu, 22 May 2025 10:04:57 +0800 Subject: [PATCH 10/26] feat: add clipboard support for JSON output --- setup.py | 1 + 1 file changed, 1 insertion(+) diff --git a/setup.py b/setup.py index 895bf14..190e49f 100644 --- a/setup.py +++ b/setup.py @@ -69,6 +69,7 @@ def read(*names, **kwargs): "requests>=2.31", "sqlglot~=25.30.0", "mcp~=1.9.0", + "pyperclip~=1.8.2", ], extras_require={ # eg: From 969f6cc400936c8796d7b0de92cfebe0f7aad162 Mon Sep 17 00:00:00 2001 From: "Michiel De Smet (aider)" Date: Thu, 22 May 2025 10:11:18 +0800 Subject: [PATCH 11/26] feat: prompt only for inputs used by selected server --- src/datapilot/core/mcp_utils/mcp.py | 30 ++++++++++++++++------------- 1 file changed, 17 insertions(+), 13 deletions(-) diff --git a/src/datapilot/core/mcp_utils/mcp.py b/src/datapilot/core/mcp_utils/mcp.py index cd2d5aa..ee86c8a 100644 --- a/src/datapilot/core/mcp_utils/mcp.py +++ b/src/datapilot/core/mcp_utils/mcp.py @@ -45,19 +45,6 @@ def create_mcp_proxy(): inputs = {} mcp_config = config.get("mcp", {}) - # Collect all input IDs from config and server templates - input_ids = set() - input_ids.update(find_input_tokens(mcp_config.get("servers", {}))) - input_ids.update(input_def["id"] for input_def in mcp_config.get("inputs", [])) - - # Create prompt definitions merging config and discovered inputs - for input_id in input_ids: - input_def = next((d for d in mcp_config.get("inputs", []) if d["id"] == input_id), {}) - inputs[input_id] = click.prompt( - input_def.get("description", input_id), - hide_input=input_def.get("password", False) - ) - # Select server servers = mcp_config.get("servers", {}) server_names = list(servers.keys()) @@ -76,6 +63,23 @@ def create_mcp_proxy(): if server_name in servers: server_config = servers[server_name] + + # Collect input tokens ONLY from this server's config + input_ids = find_input_tokens(server_config.get("args", [])) + input_ids.update(find_input_tokens(server_config.get("env", {}))) + + # Create prompt definitions using BOTH discovered tokens AND configured inputs + existing_input_ids = {i["id"] for i in mcp_config.get("inputs", [])} + inputs_to_prompt = input_ids.intersection(existing_input_ids) + inputs_to_prompt.update(input_ids) # Add any undiscovered-by-config inputs + + for input_id in inputs_to_prompt: + input_def = next((d for d in mcp_config.get("inputs", []) if d["id"] == input_id), {}) + inputs[input_id] = click.prompt( + input_def.get("description", input_id), + hide_input=input_def.get("password", False), + ) + # Replace input tokens in args processed_args = [ inputs.get(arg[8:-1], arg) if isinstance(arg, str) and arg.startswith("${input:") else arg From 1fcb811b1f49f58440d17fded2247f4cfb2391f6 Mon Sep 17 00:00:00 2001 From: Michiel De Smet Date: Thu, 22 May 2025 10:16:30 +0800 Subject: [PATCH 12/26] feat: add InputParameter dataclass for MCP utils --- src/datapilot/core/mcp_utils/mcp.py | 15 ++++++++++++--- 1 file changed, 12 insertions(+), 3 deletions(-) diff --git a/src/datapilot/core/mcp_utils/mcp.py b/src/datapilot/core/mcp_utils/mcp.py index ee86c8a..eb5f625 100644 --- a/src/datapilot/core/mcp_utils/mcp.py +++ b/src/datapilot/core/mcp_utils/mcp.py @@ -2,6 +2,7 @@ import json import logging import shutil +from dataclasses import dataclass import click from mcp import ClientSession @@ -10,6 +11,14 @@ logging.basicConfig(level=logging.INFO) +@dataclass +class InputParameter(): + name: str + type: str + required: bool + key: str + description: str + def find_input_tokens(data): tokens = set() if isinstance(data, list): @@ -48,10 +57,10 @@ def create_mcp_proxy(): # Select server servers = mcp_config.get("servers", {}) server_names = list(servers.keys()) - + if not server_names: raise click.UsageError("No servers configured in mcp config") - + if len(server_names) > 1: server_name = click.prompt( "Choose a server", @@ -60,7 +69,7 @@ def create_mcp_proxy(): ) else: server_name = server_names[0] - + if server_name in servers: server_config = servers[server_name] From 470ad9d481be355bce4818d57c9adc0620265b82 Mon Sep 17 00:00:00 2001 From: "Michiel De Smet (aider)" Date: Thu, 22 May 2025 10:18:21 +0800 Subject: [PATCH 13/26] feat: add input parameters config to command output --- src/datapilot/core/mcp_utils/mcp.py | 15 ++++++++++++++- 1 file changed, 14 insertions(+), 1 deletion(-) diff --git a/src/datapilot/core/mcp_utils/mcp.py b/src/datapilot/core/mcp_utils/mcp.py index eb5f625..6af4ea2 100644 --- a/src/datapilot/core/mcp_utils/mcp.py +++ b/src/datapilot/core/mcp_utils/mcp.py @@ -82,12 +82,21 @@ def create_mcp_proxy(): inputs_to_prompt = input_ids.intersection(existing_input_ids) inputs_to_prompt.update(input_ids) # Add any undiscovered-by-config inputs + input_configs = [] for input_id in inputs_to_prompt: input_def = next((d for d in mcp_config.get("inputs", []) if d["id"] == input_id), {}) inputs[input_id] = click.prompt( input_def.get("description", input_id), hide_input=input_def.get("password", False), ) + # Create InputParameters config entry + input_configs.append(InputParameter( + name=input_def.get("name", input_id), + type=input_def.get("type", "string"), + required=input_def.get("required", True), + key=input_id, + description=input_def.get("description", "") + ).__dict__) # Replace input tokens in args processed_args = [ @@ -103,7 +112,11 @@ def create_mcp_proxy(): # Execute with processed parameters output = asyncio.run(list_tools(command=server_config["command"], args=processed_args, env=processed_env)) - output_with_name = {"name": server_name, **output} + output_with_name = { + "name": server_name, + "config": input_configs, + **output + } click.echo(json.dumps(output_with_name, indent=2)) From 5c9b5fa9eeac47364b8d3b9e018a29cb05ced039 Mon Sep 17 00:00:00 2001 From: Michiel De Smet Date: Thu, 22 May 2025 10:25:56 +0800 Subject: [PATCH 14/26] refactor: hardcode password input settings in mcp proxy --- src/datapilot/core/mcp_utils/mcp.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/src/datapilot/core/mcp_utils/mcp.py b/src/datapilot/core/mcp_utils/mcp.py index 6af4ea2..23cd3e0 100644 --- a/src/datapilot/core/mcp_utils/mcp.py +++ b/src/datapilot/core/mcp_utils/mcp.py @@ -87,13 +87,13 @@ def create_mcp_proxy(): input_def = next((d for d in mcp_config.get("inputs", []) if d["id"] == input_id), {}) inputs[input_id] = click.prompt( input_def.get("description", input_id), - hide_input=input_def.get("password", False), + hide_input=True, ) # Create InputParameters config entry input_configs.append(InputParameter( name=input_def.get("name", input_id), - type=input_def.get("type", "string"), - required=input_def.get("required", True), + type="password", + required=True, key=input_id, description=input_def.get("description", "") ).__dict__) From 4304503a0b6f19f81fa847d820ff82c509b663b4 Mon Sep 17 00:00:00 2001 From: "Michiel De Smet (aider)" Date: Thu, 22 May 2025 10:26:00 +0800 Subject: [PATCH 15/26] feat: copy JSON output to clipboard with notification --- src/datapilot/core/mcp_utils/mcp.py | 9 ++++++++- 1 file changed, 8 insertions(+), 1 deletion(-) diff --git a/src/datapilot/core/mcp_utils/mcp.py b/src/datapilot/core/mcp_utils/mcp.py index 23cd3e0..98a249b 100644 --- a/src/datapilot/core/mcp_utils/mcp.py +++ b/src/datapilot/core/mcp_utils/mcp.py @@ -5,6 +5,7 @@ from dataclasses import dataclass import click +import pyperclip from mcp import ClientSession from mcp import StdioServerParameters from mcp.client.stdio import stdio_client @@ -117,7 +118,13 @@ def create_mcp_proxy(): "config": input_configs, **output } - click.echo(json.dumps(output_with_name, indent=2)) + output_json = json.dumps(output_with_name, indent=2) + click.echo(output_json) + try: + pyperclip.copy(output_json) + click.secho("\nOutput copied to clipboard!", fg="green") + except pyperclip.PyperclipException as e: + click.secho(f"\nFailed to copy to clipboard: {str(e)}", fg="yellow") async def list_tools(command: str, args: list[str], env: dict[str, str]): From f1bbb3a526263b116deff1edceb2e5be5c88aaca Mon Sep 17 00:00:00 2001 From: "Michiel De Smet (aider)" Date: Thu, 22 May 2025 10:31:27 +0800 Subject: [PATCH 16/26] feat: improve MCP server config handling and error reporting --- src/datapilot/core/mcp_utils/mcp.py | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) diff --git a/src/datapilot/core/mcp_utils/mcp.py b/src/datapilot/core/mcp_utils/mcp.py index 98a249b..ca14464 100644 --- a/src/datapilot/core/mcp_utils/mcp.py +++ b/src/datapilot/core/mcp_utils/mcp.py @@ -56,11 +56,14 @@ def create_mcp_proxy(): mcp_config = config.get("mcp", {}) # Select server - servers = mcp_config.get("servers", {}) + # Support both "servers" and "mcpServers" naming conventions + servers = mcp_config.get("mcpServers", mcp_config.get("servers", {})) server_names = list(servers.keys()) if not server_names: - raise click.UsageError("No servers configured in mcp config") + ctx = click.get_current_context() + click.secho("Error: No servers configured in mcp config (tried keys: 'mcpServers' and 'servers')", fg="red") + ctx.exit(1) if len(server_names) > 1: server_name = click.prompt( From 3d142a8bd1753acb43b110edfaa5f702d25bfabe Mon Sep 17 00:00:00 2001 From: Michiel De Smet Date: Thu, 22 May 2025 10:36:03 +0800 Subject: [PATCH 17/26] feat: add error handling for MCP server connection --- src/datapilot/core/mcp_utils/mcp.py | 45 +++++++++++++++-------------- 1 file changed, 24 insertions(+), 21 deletions(-) diff --git a/src/datapilot/core/mcp_utils/mcp.py b/src/datapilot/core/mcp_utils/mcp.py index ca14464..6a72ac8 100644 --- a/src/datapilot/core/mcp_utils/mcp.py +++ b/src/datapilot/core/mcp_utils/mcp.py @@ -135,25 +135,28 @@ async def list_tools(command: str, args: list[str], env: dict[str, str]): if not command_path: raise click.UsageError(f"Command not found: {command}") - server_params = StdioServerParameters( - command=command_path, - args=args, - env=env, # Now using processed env - ) - - async with stdio_client(server_params) as (read, write): - async with ClientSession(read, write) as session: - await session.initialize() - tools = await session.list_tools() - mcp_tools = [ - { - "name": tool.name, - "description": tool.description, - "inputSchema": tool.inputSchema, - } - for tool in tools.tools - ] + try: + server_params = StdioServerParameters( + command=command_path, + args=args, + env=env, # Now using processed env + ) - return { - "tools": mcp_tools, - } + async with stdio_client(server_params) as (read, write): + async with ClientSession(read, write) as session: + await session.initialize() + tools = await session.list_tools() + mcp_tools = [ + { + "name": tool.name, + "description": tool.description, + "inputSchema": tool.inputSchema, + } + for tool in tools.tools + ] + + return { + "tools": mcp_tools, + } + except Exception as e: + raise click.UsageError("Could not connect to MCP server: " + str(e)) From df5385f76137d40fe5210312cff099ef958b700f Mon Sep 17 00:00:00 2001 From: "Michiel De Smet (aider)" Date: Thu, 22 May 2025 10:43:00 +0800 Subject: [PATCH 18/26] feat: add command args and env with config placeholders to output --- src/datapilot/core/mcp_utils/mcp.py | 11 +++++++++++ 1 file changed, 11 insertions(+) diff --git a/src/datapilot/core/mcp_utils/mcp.py b/src/datapilot/core/mcp_utils/mcp.py index 6a72ac8..9dc3275 100644 --- a/src/datapilot/core/mcp_utils/mcp.py +++ b/src/datapilot/core/mcp_utils/mcp.py @@ -116,9 +116,20 @@ def create_mcp_proxy(): # Execute with processed parameters output = asyncio.run(list_tools(command=server_config["command"], args=processed_args, env=processed_env)) + # Add processed parameters to output output_with_name = { "name": server_name, "config": input_configs, + "command": server_config["command"], + "args": [ + f"${{{input_id}}}" if any(input_val in str(arg) for input_val in inputs.values()) else arg + for arg, input_id in zip(server_config.get("args", []), inputs.keys()) + ], + "env": { + k: f"${{{input_id}}}" if any(input_val in str(v) for input_val in inputs.values()) else v + for k, v in server_config.get("env", {}).items() + for input_id in inputs + }, **output } output_json = json.dumps(output_with_name, indent=2) From 51ae30f9a866f07f3fff290af6b108d3dbb10f20 Mon Sep 17 00:00:00 2001 From: "Michiel De Smet (aider)" Date: Thu, 22 May 2025 10:46:39 +0800 Subject: [PATCH 19/26] feat: validate MCP server type is stdio --- src/datapilot/core/mcp_utils/mcp.py | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) diff --git a/src/datapilot/core/mcp_utils/mcp.py b/src/datapilot/core/mcp_utils/mcp.py index 9dc3275..0920484 100644 --- a/src/datapilot/core/mcp_utils/mcp.py +++ b/src/datapilot/core/mcp_utils/mcp.py @@ -147,10 +147,15 @@ async def list_tools(command: str, args: list[str], env: dict[str, str]): raise click.UsageError(f"Command not found: {command}") try: + # Only support stdio server type + server_type = server_config.get("type", "stdio") + if server_type != "stdio": + raise click.UsageError(f"Only stdio MCP servers are supported. Found type: {server_type}") + server_params = StdioServerParameters( command=command_path, args=args, - env=env, # Now using processed env + env=env, ) async with stdio_client(server_params) as (read, write): From 14b7e1c952c32c77e44896d00b448c62d1b2d037 Mon Sep 17 00:00:00 2001 From: "Michiel De Smet (aider)" Date: Thu, 22 May 2025 10:49:07 +0800 Subject: [PATCH 20/26] fix: add server_config param to list_tools function --- src/datapilot/core/mcp_utils/mcp.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/datapilot/core/mcp_utils/mcp.py b/src/datapilot/core/mcp_utils/mcp.py index 0920484..ab6e704 100644 --- a/src/datapilot/core/mcp_utils/mcp.py +++ b/src/datapilot/core/mcp_utils/mcp.py @@ -115,7 +115,7 @@ def create_mcp_proxy(): } # Execute with processed parameters - output = asyncio.run(list_tools(command=server_config["command"], args=processed_args, env=processed_env)) + output = asyncio.run(list_tools(server_config=server_config, command=server_config["command"], args=processed_args, env=processed_env)) # Add processed parameters to output output_with_name = { "name": server_name, @@ -141,7 +141,7 @@ def create_mcp_proxy(): click.secho(f"\nFailed to copy to clipboard: {str(e)}", fg="yellow") -async def list_tools(command: str, args: list[str], env: dict[str, str]): +async def list_tools(server_config: dict, command: str, args: list[str], env: dict[str, str]): command_path = shutil.which(command) if not command_path: raise click.UsageError(f"Command not found: {command}") From a8bb1366e25f45995494137ea22d0e8be102bd73 Mon Sep 17 00:00:00 2001 From: "Michiel De Smet (aider)" Date: Thu, 22 May 2025 10:58:25 +0800 Subject: [PATCH 21/26] fix: normalize input tokens and preserve all server config args/env --- src/datapilot/core/mcp_utils/mcp.py | 7 +++---- 1 file changed, 3 insertions(+), 4 deletions(-) diff --git a/src/datapilot/core/mcp_utils/mcp.py b/src/datapilot/core/mcp_utils/mcp.py index ab6e704..3687db8 100644 --- a/src/datapilot/core/mcp_utils/mcp.py +++ b/src/datapilot/core/mcp_utils/mcp.py @@ -122,13 +122,12 @@ def create_mcp_proxy(): "config": input_configs, "command": server_config["command"], "args": [ - f"${{{input_id}}}" if any(input_val in str(arg) for input_val in inputs.values()) else arg - for arg, input_id in zip(server_config.get("args", []), inputs.keys()) + arg.replace("${input:", "${") if isinstance(arg, str) else arg + for arg in server_config.get("args", []) ], "env": { - k: f"${{{input_id}}}" if any(input_val in str(v) for input_val in inputs.values()) else v + k: v.replace("${input:", "${") if isinstance(v, str) else v for k, v in server_config.get("env", {}).items() - for input_id in inputs }, **output } From 8981ac31591805c982eb39f58fdc89c86be3e052 Mon Sep 17 00:00:00 2001 From: Michiel De Smet Date: Thu, 22 May 2025 11:02:39 +0800 Subject: [PATCH 22/26] refactor: simplify code formatting and improve string formatting --- src/datapilot/core/mcp_utils/mcp.py | 44 +++++++++++++---------------- 1 file changed, 20 insertions(+), 24 deletions(-) diff --git a/src/datapilot/core/mcp_utils/mcp.py b/src/datapilot/core/mcp_utils/mcp.py index 3687db8..02a5666 100644 --- a/src/datapilot/core/mcp_utils/mcp.py +++ b/src/datapilot/core/mcp_utils/mcp.py @@ -12,14 +12,16 @@ logging.basicConfig(level=logging.INFO) + @dataclass -class InputParameter(): +class InputParameter: name: str type: str required: bool key: str description: str + def find_input_tokens(data): tokens = set() if isinstance(data, list): @@ -66,11 +68,7 @@ def create_mcp_proxy(): ctx.exit(1) if len(server_names) > 1: - server_name = click.prompt( - "Choose a server", - type=click.Choice(server_names), - show_choices=True - ) + server_name = click.prompt("Choose a server", type=click.Choice(server_names), show_choices=True) else: server_name = server_names[0] @@ -94,13 +92,15 @@ def create_mcp_proxy(): hide_input=True, ) # Create InputParameters config entry - input_configs.append(InputParameter( - name=input_def.get("name", input_id), - type="password", - required=True, - key=input_id, - description=input_def.get("description", "") - ).__dict__) + input_configs.append( + InputParameter( + name=input_def.get("name", input_id), + type="password", + required=True, + key=input_id, + description=input_def.get("description", ""), + ).__dict__ + ) # Replace input tokens in args processed_args = [ @@ -115,21 +115,17 @@ def create_mcp_proxy(): } # Execute with processed parameters - output = asyncio.run(list_tools(server_config=server_config, command=server_config["command"], args=processed_args, env=processed_env)) + output = asyncio.run( + list_tools(server_config=server_config, command=server_config["command"], args=processed_args, env=processed_env) + ) # Add processed parameters to output output_with_name = { "name": server_name, "config": input_configs, "command": server_config["command"], - "args": [ - arg.replace("${input:", "${") if isinstance(arg, str) else arg - for arg in server_config.get("args", []) - ], - "env": { - k: v.replace("${input:", "${") if isinstance(v, str) else v - for k, v in server_config.get("env", {}).items() - }, - **output + "args": [arg.replace("${input:", "${") if isinstance(arg, str) else arg for arg in server_config.get("args", [])], + "env": {k: v.replace("${input:", "${") if isinstance(v, str) else v for k, v in server_config.get("env", {}).items()}, + **output, } output_json = json.dumps(output_with_name, indent=2) click.echo(output_json) @@ -137,7 +133,7 @@ def create_mcp_proxy(): pyperclip.copy(output_json) click.secho("\nOutput copied to clipboard!", fg="green") except pyperclip.PyperclipException as e: - click.secho(f"\nFailed to copy to clipboard: {str(e)}", fg="yellow") + click.secho(f"\nFailed to copy to clipboard: {e!s}", fg="yellow") async def list_tools(server_config: dict, command: str, args: list[str], env: dict[str, str]): From 4b6f47e233ecd3201fdc6e33d18a2736c0047267 Mon Sep 17 00:00:00 2001 From: Saravanan S Date: Thu, 22 May 2025 09:25:29 +0530 Subject: [PATCH 23/26] fix: env variables as array --- src/datapilot/core/mcp_utils/mcp.py | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/src/datapilot/core/mcp_utils/mcp.py b/src/datapilot/core/mcp_utils/mcp.py index 02a5666..fe00e0d 100644 --- a/src/datapilot/core/mcp_utils/mcp.py +++ b/src/datapilot/core/mcp_utils/mcp.py @@ -124,7 +124,10 @@ def create_mcp_proxy(): "config": input_configs, "command": server_config["command"], "args": [arg.replace("${input:", "${") if isinstance(arg, str) else arg for arg in server_config.get("args", [])], - "env": {k: v.replace("${input:", "${") if isinstance(v, str) else v for k, v in server_config.get("env", {}).items()}, + "env": [ + {"key": k, "value": v.replace("${input:", "${") if isinstance(v, str) else v} + for k, v in server_config.get("env", {}).items() + ], **output, } output_json = json.dumps(output_with_name, indent=2) From 3e0add3b9f17d0018ffef9c0ee20964d1fe8c969 Mon Sep 17 00:00:00 2001 From: "Michiel De Smet (aider)" Date: Thu, 22 May 2025 13:30:49 +0800 Subject: [PATCH 24/26] fix: Add exception chaining in mcp.py and make setup.py executable --- src/datapilot/core/mcp_utils/mcp.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/datapilot/core/mcp_utils/mcp.py b/src/datapilot/core/mcp_utils/mcp.py index fe00e0d..7c2481a 100644 --- a/src/datapilot/core/mcp_utils/mcp.py +++ b/src/datapilot/core/mcp_utils/mcp.py @@ -173,4 +173,4 @@ async def list_tools(server_config: dict, command: str, args: list[str], env: di "tools": mcp_tools, } except Exception as e: - raise click.UsageError("Could not connect to MCP server: " + str(e)) + raise click.UsageError("Could not connect to MCP server: " + str(e)) from e From 9087a20cb6e97a2eb7ad0833841b67df09a7eb6e Mon Sep 17 00:00:00 2001 From: Michiel De Smet Date: Thu, 22 May 2025 15:03:26 +0800 Subject: [PATCH 25/26] ci: remove Python 3.9 and PyPy3.9 test jobs --- .github/workflows/github-actions.yml | 60 ---------------------------- 1 file changed, 60 deletions(-) diff --git a/.github/workflows/github-actions.yml b/.github/workflows/github-actions.yml index 1478e85..57ec427 100644 --- a/.github/workflows/github-actions.yml +++ b/.github/workflows/github-actions.yml @@ -19,42 +19,6 @@ jobs: toxpython: 'python3.11' tox_env: 'docs' os: 'ubuntu-latest' - - name: 'py39-pydantic28-cover (ubuntu)' - python: '3.9' - toxpython: 'python3.9' - python_arch: 'x64' - tox_env: 'py39-pydantic28-cover' - os: 'ubuntu-latest' - - name: 'py39-pydantic28-cover (windows)' - python: '3.9' - toxpython: 'python3.9' - python_arch: 'x64' - tox_env: 'py39-pydantic28-cover' - os: 'windows-latest' - - name: 'py39-pydantic28-cover (macos)' - python: '3.9' - toxpython: 'python3.9' - python_arch: 'x64' - tox_env: 'py39-pydantic28-cover' - os: 'macos-13' - - name: 'py39-pydantic210-cover' - python: '3.9' - toxpython: 'python3.9' - python_arch: 'x64' - tox_env: 'py39-pydantic210-cover' - os: 'ubuntu-latest' - - name: 'py39-pydantic28-nocov' - python: '3.9' - toxpython: 'python3.9' - python_arch: 'x64' - tox_env: 'py39-pydantic28-nocov' - os: 'ubuntu-latest' - - name: 'py39-pydantic210-nocov' - python: '3.9' - toxpython: 'python3.9' - python_arch: 'x64' - tox_env: 'py39-pydantic210-nocov' - os: 'ubuntu-latest' - name: 'py310-pydantic28-cover' python: '3.10' toxpython: 'python3.10' @@ -127,30 +91,6 @@ jobs: python_arch: 'x64' tox_env: 'py312-pydantic210-nocov' os: 'ubuntu-latest' - - name: 'pypy39-pydantic28-cover' - python: 'pypy-3.9' - toxpython: 'pypy3.9' - python_arch: 'x64' - tox_env: 'pypy39-pydantic28-cover' - os: 'ubuntu-latest' - - name: 'pypy39-pydantic210-cover' - python: 'pypy-3.9' - toxpython: 'pypy3.9' - python_arch: 'x64' - tox_env: 'pypy39-pydantic210-cover' - os: 'ubuntu-latest' - - name: 'pypy39-pydantic28-nocov' - python: 'pypy-3.9' - toxpython: 'pypy3.9' - python_arch: 'x64' - tox_env: 'pypy39-pydantic28-nocov' - os: 'ubuntu-latest' - - name: 'pypy39-pydantic210-nocov' - python: 'pypy-3.9' - toxpython: 'pypy3.9' - python_arch: 'x64' - tox_env: 'pypy39-pydantic210-nocov' - os: 'ubuntu-latest' - name: 'pypy310-pydantic28-cover' python: 'pypy-3.10' toxpython: 'pypy3.10' From 0536ca5909db682474ee3c1ecaa26591d33460fe Mon Sep 17 00:00:00 2001 From: anandgupta42 Date: Thu, 22 May 2025 00:07:05 -0700 Subject: [PATCH 26/26] =?UTF-8?q?Bump=20version:=200.0.18=20=E2=86=92=200.?= =?UTF-8?q?0.19?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- .bumpversion.cfg | 2 +- docs/conf.py | 2 +- setup.py | 2 +- src/datapilot/__init__.py | 2 +- 4 files changed, 4 insertions(+), 4 deletions(-) diff --git a/.bumpversion.cfg b/.bumpversion.cfg index 6ad930e..90cda22 100644 --- a/.bumpversion.cfg +++ b/.bumpversion.cfg @@ -1,5 +1,5 @@ [bumpversion] -current_version = 0.0.18 +current_version = 0.0.19 commit = True tag = True diff --git a/docs/conf.py b/docs/conf.py index afea67e..6823468 100644 --- a/docs/conf.py +++ b/docs/conf.py @@ -15,7 +15,7 @@ year = "2024" author = "Altimate Inc." copyright = f"{year}, {author}" -version = release = "0.0.18" +version = release = "0.0.19" pygments_style = "trac" templates_path = ["."] diff --git a/setup.py b/setup.py index 190e49f..2da8546 100644 --- a/setup.py +++ b/setup.py @@ -13,7 +13,7 @@ def read(*names, **kwargs): setup( name="altimate-datapilot-cli", - version="0.0.18", + version="0.0.19", license="MIT", description="Assistant for Data Teams", long_description="{}\n{}".format( diff --git a/src/datapilot/__init__.py b/src/datapilot/__init__.py index f18e5d0..a11f0b4 100644 --- a/src/datapilot/__init__.py +++ b/src/datapilot/__init__.py @@ -1 +1 @@ -__version__ = "0.0.18" +__version__ = "0.0.19"