Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
204 changes: 186 additions & 18 deletions src/mcp_server_appwrite/server.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,10 +5,13 @@
import base64
import importlib
import inspect
import ipaddress
import json
import mimetypes
import os
import pkgutil
import re
import socket
import sys
from collections.abc import Mapping
from dataclasses import dataclass
Expand All @@ -18,7 +21,9 @@
from pathlib import Path
from types import UnionType
from typing import Any, Union, get_args, get_origin
from urllib.parse import unquote, urlsplit

import httpx
import mcp.server.stdio
import mcp.types as types
from appwrite.client import Client
Expand Down Expand Up @@ -331,42 +336,200 @@ def _coerce_enum(enum_type: type[Enum], value: Any, param_name: str) -> Any:
) from exc


# Upload behavior is configured once per server process at build time, since a given
# process serves exactly one transport.
# stdio: local filesystem paths are read directly; URL fetch also allowed.
# http : the server runs remotely with no access to the client's filesystem, so local
# paths are rejected with guidance; uploads come via URL fetch or inline bytes.
_UPLOAD_TRANSPORT: str = "stdio"

_MAX_FETCH_BYTES = 25 * 1024 * 1024 # 25 MB cap on server-fetched files
_MAX_INLINE_BYTES = 256 * 1024 # 256 KB cap on decoded inline content
_FETCH_TIMEOUT_SECONDS = 30.0
_FETCH_MAX_REDIRECTS = 5

_HOSTED_PATH_GUIDANCE = (
"The hosted Appwrite MCP server cannot read local file paths. For '{param}', pass a "
'public URL as {{"url": "https://..."}} (preferred), or a small file inline as '
'{{"filename": "...", "content": "<base64>", "encoding": "base64"}}.'
)


def _configure_uploads(transport: str) -> None:
"""Set the upload mode for this server process. Called once from build_mcp_server."""
global _UPLOAD_TRANSPORT
_UPLOAD_TRANSPORT = transport


def _validate_fetch_url(url: str) -> None:
"""Reject non-http(s) schemes and hosts that resolve to non-public addresses.

This is the SSRF guard for server-side URL fetches: it stops the model from making
the hosted server reach internal services, loopback, or the cloud metadata endpoint
(169.254.169.254). Note the resolve-then-reconnect DNS-rebinding gap is accepted.
"""
parts = urlsplit(url)
if parts.scheme not in ("http", "https"):
raise ValueError(
f"Unsupported URL scheme '{parts.scheme}' — only http and https are allowed."
)
host = parts.hostname
if not host:
raise ValueError("URL is missing a host.")

port = parts.port or (443 if parts.scheme == "https" else 80)
try:
infos = socket.getaddrinfo(host, port)
except socket.gaierror as exc:
raise ValueError(f"Could not resolve host '{host}'.") from exc

for info in infos:
ip = ipaddress.ip_address(info[4][0])
if (
ip.is_private
or ip.is_loopback
or ip.is_link_local
or ip.is_reserved
or ip.is_multicast
or ip.is_unspecified
):
raise ValueError(
"Refusing to fetch a URL that resolves to a private, loopback, or "
"link-local address."
)


def _derive_filename(resp: httpx.Response, url: str) -> str:
"""Best-effort filename from Content-Disposition, then the URL path, then a fallback."""
disposition = resp.headers.get("content-disposition", "")
match = re.search(
r"filename\*?=(?:[^']*'[^']*')?\"?([^\";]+)\"?", disposition, re.IGNORECASE
)
candidate = ""
if match:
candidate = unquote(match.group(1)).strip().strip('"')
if not candidate:
segment = urlsplit(url).path.rstrip("/").rsplit("/", 1)[-1]
candidate = unquote(segment).strip()
# Sanitize to a bare filename — strip any directory components or traversal.
candidate = candidate.replace("\\", "/").rsplit("/", 1)[-1]
if candidate in ("", ".", ".."):
mime_type = (resp.headers.get("content-type") or "").split(";")[0].strip()
extension = mimetypes.guess_extension(mime_type) if mime_type else None
candidate = f"upload{extension}" if extension else "upload"
return candidate


def _fetch_input_file(url: str, param_name: str) -> InputFile:
"""Download a public URL (SSRF-guarded, size-capped) into an in-memory InputFile."""
_validate_fetch_url(url)
try:
with httpx.Client(
timeout=_FETCH_TIMEOUT_SECONDS,
follow_redirects=True,
max_redirects=_FETCH_MAX_REDIRECTS,
limits=httpx.Limits(max_connections=1),
) as client:
with client.stream("GET", url) as resp:
resp.raise_for_status()
# The final URL after redirects must also be public.
_validate_fetch_url(str(resp.url))
Comment on lines +427 to +436

Copy link
Copy Markdown

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

P1 security Post-redirect SSRF validation fires too late

client.stream("GET", url) with follow_redirects=True means httpx follows every redirect internally and opens a TCP connection to the final destination before the context manager body runs. By the time _validate_fetch_url(str(resp.url)) is called on line 436, the HTTP GET request to the final URL (which may be http://169.254.169.254/latest/meta-data/ or any private IP) has already been sent. The check blocks reading the response body but does not prevent the network request from crossing the trust boundary.

A redirect chain like https://legit.example.com/file → http://10.0.0.1/internal would bypass the pre-request validation entirely. The simplest fix is to disable automatic redirect following and either error on any redirect or validate each Location header before following manually.


declared = resp.headers.get("content-length")
if declared is not None and declared.isdigit():
Comment on lines +438 to +439

Copy link
Copy Markdown

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

P2 Content-Length skip when value contains a sign or spaces

declared.isdigit() returns False for values like " 1024", "+1024", or the rarely-seen negative sentinel -1, so those bypass the early-exit check. In practice a server lying about Content-Length would still be caught by the streaming cap on line 450, but the declared-size check silently skips any non-digit string and might miss a large file until the whole body has been partially buffered.

Note: If this suggestion doesn't match your team's coding style, reply to this and let me know. I'll remember it for next time!

if int(declared) > _MAX_FETCH_BYTES:
raise ValueError(
f"File at URL for '{param_name}' is too large "
f"({declared} bytes); max is {_MAX_FETCH_BYTES} bytes."
)

chunks: list[bytes] = []
total = 0
for chunk in resp.iter_bytes():
total += len(chunk)
if total > _MAX_FETCH_BYTES:
raise ValueError(
f"File at URL for '{param_name}' exceeds the max of "
f"{_MAX_FETCH_BYTES} bytes."
)
chunks.append(chunk)

data = b"".join(chunks)
mime_type = (
(resp.headers.get("content-type") or "").split(";")[0].strip()
)
filename = _derive_filename(resp, url)
except httpx.HTTPError as exc:
raise ValueError(
f"Failed to fetch file from URL for '{param_name}': {exc}"
) from exc

return InputFile.from_bytes(data, filename, mime_type or None)


def _coerce_inline_content(value: Mapping, param_name: str) -> InputFile:
filename = value.get("filename")
content = value.get("content")
encoding = str(value.get("encoding", "utf-8")).lower()
if encoding == "base64":
try:
data = base64.b64decode(content)
except Exception as exc:
raise ValueError(f"Invalid base64 content for '{param_name}'.") from exc
elif encoding == "utf-8":
data = str(content).encode("utf-8")
else:
raise ValueError(
f"Invalid encoding for '{param_name}'. Expected 'utf-8' or 'base64'."
)

if len(data) > _MAX_INLINE_BYTES:
raise ValueError(
f"Inline content for '{param_name}' is too large "
f"({len(data)} bytes, max {_MAX_INLINE_BYTES}). For larger files pass "
'{"url": "https://..."} so the server can download it directly.'
)

return InputFile.from_bytes(data, str(filename), value.get("mime_type"))


def _coerce_path(path: str, param_name: str) -> InputFile:
if _UPLOAD_TRANSPORT != "stdio":
raise ValueError(_HOSTED_PATH_GUIDANCE.format(param=param_name))
return InputFile.from_path(path)


def _coerce_input_file(value: Any, param_name: str) -> InputFile:
if isinstance(value, InputFile):
return value

if isinstance(value, str):
return InputFile.from_path(value)
if urlsplit(value).scheme in ("http", "https"):
return _fetch_input_file(value, param_name)
return _coerce_path(value, param_name)

if not isinstance(value, Mapping):
raise ValueError(
f"Invalid value for '{param_name}'. Provide a file path string or an object with `path` or `filename` and `content`."
f"Invalid value for '{param_name}'. Provide a public URL string, a `url`, or "
"an object with `filename` and `content`."
)

url = value.get("url")
if url:
return _fetch_input_file(str(url), param_name)

path = value.get("path")
if path:
return InputFile.from_path(str(path))
return _coerce_path(str(path), param_name)

filename = value.get("filename")
content = value.get("content")
if filename and content is not None:
encoding = str(value.get("encoding", "utf-8")).lower()
if encoding == "base64":
try:
data = base64.b64decode(content)
except Exception as exc:
raise ValueError(f"Invalid base64 content for '{param_name}'.") from exc
elif encoding == "utf-8":
data = str(content).encode("utf-8")
else:
raise ValueError(
f"Invalid encoding for '{param_name}'. Expected 'utf-8' or 'base64'."
)

return InputFile.from_bytes(data, str(filename), value.get("mime_type"))
return _coerce_inline_content(value, param_name)

raise ValueError(
f"Invalid value for '{param_name}'. Provide `path`, or both `filename` and `content`."
f"Invalid value for '{param_name}'. Provide `url`, or both `filename` and "
"`content`."
)


Expand Down Expand Up @@ -680,11 +843,16 @@ def build_instructions(transport: str = "http") -> str:
"target project: use appwrite_get_context first, then pass the selected "
"project id as project_id to appwrite_call_tool. "
"Organization-scoped console tools (e.g. creating a project) need organization_id. "
"File/image uploads: pass a public URL as the file argument (e.g. "
'{"url": "https://..."}) so the server downloads it directly; for very small '
'files you may pass inline base64 ({"filename": ..., "content": ..., '
'"encoding": "base64"}). '
f"{common}"
)


def build_mcp_server(operator: Operator, *, transport: str = "http") -> Server:
_configure_uploads(transport)
instructions = build_instructions(transport)

server = Server("Appwrite MCP Server", instructions=instructions)
Expand Down
8 changes: 6 additions & 2 deletions src/mcp_server_appwrite/service.py
Original file line number Diff line number Diff line change
Expand Up @@ -45,14 +45,18 @@ def _input_file_schema(self) -> dict:
"oneOf": [
{
"type": "string",
"description": "Path to a local file on the machine running the MCP server.",
"description": "A public http(s) URL the server downloads the file from, or a local file path (paths only work when the server runs locally).",
},
{
"type": "object",
"properties": {
"url": {
"type": "string",
"description": "Public http(s) URL the server downloads the file from. Preferred for images and any non-trivial file on the hosted server.",
},
"path": {
"type": "string",
"description": "Path to a local file on the machine running the MCP server.",
"description": "Path to a local file. Only works when the server runs locally; on the hosted server use `url` instead.",
},
"filename": {
"type": "string",
Expand Down
Loading
Loading