From e19a89d53f91f31b757dfd037e31f2c2bd044990 Mon Sep 17 00:00:00 2001 From: Lasim Date: Sun, 12 Oct 2025 13:08:12 +0200 Subject: [PATCH 01/11] docs: add background jobs system documentation and integrate into architecture overview --- .../development/frontend/ui-design-charts.mdx | 197 ++++++ docs/development/satellite/architecture.mdx | 4 + .../development/satellite/background-jobs.mdx | 593 ++++++++++++++++++ docs/development/satellite/index.mdx | 11 +- 4 files changed, 804 insertions(+), 1 deletion(-) create mode 100644 docs/development/frontend/ui-design-charts.mdx create mode 100644 docs/development/satellite/background-jobs.mdx diff --git a/docs/development/frontend/ui-design-charts.mdx b/docs/development/frontend/ui-design-charts.mdx new file mode 100644 index 0000000..10c0922 --- /dev/null +++ b/docs/development/frontend/ui-design-charts.mdx @@ -0,0 +1,197 @@ +--- +title: Charts with Vue ECharts +description: Guide to using Apache ECharts for data visualization in DeployStack frontend +sidebar: Charts +--- + +# Charts with Vue ECharts + +DeployStack uses [vue-echarts](https://github.com/ecomfe/vue-echarts) for data visualization, providing powerful and performant charts with Apache ECharts integration for Vue 3. + +## Chart Components + +The chart components are available in `/components/ui/chart/` following the shadcn/vue pattern with CVA variants. + +### LineChart Component + +The `LineChart` component provides a simplified API for time-series data visualization, perfect for displaying metrics like HTTP calls, user activity, or any trend data. 
+ +```vue + + + +``` + +### LineChart Props + +| Prop | Type | Default | Description | +|------|------|---------|-------------| +| `data` | `number[]` | required | Array of data points | +| `labels` | `string[]` | required | Array of x-axis labels | +| `name` | `string` | `'Data'` | Series name for tooltip | +| `smooth` | `boolean` | `true` | Enable smooth line interpolation | +| `showArea` | `boolean` | `true` | Show area fill under line | +| `color` | `string` | `'#0f766e'` | Line color (DeployStack teal) | +| `areaColor` | `string` | `'rgba(15, 118, 110, 0.3)'` | Area gradient color | +| `size` | `'sm' \| 'md' \| 'lg' \| 'xl'` | `'md'` | Chart height | +| `loading` | `boolean` | `false` | Show loading state | +| `autoresize` | `boolean` | `true` | Auto-resize with container | + +### Size Variants + +```vue + + + + + + + + + + + +``` + +### Custom Styling + +```html + + + + + + + + +``` + +## Advanced Usage: Base Chart Component + +For full control over chart configuration, use the base `Chart` component with custom ECharts options: + +```html + + + +``` + +## Best Practices + +1. **Use LineChart for simple cases** - The simplified API reduces boilerplate +2. **Use Chart for complex visualizations** - When you need full ECharts control +3. **Set explicit height** - Always use size prop for consistent layouts +4. **Enable autoresize** - Charts automatically adapt to container size changes +5. **Handle loading states** - Use the `loading` prop for better UX +6. 
**Use CanvasRenderer** - Better performance for most use cases + +## Resources + +- [GitHub Repository](https://github.com/ecomfe/vue-echarts) +- [Apache ECharts Documentation](https://echarts.apache.org/) +- [Chart Examples](https://echarts.apache.org/examples/) diff --git a/docs/development/satellite/architecture.mdx b/docs/development/satellite/architecture.mdx index 9237bb4..9fec0d9 100644 --- a/docs/development/satellite/architecture.mdx +++ b/docs/development/satellite/architecture.mdx @@ -22,6 +22,7 @@ Satellites operate as edge workers similar to GitHub Actions runners, providing: - **OAuth 2.1 Resource Server**: Token introspection with Backend (implemented) - **Backend Polling Communication**: Outbound-only, firewall-friendly (implemented) - **Process Lifecycle Management**: Spawn, monitor, terminate MCP servers (ready for implementation) +- **Background Jobs System**: Cron-like recurring tasks with automatic error handling (implemented) ## Current Implementation Architecture @@ -474,3 +475,6 @@ The satellite service has completed **Phase 1: MCP Transport Implementation** an - **Logging System**: Pino with structured logging - **Build Pipeline**: TypeScript compilation and bundling - **Development Workflow**: Hot reload and code quality tools +- **Background Jobs System**: Cron-like job management for recurring tasks + +For details on the background jobs system, see [Background Jobs System](/development/satellite/background-jobs). diff --git a/docs/development/satellite/background-jobs.mdx b/docs/development/satellite/background-jobs.mdx new file mode 100644 index 0000000..47a58be --- /dev/null +++ b/docs/development/satellite/background-jobs.mdx @@ -0,0 +1,593 @@ +--- +title: Background Jobs System +description: Cron-like job system for managing recurring background tasks in DeployStack Satellite with automatic error handling and monitoring. 
+sidebar: Satellite Development +--- + +import { Callout } from 'fumadocs-ui/components/callout'; + +# Background Jobs System + +DeployStack Satellite implements a centralized job management system for recurring background tasks. The system provides a consistent pattern for cron-like operations with automatic error handling, execution metrics, and lifecycle management. + +## Architecture Overview + +The job system consists of three core components: + +``` +┌─────────────────────────────────────────────────────────────────┐ +│ Job System Architecture │ +│ │ +│ ┌──────────────┐ ┌──────────────┐ ┌──────────────┐ │ +│ │ BaseJob │ │ JobManager │ │ Concrete Job │ │ +│ │ │ │ │ │ │ │ +│ │ • Interval │◄───│ • Registry │◄───│ HeartbeatJob │ │ +│ │ • Execute │ │ • Lifecycle │ │ CleanupJob │ │ +│ │ • Metrics │ │ • Monitoring │ │ CustomJob │ │ +│ └──────────────┘ └──────────────┘ └──────────────┘ │ +└─────────────────────────────────────────────────────────────────┘ +``` + +### BaseJob Abstract Class + +All jobs extend `BaseJob`, which provides: + +- **Automatic Interval Execution**: Jobs run on configured intervals +- **Immediate First Run**: Execute immediately on start, then follow interval +- **Error Handling**: Automatic error catching with structured logging +- **Execution Metrics**: Track execution count, timing, and errors +- **Lifecycle Management**: Start/stop methods with state tracking + +### JobManager + +The `JobManager` provides centralized control: + +- **Job Registry**: Register and track all jobs +- **Lifecycle Control**: Start/stop all jobs or individual jobs +- **Status Monitoring**: Query job statistics and execution state +- **Graceful Shutdown**: Stop all jobs cleanly on satellite shutdown + +## Current Jobs + +| Job Name | Interval | Purpose | Status | +|----------|----------|---------|--------| +| `heartbeat` | 30 seconds | Send status updates to backend | ✅ Active | +| `cleanup` | 5 minutes | Template for new jobs | 📝 Example | + +## Creating a New Job 
+ +Add a new background job in three steps: + +### Step 1: Create Job File + +Create `src/jobs/process-health-job.ts`: + +```typescript +import { BaseJob } from './base-job'; +import { FastifyBaseLogger } from 'fastify'; + +export class ProcessHealthJob extends BaseJob { + constructor(logger: FastifyBaseLogger) { + super('process-health', 120000, logger); // 2 minutes + } + + protected async execute(): Promise { + this.logger.info({ + operation: 'process_health_check' + }, 'Checking process health...'); + + // Your job logic here + + this.logger.info({ + operation: 'process_health_complete' + }, 'Health check completed'); + } +} +``` + +### Step 2: Export from Index + +Add to `src/jobs/index.ts`: + +```typescript +export { ProcessHealthJob } from './process-health-job'; +``` + +### Step 3: Register in Server + +Add to `src/server.ts`: + +```typescript +import { JobManager, HeartbeatJob, CleanupJob, ProcessHealthJob } from './jobs'; + +const jobManager = new JobManager(server.log); +jobManager.register(new HeartbeatJob(heartbeatService)); +jobManager.register(new CleanupJob(server.log)); +jobManager.register(new ProcessHealthJob(server.log)); + +await jobManager.startAll(); +``` + + +That's it! Your job will start running immediately and then execute every 2 minutes automatically. 
+ + +## Job Intervals + +Common interval values in milliseconds: + +```typescript +// Seconds +30 * 1000 // 30 seconds +60 * 1000 // 1 minute + +// Minutes +2 * 60 * 1000 // 2 minutes +5 * 60 * 1000 // 5 minutes +10 * 60 * 1000 // 10 minutes +15 * 60 * 1000 // 15 minutes +30 * 60 * 1000 // 30 minutes + +// Hours +60 * 60 * 1000 // 1 hour +6 * 60 * 60 * 1000 // 6 hours +24 * 60 * 60 * 1000 // 24 hours +``` + +### Environment-Configurable Intervals + +Make job intervals configurable: + +```typescript +export class MyJob extends BaseJob { + constructor(logger: FastifyBaseLogger) { + const interval = parseInt( + process.env.MY_JOB_INTERVAL || '300000', + 10 + ); + super('my-job', interval, logger); + } +} +``` + +Add to `.env.example`: +```bash +# My Job interval in milliseconds (default: 5 minutes) +MY_JOB_INTERVAL=300000 +``` + +## Jobs with Dependencies + +If your job needs access to services, inject them via constructor: + +```typescript +export class ProcessHealthJob extends BaseJob { + constructor( + logger: FastifyBaseLogger, + private processManager: ProcessManager, + private runtimeState: RuntimeState + ) { + super('process-health', 120000, logger); + } + + protected async execute(): Promise { + const processes = this.processManager.getAllProcesses(); + + for (const proc of processes) { + if (proc.errorCount > 10) { + this.logger.warn({ + process_id: proc.config.installation_id, + error_count: proc.errorCount + }, 'Process has high error count'); + } + } + } +} +``` + +Register with dependencies: + +```typescript +jobManager.register( + new ProcessHealthJob(server.log, processManager, runtimeState) +); +``` + +## Job Lifecycle + +### Initialization Flow + +``` +Satellite Startup + │ + ├── Register Satellite with Backend + │ + ├── Initialize Services + │ + ├── Create JobManager + │ + ├── Register Jobs + │ ├── new HeartbeatJob(heartbeatService) + │ ├── new CleanupJob(logger) + │ └── new CustomJob(logger) + │ + ├── jobManager.startAll() + │ ├── Start Job 1 → 
Execute immediately → Set interval + │ ├── Start Job 2 → Execute immediately → Set interval + │ └── Start Job 3 → Execute immediately → Set interval + │ + └── Satellite Ready +``` + +### Job Execution Flow + +``` +Job Start + │ + ├── Execute Immediately (first run) + │ ├── Log: job_execute_start + │ ├── Run execute() method + │ ├── Track execution time + │ ├── Log: job_execute_success + │ └── Update metrics + │ + ├── Wait for Interval + │ + └── Execute on Interval (repeating) + ├── Log: job_execute_start + ├── Run execute() method + ├── Handle errors (if any) + ├── Log: job_execute_success or job_execute_error + └── Update metrics → Repeat +``` + +## Monitoring and Observability + +### Structured Logging + +All job events are logged with structured data: + +```typescript +// Job started +{ + "operation": "job_start", + "job_name": "process-health", + "interval_ms": 120000, + "interval_seconds": 120 +} + +// Job executing +{ + "operation": "job_execute_start", + "job_name": "process-health", + "execution_number": 5 +} + +// Job completed +{ + "operation": "job_execute_success", + "job_name": "process-health", + "execution_number": 5, + "execution_time_ms": 234 +} + +// Job error +{ + "operation": "job_execute_error", + "job_name": "process-health", + "execution_number": 5, + "error_count": 2, + "error": "Connection timeout" +} +``` + +### Job Statistics + +Query job statistics via JobManager: + +```typescript +const stats = jobManager.getStats('process-health'); +// Returns: +{ + executionCount: 42, + errorCount: 2, + averageExecutionTime: 234, + isRunning: true +} +``` + +Get all job statistics: + +```typescript +const allStats = jobManager.getAllStats(); +// Returns array of all job statistics +``` + +## Error Handling + +### Automatic Error Recovery + +The `BaseJob` class automatically handles errors: + +```typescript +protected async execute(): Promise { + // If this throws, BaseJob catches it + await someOperationThatMightFail(); + + // Job continues running on 
next interval +} +``` + +### Custom Error Handling + +Add custom error handling for specific scenarios: + +```typescript +protected async execute(): Promise { + try { + await this.criticalOperation(); + } catch (error) { + this.logger.error({ error }, 'Critical operation failed'); + // Don't throw - let BaseJob track the error + } +} +``` + +### Timeout Protection + +Add timeouts for long-running operations: + +```typescript +protected async execute(): Promise { + const controller = new AbortController(); + const timeout = setTimeout(() => controller.abort(), 60000); + + try { + await this.longOperation({ signal: controller.signal }); + } finally { + clearTimeout(timeout); + } +} +``` + +## Best Practices + +### 1. Keep Jobs Focused + +Each job should have a single responsibility: + +**Good:** +```typescript +export class SessionCleanupJob extends BaseJob { + protected async execute(): Promise { + await this.cleanupExpiredSessions(); + } +} +``` + +**Bad:** +```typescript +export class MaintenanceJob extends BaseJob { + protected async execute(): Promise { + await this.cleanupSessions(); + await this.checkProcessHealth(); + await this.rotateBlogs(); + await this.updateMetrics(); + } +} +``` + +### 2. Choose Appropriate Intervals + +- **High-frequency (30s-1m)**: Health checks, critical monitoring +- **Medium (5m-15m)**: Cleanup tasks, periodic updates +- **Low (1h+)**: Reports, analytics, maintenance + +### 3. Document Job Purpose + +Add clear comments explaining what the job does: + +```typescript +/** + * Process Health Check Job + * + * Monitors all running MCP server processes and restarts unhealthy ones. + * Runs every 2 minutes to ensure quick failure detection. + * + * Checks: + * - Process still running + * - Error count within limits + * - Response time acceptable + * - Memory usage not excessive + */ +export class ProcessHealthJob extends BaseJob { + // ... +} +``` + +### 4. 
Use Structured Logging + +Always log with operation context: + +```typescript +protected async execute(): Promise { + this.logger.info({ + operation: 'cleanup_start', + session_count: sessions.length + }, 'Starting session cleanup...'); + + // ... cleanup logic ... + + this.logger.info({ + operation: 'cleanup_complete', + removed_count: removed + }, 'Session cleanup completed'); +} +``` + +## Common Job Patterns + +### Health Check Pattern + +```typescript +export class HealthCheckJob extends BaseJob { + constructor( + logger: FastifyBaseLogger, + private service: ServiceToMonitor + ) { + super('health-check', 120000, logger); + } + + protected async execute(): Promise { + const isHealthy = await this.service.checkHealth(); + + if (!isHealthy) { + this.logger.warn({ + operation: 'health_check_failed', + service: 'my-service' + }, 'Service health check failed'); + + await this.service.restart(); + } + } +} +``` + +### Cleanup Pattern + +```typescript +export class CleanupJob extends BaseJob { + constructor( + logger: FastifyBaseLogger, + private manager: ResourceManager + ) { + super('cleanup', 900000, logger); // 15 minutes + } + + protected async execute(): Promise { + const expired = await this.manager.findExpired(); + + for (const resource of expired) { + await this.manager.cleanup(resource); + } + + this.logger.info({ + operation: 'cleanup_complete', + count: expired.length + }, 'Cleanup completed'); + } +} +``` + +### Metrics Collection Pattern + +```typescript +export class MetricsJob extends BaseJob { + constructor( + logger: FastifyBaseLogger, + private collector: MetricsCollector + ) { + super('metrics', 300000, logger); // 5 minutes + } + + protected async execute(): Promise { + const metrics = await this.collector.collect(); + await this.collector.report(metrics); + } +} +``` + +## Troubleshooting + +### Job Not Starting + +Check if the job is registered: + +```bash +# Look for job_start logs +grep "job_start" satellite.log | grep "my-job" +``` + +Verify 
registration in code: + +```typescript +const jobs = jobManager.getRegisteredJobs(); +console.log(jobs); // Should include your job name +``` + +### Job Failing Repeatedly + +Check error logs: + +```bash +# Find job errors +grep "job_execute_error" satellite.log | grep "my-job" +``` + +Review error count in statistics: + +```typescript +const stats = jobManager.getStats('my-job'); +console.log(`Error count: ${stats.errorCount}`); +``` + +### Performance Issues + +Monitor execution time: + +```bash +# Check execution times +grep "job_execute_success" satellite.log | grep "my-job" +``` + +If execution time approaches interval: +- Increase the interval +- Optimize job logic +- Consider breaking into smaller jobs + +### Job Not Executing on Time + +Verify interval configuration: + +```typescript +// Log interval on job creation +this.logger.info({ + job_name: 'my-job', + interval_ms: this.intervalMs, + interval_seconds: this.intervalMs / 1000 +}, 'Job interval configured'); +``` + +Check system clock drift if timing is critical. + +## Future Enhancements + +Planned improvements to the job system: + +- Job dependencies (Job B waits for Job A completion) +- Conditional execution (skip job if condition not met) +- Job state persistence (resume after satellite restart) +- Distributed coordination (multi-satellite job scheduling) +- Retry logic with exponential backoff +- Dynamic interval adjustment based on load +- Prometheus metrics export +- Web UI for job management + +## Implementation Status + +**Current Features:** +- ✅ BaseJob abstract class with interval management +- ✅ JobManager for centralized control +- ✅ Automatic error handling and logging +- ✅ Execution metrics tracking +- ✅ HeartbeatJob integration +- ✅ Template job for reference + +**In Development:** +- 🚧 Job priority levels +- 🚧 Job status API endpoint +- 🚧 Advanced monitoring features + + +The job system is production-ready and actively used for the heartbeat service. 
The pattern has proven stable and is ready for additional jobs. + diff --git a/docs/development/satellite/index.mdx b/docs/development/satellite/index.mdx index 290feab..7f8863e 100644 --- a/docs/development/satellite/index.mdx +++ b/docs/development/satellite/index.mdx @@ -5,7 +5,7 @@ sidebar: Getting Started --- import { Card, Cards } from 'fumadocs-ui/components/card'; -import { Cloud, Shield, Plug, Settings, Network, TestTube, Wrench, BookOpen, Terminal, Users } from 'lucide-react'; +import { Cloud, Shield, Plug, Settings, Network, TestTube, Wrench, BookOpen, Terminal, Users, Timer } from 'lucide-react'; # DeployStack Satellite Development @@ -28,6 +28,7 @@ DeployStack Satellites are **edge workers** (similar to GitHub Actions runners) - ✅ **Team Isolation** (environment-based: nsjail in production, plain spawn in dev) - ✅ **Auto-Restart Protection** (max 3 attempts, permanently_failed status) - ✅ **Tool Discovery** (HTTP and stdio MCP servers) +- ✅ **Background Jobs System** (cron-like recurring tasks with automatic error handling) ## Architecture Vision @@ -195,6 +196,14 @@ curl -X POST http://localhost:3001/mcp \ > Satellite deployment patterns, monitoring, scaling, and operational considerations. + + } + href="/development/satellite/background-jobs" + title="Background Jobs" + > + Cron-like job system for recurring tasks with automatic error handling and monitoring. + ## Current Features From 1e9f33de7ac234c60b9c79be754ab1fb854d6fa0 Mon Sep 17 00:00:00 2001 From: "dependabot[bot]" <49699333+dependabot[bot]@users.noreply.github.com> Date: Mon, 13 Oct 2025 14:46:29 +0000 Subject: [PATCH 02/11] build(deps): bump fumadocs-ui from 15.7.7 to 15.8.5 Bumps [fumadocs-ui](https://github.com/fuma-nama/fumadocs) from 15.7.7 to 15.8.5. 
- [Release notes](https://github.com/fuma-nama/fumadocs/releases) - [Commits](https://github.com/fuma-nama/fumadocs/commits) --- updated-dependencies: - dependency-name: fumadocs-ui dependency-version: 15.8.5 dependency-type: direct:production update-type: version-update:semver-minor ... Signed-off-by: dependabot[bot] --- package-lock.json | 67 +++++++++++++++++++++++++++++++---------------- package.json | 2 +- 2 files changed, 46 insertions(+), 23 deletions(-) diff --git a/package-lock.json b/package-lock.json index 1d73ede..a778182 100644 --- a/package-lock.json +++ b/package-lock.json @@ -12,7 +12,7 @@ "@types/mdx": "^2.0.13", "fumadocs-core": "^15.8.1", "fumadocs-mdx": "^12.0.2", - "fumadocs-ui": "^15.7.7", + "fumadocs-ui": "^15.8.5", "lucide-react": "^0.544.0", "mdx": "^0.3.1", "next": "^15.5.4", @@ -640,9 +640,9 @@ "license": "MIT" }, "node_modules/@formatjs/intl-localematcher": { - "version": "0.6.1", - "resolved": "https://registry.npmjs.org/@formatjs/intl-localematcher/-/intl-localematcher-0.6.1.tgz", - "integrity": "sha512-ePEgLgVCqi2BBFnTMWPfIghu6FkbZnnBVhO2sSxvLfrdFw7wCHAHiDoM2h4NRgjbaY7+B7HgOLZGkK187pZTZg==", + "version": "0.6.2", + "resolved": "https://registry.npmjs.org/@formatjs/intl-localematcher/-/intl-localematcher-0.6.2.tgz", + "integrity": "sha512-XOMO2Hupl0wdd172Y06h6kLpBz6Dv+J4okPLl4LPtzbr8f66WbIoy4ev98EBuZ6ZK4h5ydTN6XneT4QVpD7cdA==", "license": "MIT", "dependencies": { "tslib": "^2.8.0" @@ -5110,9 +5110,9 @@ } }, "node_modules/fumadocs-ui": { - "version": "15.7.7", - "resolved": "https://registry.npmjs.org/fumadocs-ui/-/fumadocs-ui-15.7.7.tgz", - "integrity": "sha512-nXeEnFI0h+JAbwWsKWcc6aBuR++jWlxhMpXQnPv4zbrrbds436lilrOu/xh5KxPiEe2M9HspKMN+Oee73jHQFw==", + "version": "15.8.5", + "resolved": "https://registry.npmjs.org/fumadocs-ui/-/fumadocs-ui-15.8.5.tgz", + "integrity": "sha512-9pyB+9rOOsrFnmmZ9xREp/OgVhyaSq2ocEpqTNbeQ7tlJ6JWbdFWfW0C9lRXprQEB6DJWUDtDxqKS5QXLH0EGA==", "license": "MIT", "dependencies": { "@radix-ui/react-accordion": 
"^1.2.12", @@ -5126,11 +5126,11 @@ "@radix-ui/react-slot": "^1.2.3", "@radix-ui/react-tabs": "^1.1.13", "class-variance-authority": "^0.7.1", - "fumadocs-core": "15.7.7", + "fumadocs-core": "15.8.5", "lodash.merge": "^4.6.2", "next-themes": "^0.4.6", "postcss-selector-parser": "^7.1.0", - "react-medium-image-zoom": "^5.3.0", + "react-medium-image-zoom": "^5.4.0", "scroll-into-view-if-needed": "^3.1.0", "tailwind-merge": "^3.3.1" }, @@ -5154,38 +5154,42 @@ } }, "node_modules/fumadocs-ui/node_modules/fumadocs-core": { - "version": "15.7.7", - "resolved": "https://registry.npmjs.org/fumadocs-core/-/fumadocs-core-15.7.7.tgz", - "integrity": "sha512-4mo8y1L2VV9TcrQ1gses3c5zzCaPwDPYjfrPET4Qf+m7GPOqZ7wiUeXMTYb98T+N5wS0G/fsr/xFPZkgwD44gQ==", + "version": "15.8.5", + "resolved": "https://registry.npmjs.org/fumadocs-core/-/fumadocs-core-15.8.5.tgz", + "integrity": "sha512-hyJtKGuB2J/5y7tDfI1EnGMKlNbSXM5N5cpwvgCY0DcBJwFMDG/GpSpaVRzh3aWy67pAYDZFIwdtbKXBa/q5bg==", "license": "MIT", "dependencies": { - "@formatjs/intl-localematcher": "^0.6.1", - "@orama/orama": "^3.1.12", - "@shikijs/rehype": "^3.12.0", - "@shikijs/transformers": "^3.12.0", + "@formatjs/intl-localematcher": "^0.6.2", + "@orama/orama": "^3.1.14", + "@shikijs/rehype": "^3.13.0", + "@shikijs/transformers": "^3.13.0", "github-slugger": "^2.0.0", "hast-util-to-estree": "^3.1.3", "hast-util-to-jsx-runtime": "^2.3.6", "image-size": "^2.0.2", "negotiator": "^1.0.0", "npm-to-yarn": "^3.0.1", + "path-to-regexp": "^8.3.0", "react-remove-scroll": "^2.7.1", - "remark": "^15.0.0", + "remark": "^15.0.1", "remark-gfm": "^4.0.1", "remark-rehype": "^11.1.2", "scroll-into-view-if-needed": "^3.1.0", - "shiki": "^3.12.0", + "shiki": "^3.13.0", "unist-util-visit": "^5.0.0" }, "peerDependencies": { "@mixedbread/sdk": "^0.19.0", "@oramacloud/client": "1.x.x || 2.x.x", + "@tanstack/react-router": "1.x.x", "@types/react": "*", "algoliasearch": "5.x.x", + "lucide-react": "*", "next": "14.x.x || 15.x.x", "react": "18.x.x || 19.x.x", 
"react-dom": "18.x.x || 19.x.x", - "react-router": "7.x.x" + "react-router": "7.x.x", + "waku": "^0.26.0" }, "peerDependenciesMeta": { "@mixedbread/sdk": { @@ -5194,12 +5198,18 @@ "@oramacloud/client": { "optional": true }, + "@tanstack/react-router": { + "optional": true + }, "@types/react": { "optional": true }, "algoliasearch": { "optional": true }, + "lucide-react": { + "optional": true + }, "next": { "optional": true }, @@ -5211,6 +5221,9 @@ }, "react-router": { "optional": true + }, + "waku": { + "optional": true } } }, @@ -11373,6 +11386,16 @@ "node": "20 || >=22" } }, + "node_modules/path-to-regexp": { + "version": "8.3.0", + "resolved": "https://registry.npmjs.org/path-to-regexp/-/path-to-regexp-8.3.0.tgz", + "integrity": "sha512-7jdwVIRtsP8MYpdXSwOS0YdD0Du+qOoF/AEPIt88PcCFrZCzx41oxku1jD88hZBwbNUIEfpqvuhjFaMAqMTWnA==", + "license": "MIT", + "funding": { + "type": "opencollective", + "url": "https://opencollective.com/express" + } + }, "node_modules/path-type": { "version": "4.0.0", "resolved": "https://registry.npmjs.org/path-type/-/path-type-4.0.0.tgz", @@ -11629,9 +11652,9 @@ } }, "node_modules/react-medium-image-zoom": { - "version": "5.3.0", - "resolved": "https://registry.npmjs.org/react-medium-image-zoom/-/react-medium-image-zoom-5.3.0.tgz", - "integrity": "sha512-RCIzVlsKqy3BYgGgYbolUfuvx0aSKC7YhX/IJGEp+WJxsqdIVYJHkBdj++FAj6VD7RiWj6VVmdCfa/9vJE9hZg==", + "version": "5.4.0", + "resolved": "https://registry.npmjs.org/react-medium-image-zoom/-/react-medium-image-zoom-5.4.0.tgz", + "integrity": "sha512-BsE+EnFVQzFIlyuuQrZ9iTwyKpKkqdFZV1ImEQN573QPqGrIUuNni7aF+sZwDcxlsuOMayCr6oO/PZR/yJnbRg==", "funding": [ { "type": "github", diff --git a/package.json b/package.json index d59aa2a..45a22a0 100644 --- a/package.json +++ b/package.json @@ -17,7 +17,7 @@ "@types/mdx": "^2.0.13", "fumadocs-core": "^15.8.1", "fumadocs-mdx": "^12.0.2", - "fumadocs-ui": "^15.7.7", + "fumadocs-ui": "^15.8.5", "lucide-react": "^0.544.0", "mdx": "^0.3.1", "next": "^15.5.4", From 
3d1b560b40cf5d91a4637cda80a51afeac81d35d Mon Sep 17 00:00:00 2001 From: "dependabot[bot]" <49699333+dependabot[bot]@users.noreply.github.com> Date: Mon, 13 Oct 2025 14:48:33 +0000 Subject: [PATCH 03/11] build(deps): bump fumadocs-core from 15.8.1 to 15.8.5 Bumps [fumadocs-core](https://github.com/fuma-nama/fumadocs) from 15.8.1 to 15.8.5. - [Release notes](https://github.com/fuma-nama/fumadocs/releases) - [Commits](https://github.com/fuma-nama/fumadocs/commits) --- updated-dependencies: - dependency-name: fumadocs-core dependency-version: 15.8.5 dependency-type: direct:production update-type: version-update:semver-patch ... Signed-off-by: dependabot[bot] --- package-lock.json | 33 ++++++++++++++++++++++++--------- package.json | 2 +- 2 files changed, 25 insertions(+), 10 deletions(-) diff --git a/package-lock.json b/package-lock.json index 1d73ede..d686319 100644 --- a/package-lock.json +++ b/package-lock.json @@ -10,7 +10,7 @@ "hasInstallScript": true, "dependencies": { "@types/mdx": "^2.0.13", - "fumadocs-core": "^15.8.1", + "fumadocs-core": "^15.8.5", "fumadocs-mdx": "^12.0.2", "fumadocs-ui": "^15.7.7", "lucide-react": "^0.544.0", @@ -640,9 +640,9 @@ "license": "MIT" }, "node_modules/@formatjs/intl-localematcher": { - "version": "0.6.1", - "resolved": "https://registry.npmjs.org/@formatjs/intl-localematcher/-/intl-localematcher-0.6.1.tgz", - "integrity": "sha512-ePEgLgVCqi2BBFnTMWPfIghu6FkbZnnBVhO2sSxvLfrdFw7wCHAHiDoM2h4NRgjbaY7+B7HgOLZGkK187pZTZg==", + "version": "0.6.2", + "resolved": "https://registry.npmjs.org/@formatjs/intl-localematcher/-/intl-localematcher-0.6.2.tgz", + "integrity": "sha512-XOMO2Hupl0wdd172Y06h6kLpBz6Dv+J4okPLl4LPtzbr8f66WbIoy4ev98EBuZ6ZK4h5ydTN6XneT4QVpD7cdA==", "license": "MIT", "dependencies": { "tslib": "^2.8.0" @@ -4984,12 +4984,12 @@ } }, "node_modules/fumadocs-core": { - "version": "15.8.1", - "resolved": "https://registry.npmjs.org/fumadocs-core/-/fumadocs-core-15.8.1.tgz", - "integrity": 
"sha512-3NBM2U3QlnDr4AwfDCLFaNjRGOj52g3geHSnwC9hU2en34xROe7/I8FI1eLkX68ppGnhSQYm/rIuMAPzvepnsg==", + "version": "15.8.5", + "resolved": "https://registry.npmjs.org/fumadocs-core/-/fumadocs-core-15.8.5.tgz", + "integrity": "sha512-hyJtKGuB2J/5y7tDfI1EnGMKlNbSXM5N5cpwvgCY0DcBJwFMDG/GpSpaVRzh3aWy67pAYDZFIwdtbKXBa/q5bg==", "license": "MIT", "dependencies": { - "@formatjs/intl-localematcher": "^0.6.1", + "@formatjs/intl-localematcher": "^0.6.2", "@orama/orama": "^3.1.14", "@shikijs/rehype": "^3.13.0", "@shikijs/transformers": "^3.13.0", @@ -4999,8 +4999,9 @@ "image-size": "^2.0.2", "negotiator": "^1.0.0", "npm-to-yarn": "^3.0.1", + "path-to-regexp": "^8.3.0", "react-remove-scroll": "^2.7.1", - "remark": "^15.0.0", + "remark": "^15.0.1", "remark-gfm": "^4.0.1", "remark-rehype": "^11.1.2", "scroll-into-view-if-needed": "^3.1.0", @@ -5013,6 +5014,7 @@ "@tanstack/react-router": "1.x.x", "@types/react": "*", "algoliasearch": "5.x.x", + "lucide-react": "*", "next": "14.x.x || 15.x.x", "react": "18.x.x || 19.x.x", "react-dom": "18.x.x || 19.x.x", @@ -5035,6 +5037,9 @@ "algoliasearch": { "optional": true }, + "lucide-react": { + "optional": true + }, "next": { "optional": true }, @@ -11373,6 +11378,16 @@ "node": "20 || >=22" } }, + "node_modules/path-to-regexp": { + "version": "8.3.0", + "resolved": "https://registry.npmjs.org/path-to-regexp/-/path-to-regexp-8.3.0.tgz", + "integrity": "sha512-7jdwVIRtsP8MYpdXSwOS0YdD0Du+qOoF/AEPIt88PcCFrZCzx41oxku1jD88hZBwbNUIEfpqvuhjFaMAqMTWnA==", + "license": "MIT", + "funding": { + "type": "opencollective", + "url": "https://opencollective.com/express" + } + }, "node_modules/path-type": { "version": "4.0.0", "resolved": "https://registry.npmjs.org/path-type/-/path-type-4.0.0.tgz", diff --git a/package.json b/package.json index d59aa2a..c12a556 100644 --- a/package.json +++ b/package.json @@ -15,7 +15,7 @@ }, "dependencies": { "@types/mdx": "^2.0.13", - "fumadocs-core": "^15.8.1", + "fumadocs-core": "^15.8.5", "fumadocs-mdx": "^12.0.2", 
"fumadocs-ui": "^15.7.7", "lucide-react": "^0.544.0", From 11d88ed8e1128df0db3c12494833777c85b27294 Mon Sep 17 00:00:00 2001 From: Lasim Date: Mon, 13 Oct 2025 21:24:46 +0200 Subject: [PATCH 04/11] Add Satellite Event System documentation and event emission details - Introduced comprehensive documentation for the Satellite Events System, detailing real-time event processing from satellites to the backend. - Added sections on event flow, architecture overview, event types, batching strategy, error handling, and integration points. - Documented specific event types related to MCP server lifecycle, client connections, tool discovery, and configuration management. - Included guidelines for creating new event handlers and best practices for event emission. - Enhanced the existing process management documentation with event emission details for operational visibility. --- docs/development/backend/events.mdx | 2 + docs/development/backend/index.mdx | 8 + .../backend/satellite-communication.mdx | 21 + docs/development/backend/satellite-events.mdx | 563 +++++++++++ docs/development/satellite/architecture.mdx | 44 +- .../satellite/backend-communication.mdx | 24 + docs/development/satellite/event-system.mdx | 914 ++++++++++++++++++ docs/development/satellite/index.mdx | 63 +- .../satellite/process-management.mdx | 33 + 9 files changed, 1618 insertions(+), 54 deletions(-) create mode 100644 docs/development/backend/satellite-events.mdx create mode 100644 docs/development/satellite/event-system.mdx diff --git a/docs/development/backend/events.mdx b/docs/development/backend/events.mdx index d964be2..10e2a55 100644 --- a/docs/development/backend/events.mdx +++ b/docs/development/backend/events.mdx @@ -7,6 +7,8 @@ description: Type-safe event system for decoupled communication between core sys The Global Event Bus enables decoupled communication between core systems and plugins through a type-safe event system. 
Core systems emit events when important actions occur, and plugins can react without direct coupling to business logic. +**Note**: This documentation covers the **internal backend event bus** for plugin communication. If you're looking for the **satellite events system** (incoming events from satellites), see [Satellite Events](/development/backend/satellite-events). + ## Overview Key features: diff --git a/docs/development/backend/index.mdx b/docs/development/backend/index.mdx index 123a370..3e6a234 100644 --- a/docs/development/backend/index.mdx +++ b/docs/development/backend/index.mdx @@ -112,6 +112,14 @@ The development server starts at `http://localhost:3000` with API documentation > Database-backed job processing system with persistent storage, automatic retries, and rate limiting for long-running background tasks. + + } + href="/deploystack/development/backend/satellite-events" + title="Satellite Events" + > + Real-time event processing from satellites with convention-based handlers routing to business tables for operational visibility. 
+ ## Project Structure diff --git a/docs/development/backend/satellite-communication.mdx b/docs/development/backend/satellite-communication.mdx index 8a17548..4e3ecfb 100644 --- a/docs/development/backend/satellite-communication.mdx +++ b/docs/development/backend/satellite-communication.mdx @@ -71,6 +71,26 @@ Satellites use **outbound-only HTTPS polling** to communicate with the backend, └─────────────────┘ Command Response └─────────────────┘ ``` +### Communication Channels + +The system uses three distinct communication patterns: + +**Command Polling (Backend → Satellite)**: +- Backend creates commands, satellites poll and execute +- Adaptive intervals: 2-60 seconds based on priority +- Used for: MCP server configuration, process management + +**Heartbeat (Satellite → Backend, Periodic)**: +- Satellites report status every 30 seconds +- Contains: System metrics, process counts, resource usage +- Used for: Health monitoring, capacity planning + +**Events (Satellite → Backend, Immediate)**: +- Satellites emit events when actions occur, batched every 3 seconds +- Contains: Point-in-time occurrences with precise timestamps +- Used for: Real-time UI updates, audit trails, user notifications +- See [Satellite Events](/development/backend/satellite-events) for detailed implementation + ### Dual Deployment Models **Global Satellites**: Cloud-hosted by DeployStack team @@ -474,6 +494,7 @@ For detailed API endpoints, request/response formats, and authentication pattern For detailed satellite architecture and implementation: +- [Satellite Events](/development/backend/satellite-events) - Real-time event processing system - [API Security](/development/backend/api-security) - Security patterns and authorization - [Database Management](/development/backend/database) - Schema and data management - [OAuth2 Server](/development/backend/oauth2-server) - OAuth2 implementation details diff --git a/docs/development/backend/satellite-events.mdx 
b/docs/development/backend/satellite-events.mdx new file mode 100644 index 0000000..8affe0c --- /dev/null +++ b/docs/development/backend/satellite-events.mdx @@ -0,0 +1,563 @@ +--- +title: Satellite Events System +description: Real-time event processing from satellites to backend with convention-based handler architecture and business logic routing. +--- + +# Satellite Events System + +The Satellite Events System provides real-time communication from satellites to the backend for operational visibility, audit trails, and user feedback. Events are processed through a convention-based dispatcher that routes them to handlers updating existing business tables. + +## Architecture Overview + +### Event Flow + +``` +Satellite → EventBus (3s batching) → POST /api/satellites/{id}/events → Backend Dispatcher → Handler → Business Table +``` + +**Key Principle**: Events are **routing triggers** that update existing business tables, not raw event storage. Each handler performs meaningful business logic rather than storing JSON blobs. + +### Why Events vs Heartbeat? 
+ +DeployStack uses three distinct communication channels: + +**Heartbeat (Every 30 seconds)**: +- Aggregate metrics and system health +- Resource monitoring and capacity planning +- Process counts grouped by team + +**Events (Immediate with 3s batching)**: +- Point-in-time occurrences with precise timestamps +- Real-time UI updates and user notifications +- Audit trails for compliance + +**Commands (Polling)**: +- Backend-initiated tasks +- Configuration updates and process management + +## Backend Implementation + +### Directory Structure + +``` +services/backend/src/events/satellite/ +├── index.ts # Event dispatcher (auto-discovers handlers) +├── types.ts # Shared TypeScript interfaces +├── mcp-server-started.ts # Updates satelliteProcesses status +├── mcp-server-crashed.ts # Updates satelliteProcesses with error +├── mcp-tool-executed.ts # Logs to satelliteUsageLogs +└── [future-event-types].ts # Additional handlers as needed +``` + +### Convention-Based Handler Discovery + +The dispatcher automatically discovers and registers handlers from the `handlerModules` array in `index.ts`: + +```typescript +const handlerModules = [ + () => import('./mcp-server-started'), + () => import('./mcp-tool-executed'), + () => import('./mcp-server-crashed'), + // Add new handlers here - they will be automatically registered +]; +``` + +Each handler must export three components: + +1. **EVENT_TYPE**: String constant identifying the event +2. **SCHEMA**: JSON Schema for AJV validation +3. 
**handle()**: Async function that updates business tables + +### Handler Interface + +All event handlers must implement this interface: + +```typescript +export interface EventHandler { + EVENT_TYPE: string; + SCHEMA: Record<string, unknown>; + handle: ( + satelliteId: string, + eventData: Record<string, unknown>, + db: LibSQLDatabase, + eventTimestamp: Date + ) => Promise<void>; +} +``` + +## Event Processing + +### Batch Endpoint + +**Route**: `POST /api/satellites/{satelliteId}/events` + +**Authentication**: Satellite API key (Bearer token via `requireSatelliteAuth()` middleware) + +**Request Schema**: +```json +{ + "events": [ + { + "type": "mcp.server.started", + "timestamp": "2025-01-10T10:30:45.123Z", + "data": { + "processId": "proc-123", + "serverId": "filesystem-team-xyz", + "serverName": "Filesystem MCP", + "teamId": "team-xyz", + "pid": 12345, + "localPort": 8080 + } + } + ] +} +``` + +**Response Schema**: +```json +{ + "success": true, + "processed": 45, + "failed": 0, + "event_ids": ["evt_1736512345_abc123", "evt_1736512346_def456"] +} +``` + +### Batch Processing Strategy + +The dispatcher processes batched events with isolated error handling: + +1. Validate request structure (events array present) +2. Validate batch size (1-100 events) +3. Process each event individually: + - Check event type exists in registry + - Validate event data against handler schema using AJV + - Parse and validate timestamp + - Call handler.handle() for valid events + - Track successful and failed events +4. Return aggregated results + +**Error Isolation**: Invalid events are logged and skipped without failing the entire batch. Valid events in the same batch are still processed.
+ +### Partial Success Handling + +When some events fail validation, the endpoint returns partial success: + +```json +{ + "success": true, + "processed": 43, + "failed": 2, + "event_ids": ["evt_001", "evt_002", "..."], + "failures": [ + { + "index": 5, + "type": "mcp.unknown.event", + "error": "Unknown event type" + }, + { + "index": 12, + "type": "mcp.tool.executed", + "error": "Missing required field: toolName" + } + ] +} +``` + +## Implemented Event Types + +### MCP Server Lifecycle + +#### mcp.server.started +Updates `satelliteProcesses` table when server successfully spawns. + +**Business Logic**: Sets status='running', records start time and process PID. + +**Required Fields**: `processId`, `serverId`, `serverName`, `teamId` + +**Optional Fields**: `pid`, `localPort` + +#### mcp.server.crashed +Updates `satelliteProcesses` table when server exits unexpectedly. + +**Business Logic**: Sets status='failed', logs error details and exit code. + +**Required Fields**: `processId`, `serverId`, `serverName`, `teamId` + +**Optional Fields**: `exitCode`, `signal`, `errorMessage`, `stackTrace` + +### Tool Execution + +#### mcp.tool.executed +Inserts record into `satelliteUsageLogs` for analytics and audit trails. + +**Business Logic**: Logs tool execution with metrics, user context, and performance data. 
+ +**Required Fields**: `toolName`, `serverId`, `teamId` + +**Optional Fields**: `processId`, `userId`, `durationMs`, `statusCode`, `errorMessage`, `requestSizeBytes`, `responseSizeBytes`, `userAgent`, `ipAddress` + +## Creating New Event Handlers + +### Handler Template + +Create a new file in `services/backend/src/events/satellite/`: + +```typescript +import type { LibSQLDatabase } from 'drizzle-orm/libsql'; +import { yourTable } from '../../db/schema.sqlite'; +import { eq } from 'drizzle-orm'; + +export const EVENT_TYPE = 'your.event.type'; + +export const SCHEMA = { + type: 'object', + properties: { + requiredField: { + type: 'string', + minLength: 1, + description: 'Description of this field' + }, + optionalField: { + type: 'number', + description: 'Optional numeric field' + } + }, + required: ['requiredField'], + additionalProperties: true +} as const; + +interface YourEventData { + requiredField: string; + optionalField?: number; +} + +export async function handle( + satelliteId: string, + eventData: Record<string, unknown>, + db: LibSQLDatabase, + eventTimestamp: Date +): Promise<void> { + const data = eventData as unknown as YourEventData; + + // Update existing business table + await db.update(yourTable) + .set({ + status: 'updated', + updated_at: eventTimestamp + }) + .where(eq(yourTable.id, data.requiredField)); +} +``` + +### Registration Steps + +1. Create handler file in `services/backend/src/events/satellite/` +2. Export `EVENT_TYPE`, `SCHEMA`, and `handle()` function +3. Add import to `handlerModules` array in `index.ts`: +```typescript +const handlerModules = [ + () => import('./mcp-server-started'), + () => import('./mcp-tool-executed'), + () => import('./mcp-server-crashed'), + () => import('./your-new-handler'), // Add here +]; +``` +4.
Handler is automatically registered and ready to process events + +## Schema Validation + +### AJV Configuration + +The dispatcher uses AJV with specific configuration for compatibility: + +```typescript +const ajv = new Ajv({ + allErrors: true, // Report all validation errors + strict: false, // Allow unknown keywords + strictTypes: false // Disable strict type checking +}); +addFormats(ajv); // Add format validators (email, date-time, etc.) +``` + +### Validation Process + +For each event: +1. Compile handler SCHEMA with AJV +2. Validate event.data against compiled schema +3. Log validation errors with instance path details +4. Skip invalid events (don't fail entire batch) + +### Schema Best Practices + +- Use `minLength: 1` for required string fields +- Include descriptive `description` fields for documentation +- Set `additionalProperties: true` to allow future extensibility +- Use `required` array for mandatory fields +- Leverage AJV formats: `email`, `date-time`, `uri`, `uuid` + +## Database Integration + +### Event-to-Table Mapping + +Events route to existing business tables based on their purpose: + +| Event Type | Business Table | Action | +|-----------|----------------|--------| +| `mcp.server.started` | `satelliteProcesses` | Update status='running', set start time | +| `mcp.server.crashed` | `satelliteProcesses` | Update status='failed', log error details | +| `mcp.tool.executed` | `satelliteUsageLogs` | Insert usage record with metrics | + +### Transaction Strategy + +Each event is processed in a separate database transaction: +- Failed events don't rollback other events +- Maintains data consistency per event +- Isolated error handling prevents cascade failures + +### Database Driver Compatibility + +When updating records, use the driver-compatible pattern: + +```typescript +const result = await db.update(table).set(data).where(condition); + +// Handle both SQLite (changes) and Turso (rowsAffected) +const updated = (result.changes || 
result.rowsAffected || 0) > 0; +``` + +## Performance Considerations + +### Batch Processing Efficiency + +- **Target**: < 100ms per 100-event batch +- **Isolation**: Each event in separate transaction +- **Logging**: Structured logging with batch metrics +- **Monitoring**: Track processing duration and success rates + +### Database Performance + +- Updates use indexed lookups (processId, satelliteId) +- Inserts optimized for high-volume logging +- No generic JSON storage overhead +- Leverages existing optimized table schemas + +### Memory Usage + +- Batch size limited to 100 events (backend validation) +- Event processing is sequential (simple implementation) +- No long-lived memory allocations +- Efficient JSON parsing with TypeScript interfaces + +## Error Handling + +### Invalid Event Type + +**Response**: Partial success with failure details + +**Logging**: Warn level with event type + +**Action**: Skip event, continue batch processing + +### Schema Validation Failure + +**Response**: Partial success with validation errors + +**Logging**: Warn level with instance path details + +**Action**: Skip event, log validation errors + +### Handler Execution Error + +**Response**: Partial success with error message + +**Logging**: Error level with stack trace + +**Action**: Catch error, track failure, continue batch + +### Database Transaction Failure + +**Response**: Partial success with database error + +**Logging**: Error level with query details + +**Action**: Rollback transaction, track failure, continue batch + +## Testing + +### Unit Testing + +Test individual event handlers in isolation: + +```typescript +// Test handler validation +const validData = { processId: 'proc-123', serverId: 'server-xyz', ... 
}; +await handler.handle('satellite-id', validData, mockDb, new Date()); + +// Test schema validation +const validate = ajv.compile(handler.SCHEMA); +expect(validate(validData)).toBe(true); +``` + +### Integration Testing + +Test full endpoint with satellite authentication: + +```bash +curl -X POST http://localhost:3000/api/satellites/{satelliteId}/events \ + -H "Authorization: Bearer {satellite_api_key}" \ + -H "Content-Type: application/json" \ + -d '{ + "events": [ + { + "type": "mcp.server.started", + "timestamp": "2025-01-10T10:30:45.123Z", + "data": { + "processId": "proc-123", + "serverId": "filesystem-test", + "serverName": "Filesystem MCP", + "teamId": "test-team" + } + } + ] + }' +``` + +### Batch Processing Tests + +- Single event batch (1 event) +- Normal batch (50 events) +- Maximum batch (100 events) +- Oversized batch (> 100 events, should reject) +- Mixed success/failure batch +- Unknown event type handling +- Invalid timestamp handling +- Schema validation failures + +## Monitoring and Debugging + +### Structured Logging + +All event operations are logged with structured data: + +```bash +# Event processing started +{"level":"info","satelliteId":"sat-123","batchSize":45} + +# Successful processing +{"level":"info","satelliteId":"sat-123","eventType":"mcp.server.started","msg":"Event processed"} + +# Validation failure +{"level":"warn","eventType":"unknown.type","msg":"Unknown event type"} + +# Batch complete +{"level":"info","satelliteId":"sat-123","processed":43,"failed":2,"msg":"Batch complete"} +``` + +### Debug Queries + +Check registered event types: + +```typescript +import { getRegisteredEventTypes } from '../events/satellite'; + +const types = await getRegisteredEventTypes(); +console.log('Registered event types:', types); +``` + +Verify database updates: + +```sql +-- Check process status after mcp.server.started +SELECT status, started_at, process_pid +FROM satelliteProcesses +WHERE id = 'proc-123'; + +-- Check tool execution logs +SELECT 
tool_name, duration_ms, status_code, timestamp +FROM satelliteUsageLogs +WHERE satellite_id = 'sat-123' +ORDER BY timestamp DESC +LIMIT 10; +``` + +## Best Practices + +### Event Handler Design + +**DO**: +- Update existing business tables with structured data +- Use TypeScript interfaces for type safety +- Include comprehensive field descriptions in schemas +- Log important state changes +- Handle optional fields gracefully + +**DON'T**: +- Store raw JSON in generic events tables +- Assume all optional fields are present +- Skip error handling in database operations +- Use blocking operations (keep handlers async) +- Duplicate business logic across handlers + +### Schema Design + +**DO**: +- Use descriptive field names matching domain concepts +- Include `description` for documentation +- Set appropriate `minLength` and format constraints +- Use `additionalProperties: true` for extensibility +- Mark truly required fields in `required` array + +**DON'T**: +- Over-constrain with excessive validation +- Use generic field names like `data` or `info` +- Forget to set `as const` on schema objects +- Validate business logic in schemas (do that in handlers) +- Create schemas with circular references + +### Database Operations + +**DO**: +- Use parameterized queries via Drizzle ORM +- Handle both SQLite and Turso driver differences +- Include timestamps for all state changes +- Use transactions for multi-step operations +- Index frequently queried fields + +**DON'T**: +- Concatenate SQL strings manually +- Assume specific driver properties exist +- Skip error handling for database operations +- Create N+1 query patterns +- Store large BLOBs in event data + +## Future Enhancements + +### Planned Event Types + +- **Client Connections**: `mcp.client.connected`, `mcp.client.disconnected` +- **Tool Discovery**: `mcp.tools.discovered`, `mcp.tools.updated` +- **Configuration**: `config.refreshed`, `config.error` +- **Satellite Lifecycle**: `satellite.registered`, 
`satellite.deregistered` +- **Process Management**: `mcp.server.restarted`, `mcp.server.permanently_failed` + +### Performance Optimizations + +- Batch database insertions for high-volume events +- Async event processing with job queue +- Event sampling for high-frequency events +- Compression for large event payloads + +### Analytics Features + +- Real-time event aggregation +- Custom alert rules based on events +- Event replay for debugging +- Historical event analysis dashboards + +## Related Documentation + +- [Satellite Event System](/development/satellite/event-system) - Satellite-side event emission +- [Satellite Communication](/development/backend/satellite-communication) - Full satellite communication architecture +- [API Documentation](/development/backend/api) - OpenAPI specification generation +- [Database Management](/development/backend/database) - Schema and migrations diff --git a/docs/development/satellite/architecture.mdx b/docs/development/satellite/architecture.mdx index 9fec0d9..571ae08 100644 --- a/docs/development/satellite/architecture.mdx +++ b/docs/development/satellite/architecture.mdx @@ -19,14 +19,15 @@ Satellites operate as edge workers similar to GitHub Actions runners, providing: - **MCP Transport Protocols**: SSE, Streamable HTTP, Direct HTTP communication - **Dual MCP Server Management**: HTTP proxy + stdio subprocess support (ready for implementation) - **Team Isolation**: nsjail sandboxing with built-in resource limits (ready for implementation) -- **OAuth 2.1 Resource Server**: Token introspection with Backend (implemented) -- **Backend Polling Communication**: Outbound-only, firewall-friendly (implemented) +- **OAuth 2.1 Resource Server**: Token introspection with Backend +- **Backend Polling Communication**: Outbound-only, firewall-friendly +- **Real-Time Event System**: Immediate satellite → backend event emission with automatic batching - **Process Lifecycle Management**: Spawn, monitor, terminate MCP servers (ready for 
implementation) -- **Background Jobs System**: Cron-like recurring tasks with automatic error handling (implemented) +- **Background Jobs System**: Cron-like recurring tasks with automatic error handling ## Current Implementation Architecture -### Phase 1: MCP Transport Layer (Implemented) +### Phase 1: MCP Transport Layer The current satellite implementation provides complete MCP client interface support: @@ -83,7 +84,7 @@ MCP Client Satellite │◀─── Response via SSE ─────│ (Stream response back) ``` -### Core Components (Implemented) +### Core Components **Session Manager:** - Cryptographically secure 32-byte base64url session IDs @@ -237,7 +238,7 @@ Each satellite instance will contain five core components: ## Communication Patterns -### Client-to-Satellite Communication (Implemented) +### Client-to-Satellite Communication **Multiple Transport Protocols:** - **SSE (Server-Sent Events)**: Real-time streaming with session management @@ -263,7 +264,7 @@ MCP Client Satellite - **Activity Tracking**: Updated on each message - **State Management**: Client info and initialization status -### Satellite-to-Backend Communication (Implemented) +### Satellite-to-Backend Communication **HTTP Polling Pattern:** ``` @@ -289,6 +290,34 @@ Satellite Backend For complete implementation details, see [Backend Polling Implementation](/development/satellite/polling). 
+### Real-Time Event System + +**Event Emission with Batching:** +``` +Satellite Operations EventBus Backend + │ │ │ + │─── mcp.server.started ──▶│ │ + │─── mcp.tool.executed ───▶│ [Queue] │ + │─── mcp.client.connected ─▶│ │ + │ [Every 3 seconds] │ + │ │ │ + │ │─── POST /events ───▶│ + │ │◀─── 200 OK ─────────│ +``` + +**Event Features:** +- **Immediate Emission**: Events emitted when actions occur (not delayed by 30s heartbeat) +- **Automatic Batching**: Events collected for 3 seconds, then sent as single batch (max 100 events) +- **Memory Management**: In-memory queue (10,000 event limit) with overflow protection +- **Graceful Error Handling**: 429 exponential backoff, 400 drops invalid events, 500/network errors retry +- **10 Event Types**: Server lifecycle, client connections, tool discovery, configuration updates + +**Difference from Heartbeat:** +- **Heartbeat** (every 30s): Aggregate metrics, system health, resource usage +- **Events** (immediate): Point-in-time occurrences, user actions, precise timestamps + +For complete event system documentation, see [Event System](/development/satellite/event-system). 
+ ## Security Architecture ### Current Security (No Authentication) @@ -469,6 +498,7 @@ The satellite service has completed **Phase 1: MCP Transport Implementation** an - **Command Processing**: HTTP MCP server management (spawn/kill/restart/health_check) - **Heartbeat Service**: Process status reporting and system metrics - **Configuration Sync**: Real-time MCP server configuration updates +- **Event System**: Real-time event emission with automatic batching (10 event types) **Foundation Infrastructure:** - **HTTP Server**: Fastify with Swagger documentation diff --git a/docs/development/satellite/backend-communication.mdx b/docs/development/satellite/backend-communication.mdx index c022988..5464501 100644 --- a/docs/development/satellite/backend-communication.mdx +++ b/docs/development/satellite/backend-communication.mdx @@ -43,6 +43,30 @@ Satellites adjust polling frequency based on Backend guidance: - **Backoff Mode**: Exponential backoff up to 5 minutes on errors - **Maintenance Mode**: Reduced polling during maintenance windows +### Communication Channels + +The satellite uses three distinct communication channels with the Backend: + +**1. Command Polling (Backend → Satellite)** +- Backend creates commands, satellite polls and executes +- Adaptive intervals: 2-60 seconds based on command priority +- Used for: MCP server configuration, process management, system updates +- Direction: Backend initiates, satellite responds + +**2. Heartbeat (Satellite → Backend, Periodic)** +- Satellite reports status every 30 seconds +- Contains: System metrics, process counts, resource usage +- Used for: Health monitoring, capacity planning, aggregate analytics +- Direction: Satellite reports on fixed schedule + +**3. 
Events (Satellite → Backend, Immediate)** +- Satellite emits events when actions occur, batched every 3 seconds +- Contains: Point-in-time occurrences with precise timestamps +- Used for: Real-time UI updates, audit trails, user notifications +- Direction: Satellite reports immediately (not waiting for heartbeat) + +For detailed event system documentation, see [Event System](/development/satellite/event-system). + ## Current Implementation ### Phase 1: Basic Connection Testing ✅ diff --git a/docs/development/satellite/event-system.mdx b/docs/development/satellite/event-system.mdx new file mode 100644 index 0000000..c1f861c --- /dev/null +++ b/docs/development/satellite/event-system.mdx @@ -0,0 +1,914 @@ +--- +title: Event System +description: Real-time event emission from satellite to backend for operational visibility, audit trails, and user feedback. +sidebar: Satellite Development +--- + +import { Callout } from 'fumadocs-ui/components/callout'; + +# Satellite Event System + +The Satellite Event System provides real-time communication from satellites to the backend for operational visibility. Unlike the 30-second heartbeat cycle, events are emitted immediately when significant actions occur and batched for efficient transmission. + +## Why Events? + +DeployStack Satellites use a **polling-based communication pattern** where satellites make outbound HTTP requests to the backend. 
This is firewall-friendly and NAT-compatible, but creates a timing gap: + +**Problem**: Important satellite operations need immediate backend visibility +- MCP client connects (user expects instant UI feedback) +- Tool executes (audit trail needs precise timestamps) +- Process crashes (alerts need immediate dispatch) +- Security events (compliance requires real-time logging) + +**Solution**: Event emission with automatic batching +- Events emitted immediately when actions occur +- Batched every 3 seconds for network efficiency +- Sent to backend via existing authentication +- Zero impact on satellite performance + +## Architecture Overview + +``` +Satellite Components EventBus Backend + │ │ │ + │─── emit('mcp.server.started') ───▶│ │ + │ │ │ + │─── emit('mcp.tool.executed') ────▶│ │ + │ [Queue] │ + │─── emit('mcp.client.connected') ─▶│ │ + │ │ │ + │ [Every 3 seconds] │ + │ │ │ + │ │─── POST /events ───▶│ + │ │ │ + │ │◀─── 200 OK ─────────│ +``` + +### Key Components + +**EventBus Service** (`src/services/event-bus.ts`) +- In-memory queue for event collection +- 3-second batch window (configurable) +- Automatic transmission to backend +- Graceful error handling and retry + +**Event Registry** (`src/events/registry.ts`) +- Type-safe event definitions +- Event data structures +- Compile-time validation + +**Backend Integration** (`src/services/backend-client.ts`) +- `sendEvents()` method for batch transmission +- Uses existing satellite authentication +- Handles partial success responses + +## Event Types + +The satellite emits 10 event types across 4 categories: + +### MCP Server Lifecycle + +#### `mcp.server.started` +Emitted when MCP server process successfully spawns and completes handshake. 
+ +**Data Structure:** +```typescript +{ + server_id: string; // installation_id + server_slug: string; // installation_name + team_id: string; + process_id: number; // OS process ID + transport: 'stdio'; + tool_count: number; // Tools discovered (0 initially) + spawn_duration_ms: number; +} +``` + +**Example:** +```typescript +eventBus.emit('mcp.server.started', { + server_id: 'inst_abc123', + server_slug: 'filesystem', + team_id: 'team_xyz', + process_id: 12345, + transport: 'stdio', + tool_count: 0, + spawn_duration_ms: 234 +}); +``` + +#### `mcp.server.crashed` +Emitted when MCP server process exits unexpectedly with non-zero code. + +**Data Structure:** +```typescript +{ + server_id: string; + server_slug: string; + team_id: string; + process_id: number; + exit_code: number; + signal: string; // 'SIGTERM', 'SIGKILL', etc. + uptime_seconds: number; + crash_count: number; + will_restart: boolean; +} +``` + +#### `mcp.server.restarted` +Emitted after successful automatic restart following a crash. + +**Data Structure:** +```typescript +{ + server_id: string; + server_slug: string; + team_id: string; + old_process_id: number; + new_process_id: number; + restart_reason: 'crash'; + attempt_number: number; // 1, 2, or 3 +} +``` + +#### `mcp.server.permanently_failed` +Emitted when server exhausts all 3 restart attempts. + +**Data Structure:** +```typescript +{ + server_id: string; + server_slug: string; + team_id: string; + total_crashes: number; + last_error: string; + failed_at: string; // ISO 8601 timestamp +} +``` + +### Client Connections + +#### `mcp.client.connected` +Emitted when MCP client establishes SSE connection to satellite. 
+ +**Data Structure:** +```typescript +{ + session_id: string; + client_type: 'vscode' | 'cursor' | 'claude' | 'unknown'; + user_agent: string; + team_id: string; + transport: 'sse'; + ip_address: string; +} +``` + +**Client Type Detection:** +- Parses `User-Agent` header +- Detects VS Code, Cursor, Claude Desktop +- Falls back to 'unknown' for unrecognized clients + +#### `mcp.client.disconnected` +Emitted when SSE connection closes (client disconnect, timeout, or error). + +**Data Structure:** +```typescript +{ + session_id: string; + team_id: string; + connection_duration_seconds: number; + tool_execution_count: number; + disconnect_reason: 'client_close' | 'timeout' | 'error'; +} +``` + +### Tool Discovery + +#### `mcp.tools.discovered` +Emitted after successful tool discovery from HTTP or stdio MCP server. + +**Data Structure:** +```typescript +{ + server_id: string; + server_slug: string; + team_id: string; + tool_count: number; + tool_names: string[]; + discovery_duration_ms: number; + previous_tool_count: number; +} +``` + +#### `mcp.tools.updated` +Emitted when tool list changes during configuration refresh. + +**Data Structure:** +```typescript +{ + server_id: string; + server_slug: string; + team_id: string; + added_tools: string[]; + removed_tools: string[]; + total_tools: number; +} +``` + +### Configuration Management + +#### `config.refreshed` +Emitted after successful configuration fetch from backend. + +**Data Structure:** +```typescript +{ + config_hash: string; + server_count: number; + teams_count: number; + change_detected: boolean; + fetch_duration_ms: number; +} +``` + +#### `config.error` +Emitted when configuration fetch fails. 
+ +**Data Structure:** +```typescript +{ + error_type: 'server_error'; + error_message: string; + status_code: number | null; + retry_in: number; // Seconds until next retry +} +``` + +## Event Batching + +### Batch Window: 3 Seconds + +Events are collected in memory for 3 seconds, then sent as a single batch: + +``` +0s 3s 6s 9s +│───────────────│───────────────│───────────────│ +│ Collect events│ Send batch │ Collect events│ +│ (6 events) │ (6 events) │ (2 events) │ +``` + +**Benefits:** +- Reduces HTTP request overhead +- Efficient network usage +- Near real-time (3s latency acceptable) +- Backend-friendly batching + +### Max Batch Size: 100 Events + +If more than 100 events accumulate, only first 100 are sent: + +``` +0-3s: Collect 150 events +3s: Send first 100, keep 50 in queue +3-6s: Collect 30 more (queue = 80) +6s: Send 80 events +``` + +### Empty Batch Handling + +If no events occur, no HTTP request is made: + +``` +0-3s: No events +3s: Skip sending (no request) +3-6s: No events +6s: Skip sending (no request) +``` + +## Memory Management + +### Queue Limit: 10,000 Events + +The in-memory queue holds a maximum of 10,000 events: + +**Normal Operation:** +- Events queued and sent every 3 seconds +- Queue size typically under 100 events + +**Backend Outage:** +- Events accumulate in queue +- Queue grows up to 10,000 events +- Oldest events dropped when limit reached +- Dropped count logged for monitoring + +**Memory Usage:** +- Average event: 1-2KB +- 10,000 events ≈ 10-20MB RAM +- Acceptable footprint for satellite process + + +Queue is in-memory only. Satellite restarts clear the queue. This is acceptable because events are operational telemetry, not critical data requiring persistence. 
+ + +## Error Handling + +### Backend Response Codes + +**400 Bad Request** (Invalid event data) +- Drops the invalid event immediately +- Logs error with event details +- Continues processing other events +- No retry for malformed data + +**401 Unauthorized** (Authentication failed) +- Keeps events in queue +- Logs authentication error +- Retries in next batch cycle +- May indicate satellite needs re-registration + +**429 Too Many Requests** (Rate limited) +- Implements exponential backoff +- Backoff sequence: 3s → 6s → 12s → 24s → 48s (max) +- Keeps all events in queue +- Resumes normal 3s batching after successful send + +**500 Internal Server Error** (Backend failure) +- Keeps events in queue +- Logs backend error +- Retries in next 3s batch cycle +- Continues normal operations + +**Network Timeout / Connection Refused** +- Keeps events in queue +- Logs connection failure +- Retries in next 3s batch cycle +- Satellite continues operating normally + +### Retry Strategy + +**Natural Retry Pattern:** +- Failed batches remain in queue +- Next 3-second cycle automatically includes them +- No explicit retry logic needed + +**Exponential Backoff:** +- Only applies to 429 rate limit responses +- Temporary increase in batch interval +- Returns to 3s after successful send + +**Event Dropping:** +- Only drops events for 400 validation errors +- Never drops events for temporary failures +- Queue overflow drops oldest events (logged) + +## Graceful Shutdown + +When satellite receives SIGTERM or SIGINT: + +``` +1. Stop accepting new events +2. Cancel next scheduled batch +3. Flush all queued events immediately +4. Wait up to 5 seconds for completion +5. If successful: Log success, proceed with shutdown +6. 
If timeout: Force shutdown, log lost event count +``` + +**Configuration:** +```bash +EVENT_FLUSH_TIMEOUT_MS=5000 # 5-second grace period +``` + +## Integration Points + +### ProcessManager + +**Location:** `src/process/manager.ts` + +**Events Emitted:** +- `mcp.server.started` - After spawn + handshake +- `mcp.server.crashed` - On unexpected exit +- `mcp.server.restarted` - After auto-restart +- `mcp.server.permanently_failed` - After 3 failed restarts + +**Implementation Pattern:** +```typescript +// Constructor accepts optional EventBus +constructor( + logger: Logger, + eventBus?: EventBus +) { + this.eventBus = eventBus; +} + +// Emit events with try-catch protection +try { + this.eventBus?.emit('mcp.server.started', { + server_id: config.installation_id, + server_slug: config.installation_name, + team_id: config.team_id, + process_id: process.pid, + transport: 'stdio', + tool_count: 0, + spawn_duration_ms: elapsed + }); +} catch (error) { + this.logger.warn({ error }, 'Failed to emit event (non-fatal)'); +} +``` + +### SessionManager + +**Location:** `src/core/session-manager.ts` + +**Events Emitted:** +- `mcp.client.connected` - On new SSE session creation +- `mcp.client.disconnected` - On session cleanup + +**Client Type Detection:** +```typescript +private detectClientType(userAgent?: string): string { + if (!userAgent) return 'unknown'; + const ua = userAgent.toLowerCase(); + if (ua.includes('vscode')) return 'vscode'; + if (ua.includes('cursor')) return 'cursor'; + if (ua.includes('claude')) return 'claude'; + return 'unknown'; +} +``` + +### RemoteToolDiscoveryManager + +**Location:** `src/services/remote-tool-discovery-manager.ts` + +**Events Emitted:** +- `mcp.tools.discovered` - After initial tool discovery +- `mcp.tools.updated` - When tool list changes + +**Tool Change Detection:** +```typescript +// Compare previous and current tool lists +const addedTools = currentTools.filter(t => !previousTools.includes(t)); +const removedTools = 
previousTools.filter(t => !currentTools.includes(t)); + +if (addedTools.length > 0 || removedTools.length > 0) { + eventBus.emit('mcp.tools.updated', { + server_id, + server_slug, + team_id, + added_tools: addedTools, + removed_tools: removedTools, + total_tools: currentTools.length + }); +} +``` + +### DynamicConfigManager + +**Location:** `src/services/dynamic-config-manager.ts` + +**Events Emitted:** +- `config.refreshed` - After successful config fetch +- `config.error` - On config fetch failure + +**Configuration Hash:** +```typescript +import crypto from 'crypto'; + +const configHash = crypto + .createHash('sha256') + .update(JSON.stringify(config)) + .digest('hex') + .substring(0, 12); +``` + +## Configuration + +### Environment Variables + +```bash +# Event batching (default: 3000ms = 3 seconds) +EVENT_BATCH_INTERVAL_MS=3000 + +# Max events per batch (default: 100) +EVENT_MAX_BATCH_SIZE=100 + +# Max events in memory (default: 10000) +EVENT_MAX_QUEUE_SIZE=10000 + +# Graceful shutdown timeout (default: 5000ms) +EVENT_FLUSH_TIMEOUT_MS=5000 +``` + +### Development vs Production + +**Development:** +```bash +EVENT_BATCH_INTERVAL_MS=1000 # 1s for faster feedback +EVENT_MAX_QUEUE_SIZE=1000 # Smaller queue +LOG_LEVEL=debug # Verbose logging +``` + +**Production:** +```bash +EVENT_BATCH_INTERVAL_MS=3000 # Standard 3s +EVENT_MAX_QUEUE_SIZE=10000 # Full queue +LOG_LEVEL=info # Standard logging +``` + +## Monitoring Events + +### Structured Logging + +All event operations are logged with structured data: + +```bash +# Event emission +{"level":"debug","operation":"event_emitted","event_type":"mcp.server.started","queue_size":23} + +# Batch sending +{"level":"info","operation":"event_batch_sending","event_count":45,"queue_size":45} + +# Batch success +{"level":"info","operation":"event_batch_success","event_count":45,"duration_ms":234} + +# Queue overflow +{"level":"warn","operation":"event_queue_overflow","dropped_count":10,"queue_size":10000} + +# Backend errors 
+{"level":"error","operation":"event_batch_error","error":"Connection refused"} +``` + +### Log Searches + +```bash +# All event emissions +grep "event_emitted" logs/satellite.log + +# Specific event type +grep "mcp.server.started" logs/satellite.log + +# Batch operations +grep "event_batch" logs/satellite.log + +# Errors only +grep "event.*error" logs/satellite.log + +# Queue issues +grep "event_queue_overflow" logs/satellite.log +``` + +### EventBus Statistics + +Access runtime statistics programmatically: + +```typescript +const stats = eventBus.getStats(); +console.log({ + queueSize: stats.queueSize, // Current events in queue + totalEmitted: stats.totalEmitted, // Total events emitted + totalSent: stats.totalSent, // Total events sent to backend + totalFailed: stats.totalFailed, // Total send failures + totalDropped: stats.totalDropped, // Total events dropped + lastBatchSentAt: stats.lastBatchSentAt, // ISO timestamp + lastErrorAt: stats.lastErrorAt, // ISO timestamp + isShuttingDown: stats.isShuttingDown // Graceful shutdown status +}); +``` + +## Type Safety + +### Compile-Time Validation + +The event system uses TypeScript for complete type safety: + +```typescript +// ✅ Valid: Correct event type and data structure +eventBus.emit('mcp.server.started', { + server_id: 'inst_123', + server_slug: 'filesystem', + team_id: 'team_xyz', + process_id: 12345, + transport: 'stdio', + tool_count: 0, + spawn_duration_ms: 234 +}); + +// ❌ TypeScript Error: Unknown event type +eventBus.emit('invalid.event.type', { ... }); + +// ❌ TypeScript Error: Missing required field +eventBus.emit('mcp.server.started', { + server_id: 'inst_123', + // Missing: server_slug, team_id, process_id, etc. +}); + +// ❌ TypeScript Error: Wrong field type +eventBus.emit('mcp.server.started', { + server_id: 123, // Should be string + // ... 
+}); +``` + +### Event Registry + +**Location:** `src/events/registry.ts` + +```typescript +// Event type union +export type EventType = + | 'mcp.server.started' + | 'mcp.server.crashed' + | 'mcp.client.connected' + // ... all event types + +// Event data mapping +export interface EventDataMap { + 'mcp.server.started': { + server_id: string; + server_slug: string; + team_id: string; + process_id: number; + transport: 'stdio'; + tool_count: number; + spawn_duration_ms: number; + }; + // ... all event data structures +} + +// Complete event structure +export interface SatelliteEvent { + type: EventType; + timestamp: string; // ISO 8601 + data: EventDataMap[EventType]; +} +``` + +## Best Practices + +### DO ✅ + +**Wrap emit() calls in try-catch:** +```typescript +try { + this.eventBus?.emit('event.type', { ... }); +} catch (error) { + this.logger.warn({ error }, 'Failed to emit event (non-fatal)'); +} +``` + +**Use optional chaining:** +```typescript +// EventBus might be undefined during initialization +this.eventBus?.emit('event.type', { ... }); +``` + +**Include all required fields:** +```typescript +// TypeScript enforces this, but be explicit +eventBus.emit('mcp.server.started', { + server_id: config.installation_id, // Required + server_slug: config.installation_name, // Required + team_id: config.team_id, // Required + // ... all required fields +}); +``` + +**Calculate metrics before emitting:** +```typescript +const duration = Date.now() - startTime; +this.logger.info({ duration_ms: duration }); +eventBus.emit('operation.completed', { duration_ms: duration }); +``` + +**Use descriptive event names:** +```typescript +// ✅ Clear intent +'mcp.server.crashed' +'mcp.client.connected' + +// ❌ Vague +'server.event' +'client.update' +``` + +### DON'T ❌ + +**Never block on event emission:** +```typescript +// ❌ BAD: Don't await event emission +await eventBus.emit('event.type', { ... }); + +// ✅ GOOD: Fire-and-forget +eventBus.emit('event.type', { ... 
}); +``` + +**Never throw errors from emission failures:** +```typescript +// ❌ BAD: Event failure crashes service +eventBus.emit('event.type', { ... }); // Might throw + +// ✅ GOOD: Wrapped in try-catch +try { + eventBus.emit('event.type', { ... }); +} catch (error) { + logger.warn({ error }, 'Event emission failed (non-fatal)'); +} +``` + +**Never emit sensitive data:** +```typescript +// ❌ BAD: Includes passwords +eventBus.emit('auth.failed', { + username: 'user@example.com', + password: 'secret123' // Never log passwords! +}); + +// ✅ GOOD: Sanitized data +eventBus.emit('auth.failed', { + username: 'user@example.com', + reason: 'invalid_credentials' +}); +``` + +**Avoid high-frequency emission without sampling:** +```typescript +// ❌ BAD: Emits thousands of events +for (const item of largeArray) { + eventBus.emit('item.processed', { item }); +} + +// ✅ GOOD: Emit summary after batch +const processed = largeArray.map(processItem); +eventBus.emit('batch.processed', { + item_count: largeArray.length, + duration_ms: elapsed +}); +``` + +**Never assume EventBus is defined:** +```typescript +// ❌ BAD: Crashes if EventBus not initialized +this.eventBus.emit('event.type', { ... }); + +// ✅ GOOD: Optional chaining +this.eventBus?.emit('event.type', { ... 
}); +``` + +## Troubleshooting + +### Events Not Emitting + +**Symptom:** No `event_emitted` logs in satellite logs + +**Diagnosis:** +```typescript +// Check if EventBus is defined +console.log('EventBus defined:', !!this.eventBus); +``` + +**Fix:** Verify EventBus is assigned to service in `src/server.ts`: +```typescript +(yourService as any).eventBus = eventBus; +``` + +### Events Not Reaching Backend + +**Symptom:** Events emitted but not in backend database + +**Check backend connectivity:** +```bash +curl http://localhost:3000/api/health +``` + +**Check event batch errors:** +```bash +grep "event_batch_error" logs/satellite.log +``` + +**Check backend logs:** +```bash +# Backend should log received events +grep "satelliteEvents" logs/backend.log +``` + +### High Queue Size + +**Symptom:** `event_queue_overflow` warnings in logs + +**Causes:** +- Backend unreachable (network issues) +- Backend overloaded (429 responses) +- Very high event volume + +**Solutions:** +```bash +# Check backend connectivity +curl http://localhost:3000/api/satellites/{id}/events + +# Check for rate limiting +grep "429" logs/satellite.log + +# Monitor queue size +grep "queue_size" logs/satellite.log | tail -20 +``` + +### Batch Send Failures + +**Symptom:** Repeated `event_batch_error` logs + +**Check error details:** +```bash +grep "event_batch_error" logs/satellite.log | jq . 
+``` + +**Common causes:** +- Network timeout → Check network connectivity +- 401 Unauthorized → Verify satellite API key +- 500 Server Error → Check backend logs +- Connection refused → Verify backend running + +## Performance Considerations + +### Network Efficiency + +**Batch Size Impact:** +- 100 events/batch ≈ 100-200KB payload +- Single HTTP request vs 100 individual requests +- Reduced network overhead +- Backend-friendly batching + +**Batch Interval Trade-offs:** +- 3s default: Near real-time with efficient batching +- 1s interval: More real-time, more requests +- 5s interval: Less real-time, fewer requests + +### Memory Usage + +**Queue Memory:** +- Average event: 1-2KB +- Max queue: 10,000 events +- Total memory: 10-20MB +- Acceptable for satellite process + +**Queue Growth:** +- Normal: < 100 events +- Backend outage: Grows to 10,000 +- Overflow: Oldest events dropped + +### CPU Impact + +**Event Emission:** +- Synchronous queue operation +- No I/O during emit() +- < 1ms overhead per event + +**Batch Processing:** +- JSON serialization every 3 seconds +- Single HTTP POST request +- Minimal CPU impact + +## Future Enhancements + +### Disk-Based Queue (Planned) + +**Benefits:** +- Survive satellite restarts +- No event loss during crashes +- Longer backend outage tolerance + +**Trade-offs:** +- Increased complexity +- Disk I/O overhead +- Not needed for operational telemetry + +### Event Sampling (Planned) + +**High-Volume Events:** +- Sample 10% of tool executions +- 100% sampling for errors +- Configurable sampling rates + +**Benefits:** +- Reduced network traffic +- Lower backend load +- Maintained visibility into patterns + +### Real-Time Streaming (Future) + +**WebSocket Event Stream:** +- Real-time event delivery to frontend +- Sub-second latency +- Live operational dashboards + +**Requirements:** +- WebSocket infrastructure +- Frontend event handling +- Connection management + +## Related Documentation + +- [Backend 
Communication](/development/satellite/backend-communication) - Satellite-backend communication patterns +- [Polling](/development/satellite/polling) - Command polling system +- [Logging](/development/satellite/logging) - Structured logging configuration +- [Process Management](/development/satellite/process-management) - MCP server lifecycle diff --git a/docs/development/satellite/index.mdx b/docs/development/satellite/index.mdx index 7f8863e..725a492 100644 --- a/docs/development/satellite/index.mdx +++ b/docs/development/satellite/index.mdx @@ -23,6 +23,7 @@ DeployStack Satellites are **edge workers** (similar to GitHub Actions runners) - ✅ **TypeScript + Webpack** build system with full type safety - ✅ **Development Workflow** with hot reload and linting - ✅ **Backend Communication** (polling, commands, heartbeat with team-grouped processes) +- ✅ **Real-Time Event System** (immediate event emission with 3s batching, 10 event types) - ✅ **OAuth 2.1 Authentication** (token introspection, team context) - ✅ **stdio MCP Server Process Management** (spawn, monitor, auto-restart, terminate) - ✅ **Team Isolation** (environment-based: nsjail in production, plain spawn in dev) @@ -90,30 +91,6 @@ npm run lint # ESLint with auto-fix npm run release # Release management ``` -### Current MCP Transport Endpoints - -- **GET** `/sse` - Establish SSE connection with session management -- **POST** `/message?session={id}` - Send JSON-RPC messages via SSE sessions -- **GET/POST** `/mcp` - Streamable HTTP transport with optional sessions -- **OPTIONS** `/mcp` - CORS preflight handling - -### Testing MCP Transport - -```bash -# Test SSE connection -curl -N -H "Accept: text/event-stream" http://localhost:3001/sse - -# Send JSON-RPC message (replace SESSION_ID) -curl -X POST "http://localhost:3001/message?session=SESSION_ID" \ - -H "Content-Type: application/json" \ - -d '{"jsonrpc":"2.0","id":"1","method":"initialize","params":{}}' - -# Direct HTTP transport -curl -X POST 
http://localhost:3001/mcp \ - -H "Content-Type: application/json" \ - -d '{"jsonrpc":"2.0","id":"1","method":"tools/list","params":{}}' -``` - ## Development Guides @@ -204,11 +181,19 @@ curl -X POST http://localhost:3001/mcp \ > Cron-like job system for recurring tasks with automatic error handling and monitoring. + + } + href="/development/satellite/event-system" + title="Event System" + > + Real-time event emission from satellite to backend with automatic batching and error handling. + ## Current Features -### MCP Transport Layer (Implemented) +### MCP Transport Layer - **SSE Transport**: Server-Sent Events with session management - **SSE Messaging**: JSON-RPC message sending via established sessions - **Streamable HTTP**: Direct HTTP communication with optional streaming @@ -231,29 +216,30 @@ curl -X POST http://localhost:3001/mcp \ ## Implemented Features -### Phase 2: MCP Server Process Management ✅ COMPLETED +### Phase 2: MCP Server Process Management - **Process Lifecycle**: Spawn, monitor, auto-restart (max 3), and terminate MCP servers - **stdio Communication**: Full JSON-RPC 2.0 protocol over stdin/stdout -- **HTTP Proxy**: Reverse proxy for external MCP server endpoints ✅ working +- **HTTP Proxy**: Reverse proxy for external MCP server endpoints working - **Health Monitoring**: Process crash detection with auto-restart - **Resource Limits**: nsjail with 100MB RAM, 60s CPU, 50 processes (production Linux) - **Tool Discovery**: Automatic tool caching from both HTTP and stdio servers - **Team-Grouped Heartbeat**: processes_by_team reporting every 30 seconds -### Phase 3: Team Isolation (Infrastructure Ready) +### Phase 3: Team Isolation - **nsjail Sandboxing**: Complete process isolation with built-in resource limits - **Namespace Isolation**: PID, mount, UTS, IPC namespaces per team - **Filesystem Isolation**: Team-specific read-only and writable directories - **Credential Management**: Secure environment injection via nsjail -### Phase 4: Backend 
Integration ✅ COMPLETED +### Phase 4: Backend Integration - **HTTP Polling**: Outbound communication with DeployStack Backend - **Configuration Sync**: Dynamic configuration updates from Backend - **Status Reporting**: Real-time satellite health and usage metrics - **Command Processing**: Execute Backend commands with acknowledgment +- **Event System**: Real-time event emission with automatic batching (10 event types) ### Phase 5: Enterprise Features -- **OAuth 2.1 Authentication**: Resource server with token introspection ✅ COMPLETED +- **OAuth 2.1 Authentication**: Resource server with token introspection - **Audit Logging**: Complete audit trails for compliance - **Multi-Region Support**: Global satellite deployment - **Auto-Scaling**: Dynamic resource allocation based on demand @@ -278,28 +264,12 @@ Follow established patterns when adding new routes: 4. Use manual JSON serialization with `JSON.stringify()` 5. Register routes in `src/routes/index.ts` -### Logging Best Practices -- Use structured logging with context objects -- Pass logger instances as parameters to services -- Include operation identifiers for traceability -- Use appropriate log levels (debug, info, warn, error) -- Avoid console.log statements in favor of Pino logger - ### Configuration Management - Use environment variables for configuration - Provide sensible defaults for development - Document all configuration options - Support both development and production modes -## Strategic Context - -The satellite service represents DeployStack's evolution from a developer tool into a comprehensive enterprise MCP management platform. 
This strategic pivot addresses: - -- **Adoption Friction**: Eliminates CLI installation barriers (12x better conversion) -- **Market Differentiation**: Creates new "MCP-as-a-Service" category -- **Enterprise Requirements**: Provides team isolation and compliance features -- **Scalability**: Enables horizontal scaling and global deployment - ## Contributing When contributing to satellite development: @@ -309,5 +279,4 @@ When contributing to satellite development: 3. **Document Changes**: Update relevant documentation for new features 4. **Test Thoroughly**: Ensure changes work in both development and production 5. **Consider Enterprise**: Design features with team isolation and security in mind -6. **MCP Compliance**: Ensure JSON-RPC 2.0 protocol compliance diff --git a/docs/development/satellite/process-management.mdx b/docs/development/satellite/process-management.mdx index fd4f0a2..e8c746b 100644 --- a/docs/development/satellite/process-management.mdx +++ b/docs/development/satellite/process-management.mdx @@ -244,6 +244,39 @@ The ProcessManager emits events for monitoring and integration: - Request tracking includes: `request_id`, `method`, `duration_ms` - Error context includes: error messages, exit codes, signals +## Event Emission + +The ProcessManager emits real-time events to the Backend for operational visibility and audit trails. These events are batched every 3 seconds and sent via the Event System. 
+ +### Lifecycle Events + +**mcp.server.started** +- Emitted after successful spawn and handshake completion +- Includes: server_id, process_id, spawn_duration_ms, tool_count +- Provides immediate visibility into new MCP server availability + +**mcp.server.crashed** +- Emitted on unexpected process exit with non-zero code +- Includes: exit_code, signal, uptime_seconds, crash_count, will_restart +- Enables real-time alerting for process failures + +**mcp.server.restarted** +- Emitted after successful automatic restart +- Includes: old_process_id, new_process_id, restart_reason, attempt_number +- Tracks restart attempts for reliability monitoring + +**mcp.server.permanently_failed** +- Emitted when restart limit (3 attempts) is exceeded +- Includes: total_crashes, last_error, failed_at timestamp +- Critical alert requiring manual intervention + +**Event vs Internal Events:** +- ProcessManager internal events (processSpawned, processTerminated, etc.) are for satellite-internal coordination +- Event System events (mcp.server.started, etc.) are sent to Backend for external visibility +- Both work together: Internal events trigger state changes, Event System events provide audit trail + +For complete event system documentation and all event types, see [Event System](/development/satellite/event-system). 
+ ## Team Isolation ### Installation Name Format From a690d7da7d78eba36c903b0f06ea97058c1ef3fe Mon Sep 17 00:00:00 2001 From: Lasim Date: Mon, 13 Oct 2025 22:55:39 +0200 Subject: [PATCH 05/11] chore: migrate to mintlify party 1 --- ...loy_preview.yml => __deploy_preview.yml__} | 0 .../{deploy_prod.yml => __deploy_prod.yml__} | 0 .github/workflows/_deploy_prod.yml_ | 27 - .github/workflows/ci.yml | 5 +- _DEPRECATED/gateway/api.mdx | 146 ---- _DEPRECATED/gateway/caching-system.mdx | 219 ------ _DEPRECATED/gateway/device-management.mdx | 178 ----- _DEPRECATED/gateway/enterprise-management.mdx | 303 -------- _DEPRECATED/gateway/index.mdx | 217 ------ _DEPRECATED/gateway/mcp.mdx | 165 ----- _DEPRECATED/gateway/meta.json | 9 - _DEPRECATED/gateway/oauth.mdx | 367 ---------- _DEPRECATED/gateway/process-management.mdx | 227 ------ _DEPRECATED/gateway/security.mdx | 374 ---------- _DEPRECATED/gateway/session-management.mdx | 320 -------- _DEPRECATED/gateway/sse-transport.mdx | 219 ------ _DEPRECATED/gateway/structure.mdx | 134 ---- _DEPRECATED/gateway/teams.mdx | 140 ---- _DEPRECATED/gateway/tech-stack.mdx | 264 ------- _DEPRECATED/gateway/testing.mdx | 110 --- app/[[...slug]]/page.tsx | 129 ---- app/global.css | 70 -- app/layout.config.tsx | 42 -- app/layout.tsx | 52 -- app/sitemap.ts | 19 - lib/components/DeployStackLogo.tsx | 14 - lib/debug-source.ts | 26 - lib/h1-extractor.ts | 60 -- lib/seo-utils.ts | 128 ---- lib/source.ts | 77 -- lib/structured-data.ts | 178 ----- package-lock.json | 689 +++++------------- package.json | 4 +- source.config.ts | 2 +- 34 files changed, 185 insertions(+), 4729 deletions(-) rename .github/workflows/{deploy_preview.yml => __deploy_preview.yml__} (100%) rename .github/workflows/{deploy_prod.yml => __deploy_prod.yml__} (100%) delete mode 100644 .github/workflows/_deploy_prod.yml_ delete mode 100644 _DEPRECATED/gateway/api.mdx delete mode 100644 _DEPRECATED/gateway/caching-system.mdx delete mode 100644 
_DEPRECATED/gateway/device-management.mdx delete mode 100644 _DEPRECATED/gateway/enterprise-management.mdx delete mode 100644 _DEPRECATED/gateway/index.mdx delete mode 100644 _DEPRECATED/gateway/mcp.mdx delete mode 100644 _DEPRECATED/gateway/meta.json delete mode 100644 _DEPRECATED/gateway/oauth.mdx delete mode 100644 _DEPRECATED/gateway/process-management.mdx delete mode 100644 _DEPRECATED/gateway/security.mdx delete mode 100644 _DEPRECATED/gateway/session-management.mdx delete mode 100644 _DEPRECATED/gateway/sse-transport.mdx delete mode 100644 _DEPRECATED/gateway/structure.mdx delete mode 100644 _DEPRECATED/gateway/teams.mdx delete mode 100644 _DEPRECATED/gateway/tech-stack.mdx delete mode 100644 _DEPRECATED/gateway/testing.mdx delete mode 100644 app/[[...slug]]/page.tsx delete mode 100644 app/global.css delete mode 100644 app/layout.config.tsx delete mode 100644 app/layout.tsx delete mode 100644 app/sitemap.ts delete mode 100644 lib/components/DeployStackLogo.tsx delete mode 100644 lib/debug-source.ts delete mode 100644 lib/h1-extractor.ts delete mode 100644 lib/seo-utils.ts delete mode 100644 lib/source.ts delete mode 100644 lib/structured-data.ts diff --git a/.github/workflows/deploy_preview.yml b/.github/workflows/__deploy_preview.yml__ similarity index 100% rename from .github/workflows/deploy_preview.yml rename to .github/workflows/__deploy_preview.yml__ diff --git a/.github/workflows/deploy_prod.yml b/.github/workflows/__deploy_prod.yml__ similarity index 100% rename from .github/workflows/deploy_prod.yml rename to .github/workflows/__deploy_prod.yml__ diff --git a/.github/workflows/_deploy_prod.yml_ b/.github/workflows/_deploy_prod.yml_ deleted file mode 100644 index f9a2a9c..0000000 --- a/.github/workflows/_deploy_prod.yml_ +++ /dev/null @@ -1,27 +0,0 @@ -name: Release to CloudFlare Prod - -on: - push: - branches: - - prod - -permissions: - contents: write - issues: write - pull-requests: write - -jobs: - - deploy_prod: - runs-on: ubuntu-latest - 
permissions: - contents: read - deployments: write - name: Deploy to DeployStack.io - environment: - name: 'Production' - url: https://deploystack.io/docs - steps: - - name: Checkout - uses: actions/checkout@v4 - - run: echo "Executing webhook to deploy" \ No newline at end of file diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml index 7d110cd..65cefce 100644 --- a/.github/workflows/ci.yml +++ b/.github/workflows/ci.yml @@ -34,10 +34,7 @@ jobs: run: npm run lint:md - name: Run Lint Links - run: npm run lint:links - - - name: Run Lint Links - run: npm run build + run: npm run lint:links release: name: Run Release diff --git a/_DEPRECATED/gateway/api.mdx b/_DEPRECATED/gateway/api.mdx deleted file mode 100644 index 719b609..0000000 --- a/_DEPRECATED/gateway/api.mdx +++ /dev/null @@ -1,146 +0,0 @@ ---- -title: Gateway API Communication -description: Backend communication patterns and URL management for CLI commands -sidebar: API -icon: Globe ---- - -# Gateway API Communication - -The DeployStack Gateway CLI manages backend communication automatically through stored configuration and credential management. This guide covers how CLI commands interact with the backend and manage different environments. - -## Backend URL Management - -### Automatic URL Storage - -When users authenticate with the gateway, the backend URL is automatically stored alongside their credentials. This eliminates the need to specify the backend URL for every command after initial login. - -**Storage Location:** -- **Primary**: macOS Keychain, Windows Credential Manager, or Linux Secret Service -- **Fallback**: Encrypted file at `~/.deploystack/credentials.enc` - -The backend URL is stored as part of the `StoredCredentials` object and persists across CLI sessions. - -### URL Resolution Priority - -CLI commands resolve the backend URL using this priority order: - -1. **Command-line override** - `--url` flag when provided -2. **Stored URL** - URL saved during authentication -3. 
**Default fallback** - `https://cloud.deploystack.io` - -This approach supports both development workflows with local backends and production usage seamlessly. - -### Environment Detection - -The gateway automatically adapts behavior based on the backend URL: - -**Production Mode** (`https://cloud.deploystack.io`): -- Strict HTTPS enforcement -- Full SSL certificate validation -- Standard error messages - -**Development Mode** (localhost or custom URLs): -- HTTP connections allowed for localhost -- Development-specific error messages -- Additional debugging context - -## Command Implementation Patterns - -### Authentication Check - -All API-dependent commands should verify authentication before making requests. The credential storage handles token validation and expiration checking automatically. - -### Backend URL Usage - -Commands should retrieve stored credentials and use the embedded backend URL rather than requiring URL parameters. The URL resolution pattern ensures consistency across all commands. - -### Error Handling - -Different backend environments may return different error formats. Commands should handle both production and development error responses gracefully. - -## API Client Configuration - -### Credential Integration - -The API client accepts stored credentials and automatically extracts the appropriate backend URL. No additional URL configuration is required when credentials contain the backend information. 
- -### Request Headers - -All authenticated requests include: -- Bearer token authentication -- User-Agent identification -- Content-Type specification - -### Timeout Handling - -Network operations include appropriate timeouts with different values for various operation types: -- OAuth callback operations -- API requests -- Token refresh operations - -## Development Workflow - -### Local Backend Testing - -Developers working with local backends can authenticate once and have all commands automatically use the development server: - -The authentication flow stores the development URL, and subsequent commands use it automatically without additional configuration. - -### URL Override Capability - -Commands maintain `--url` override options for testing different backends or switching environments temporarily without re-authentication. - -### Environment Switching - -To switch between environments, users can either: -- Re-authenticate with a different backend URL -- Use command-line URL overrides for temporary testing - -## Security Considerations - -### URL Validation - -Backend URLs are validated during authentication to ensure they meet security requirements for the target environment. - -### Credential Isolation - -Each backend URL maintains separate credential storage, preventing credential leakage between development and production environments. - -### HTTPS Enforcement - -Production environments enforce HTTPS communication, while development environments allow HTTP for localhost testing. - -## Error Response Handling - -### Network Errors - -Commands should provide helpful error messages that include the backend URL being used, especially for development environments where connectivity issues are common. - -### Authentication Errors - -Token expiration and invalid token errors should guide users to re-authenticate, preserving their backend URL preference. 
- -### Backend-Specific Errors - -Different backend versions or configurations may return varying error formats. Commands should handle these gracefully and provide consistent user experience. - -## Integration Guidelines - -### New Command Development - -When developing new CLI commands that interact with the backend: - -1. Use the credential storage system for authentication -2. Extract backend URL from stored credentials -3. Implement URL override options for flexibility -4. Handle environment-specific error cases -5. Provide clear error messages with backend context - -### API Client Usage - -The DeployStack API client handles most backend communication complexity automatically. Commands should focus on business logic rather than HTTP details. - -### Testing Considerations - -Test commands against both production and development backends to ensure consistent behavior across environments. The URL storage system supports this testing workflow naturally. diff --git a/_DEPRECATED/gateway/caching-system.mdx b/_DEPRECATED/gateway/caching-system.mdx deleted file mode 100644 index bb5da8e..0000000 --- a/_DEPRECATED/gateway/caching-system.mdx +++ /dev/null @@ -1,219 +0,0 @@ ---- -title: Gateway Caching System -description: Team-aware tool caching architecture that enables fast gateway startup and automatic tool discovery across MCP servers -sidebar: Caching System -icon: Database ---- - -import { Card, Cards } from 'fumadocs-ui/components/card'; -import { Zap, Users, RefreshCw, Shield, Clock, HardDrive } from 'lucide-react'; - -# Gateway Caching System - -The DeployStack Gateway implements a sophisticated team-aware caching system that dramatically improves performance by pre-discovering and caching tools from MCP servers. This enables instant gateway startup and seamless tool availability for development teams. 
- -## Architecture Overview - -The caching system operates on a **cache-as-manifest philosophy** where tools are proactively discovered and stored locally, serving as both a performance optimization and a configuration manifest that defines what should be running versus what is actually running in the persistent background process model. - -## Core Concepts - - - } - title="Fast Gateway Startup" - > - Cached tools enable instant gateway startup without waiting for MCP server discovery - - - } - title="Team-Aware Isolation" - > - Each team's tools are cached separately with complete isolation and security boundaries - - - } - title="Automatic Discovery" - > - Tools are automatically discovered and cached when switching teams or refreshing configurations - - - } - title="Secure Storage" - > - Cache files are stored securely with team-specific access controls and encryption - - - } - title="Intelligent Invalidation" - > - Cache is automatically invalidated based on configuration changes and time-based policies - - - } - title="Fallback Mechanisms" - > - Graceful fallback to cached data when live discovery fails or servers are unavailable - - - -## Cache Architecture - -### Storage Structure -The caching system uses a hierarchical file-based storage approach: - -- **Base Directory**: `~/.deploystack/cache/` -- **Team Isolation**: `teams/{teamId}/` -- **Cache Files**: `tools-cache.json` per team - -This structure ensures complete isolation between teams while providing fast local access to cached tool information. 
- -### Cache Content -Each team's cache contains: - -- **Tool Definitions**: Complete tool schemas with input parameters and descriptions -- **Server Metadata**: Information about which MCP server provides each tool -- **Namespaced Names**: Tools are namespaced as `serverName-toolName` for conflict resolution -- **Discovery Timestamps**: When each tool was last discovered and validated -- **Configuration Hashes**: Checksums to detect when server configurations change - -## Tool Discovery Workflow - -### Automatic Discovery Triggers -Tool discovery is automatically triggered during: - -- **Team Switching**: When developers switch to a different team context -- **Configuration Refresh**: When MCP server configurations are updated from the cloud -- **Manual Refresh**: When developers explicitly request tool discovery -- **Cache Invalidation**: When cached data becomes stale or invalid - -### Discovery Process -The discovery workflow follows these steps: - -1. **Server Enumeration**: Identify all MCP servers configured for the team -2. **Process Communication**: Connect to already-running MCP server processes as described in [Gateway Process Management](/development/gateway/process-management) -3. **Tool Interrogation**: Query each running server for its available tools using MCP protocol -4. **Schema Extraction**: Extract complete tool schemas including parameters and descriptions -5. **Namespacing**: Apply server-specific namespacing to prevent tool name conflicts -6. **Cache Storage**: Store discovered tools in the team-specific cache file - -**Note**: In the persistent background process model, tool discovery communicates with already-running MCP servers rather than spawning processes specifically for discovery. 
- -### Centralized Management -All tool discovery operations are managed through a centralized `ToolDiscoveryManager` that: - -- **Eliminates Code Duplication**: Single source of truth for all discovery logic -- **Provides Consistent Behavior**: Uniform discovery behavior across all Gateway components -- **Handles Error Recovery**: Robust error handling with fallback mechanisms -- **Manages Progress Feedback**: Consistent user feedback during discovery operations - -## Cache Invalidation Strategy - -### Time-Based Invalidation -Cache entries are automatically invalidated based on: - -- **Maximum Age**: Default 24-hour time-to-live for cached tool information -- **Configuration Changes**: Immediate invalidation when server configurations change -- **Team Context Changes**: Cache clearing when switching between teams - -### Configuration-Based Invalidation -The system detects configuration changes through: - -- **Server Configuration Hashing**: Checksums of server spawn commands and environment variables -- **Team Membership Changes**: Detection of team member additions or removals -- **Permission Updates**: Changes to team-based access policies - -### Manual Invalidation -Developers and administrators can manually invalidate cache through: - -- **CLI Commands**: Explicit cache clearing and refresh commands -- **Team Switching**: Automatic cache refresh when switching team contexts -- **Configuration Updates**: Cache refresh when updating MCP server configurations - -## Performance Optimization - -### Cache-First Strategy -The Gateway prioritizes cached data for optimal performance: - -- **Instant Tool Exposure**: Cached tools are immediately available to MCP clients -- **Background Refresh**: Cache updates happen asynchronously without blocking operations -- **Predictive Loading**: Frequently-used tools are kept warm in cache -- **Lazy Discovery**: New servers are discovered on-demand when first accessed - -### Fallback Mechanisms -When live discovery fails, the 
system provides graceful degradation: - -- **Cached Tool Fallback**: Use previously cached tools when servers are unavailable -- **Partial Discovery**: Continue with available tools even if some servers fail -- **Error State Caching**: Cache error states to avoid repeated failed discovery attempts -- **Recovery Strategies**: Automatic retry with exponential backoff for failed discoveries - -## Team Isolation and Security - -### Access Control -Each team's cache is completely isolated through: - -- **Directory Separation**: Team-specific cache directories prevent cross-team access -- **File Permissions**: Operating system-level permissions restrict cache file access -- **Encryption**: Sensitive cache data is encrypted using team-specific keys -- **Audit Logging**: All cache operations are logged for security and compliance - -### Data Privacy -The caching system ensures data privacy by: - -- **Local Storage Only**: Cache files are stored locally and never transmitted -- **Credential Exclusion**: No sensitive credentials are stored in cache files -- **Metadata Only**: Only tool schemas and metadata are cached, not actual data -- **Automatic Cleanup**: Cache files are automatically cleaned up when teams are removed - -## Integration Points - -The caching system integrates seamlessly with other Gateway components: - -- **[MCP Configuration Management](/development/gateway/mcp)**: Uses team configurations to determine which servers to discover -- **[Gateway Process Management](/development/gateway/process-management)**: Coordinates with process spawning for tool discovery -- **[Gateway Project Structure](/development/gateway/structure)**: Implements the centralized architecture through the utils layer -- **HTTP Proxy Server**: Provides cached tool information for immediate client responses - -## Cache Management Operations - -### Developer Commands -The Gateway provides several commands for cache management: - -- **Status Checking**: View current cache status and tool 
counts -- **Manual Refresh**: Force refresh of cached tools from all servers -- **Cache Clearing**: Remove cached data for troubleshooting -- **Discovery Testing**: Validate tool discovery for specific servers - -### Administrative Operations -Enterprise administrators can manage caching through: - -- **Team-Wide Refresh**: Refresh cache for all team members -- **Policy Enforcement**: Apply caching policies across teams -- **Usage Analytics**: Monitor cache hit rates and discovery patterns -- **Troubleshooting**: Diagnose cache-related issues and performance problems - -## Monitoring and Observability - -### Cache Metrics -The system tracks comprehensive caching metrics: - -- **Cache Hit Rates**: Percentage of requests served from cache vs. live discovery -- **Discovery Success Rates**: Success/failure rates for tool discovery operations -- **Cache Size**: Storage usage and tool counts per team -- **Refresh Frequency**: How often cache is refreshed and invalidated - -### Performance Indicators -Key performance indicators include: - -- **Gateway Startup Time**: Time from start to tool availability -- **Tool Discovery Duration**: Time required to discover tools from each server -- **Cache Effectiveness**: Reduction in discovery time due to caching -- **Error Recovery Time**: Time to recover from failed discovery operations - -This caching system ensures that the DeployStack Gateway provides instant tool availability while maintaining the security, isolation, and performance requirements of enterprise development teams. 
diff --git a/_DEPRECATED/gateway/device-management.mdx b/_DEPRECATED/gateway/device-management.mdx deleted file mode 100644 index 6e75ec9..0000000 --- a/_DEPRECATED/gateway/device-management.mdx +++ /dev/null @@ -1,178 +0,0 @@ ---- -title: Device Management Architecture -description: Technical implementation of device detection, caching, and management in the DeployStack Gateway CLI -sidebar: Device Management ---- - -# Gateway Device Management Architecture - -The DeployStack Gateway implements a sophisticated device management system that balances security, performance, and user experience. This document explains the technical architecture, design decisions, and implementation details from a developer perspective. - -## Architecture Overview - -The Gateway's device management system consists of three core components: - -**Device Detection System** -- Hardware fingerprinting for unique device identification -- System information collection for compatibility and analytics -- Lightweight signature generation for cache validation - -**Device Information Cache** -- High-performance caching to eliminate redundant device detection -- Secure storage using OS keychain with encrypted fallback -- Integrity validation and automatic cache invalidation - -**OAuth2 Integration** -- Device registration during authentication flow -- Device information included in token exchange -- No separate device management endpoints required - -## The Performance Problem We Solved - -### Original Challenge - -Before implementing device caching, every Gateway command suffered from a significant performance bottleneck: - -- **Device fingerprinting took 3+ seconds** on every command execution -- Commands like `deploystack refresh` and `deploystack mcp` felt sluggish -- Users experienced poor CLI responsiveness -- System resources were wasted on redundant hardware detection - -### Root Cause Analysis - -Device fingerprinting is inherently expensive because it requires: -- Network interface 
enumeration to collect MAC addresses -- System information queries across multiple OS APIs -- Cryptographic hashing of collected hardware data -- File system operations to gather system details - -This expensive operation was happening on **every single command** because device information is required for: -- Backend API authentication and device tracking -- Security validation and audit logging -- Configuration management and team analytics - -## Device Caching Architecture - -### Design Principles - -**Performance First** -- Cache-first architecture with graceful fallback -- 30x performance improvement (3s → 0.1s) -- Persistent cache across logout/login sessions - -**Security Without Compromise** -- Hardware signature validation for cache integrity -- Automatic invalidation on hardware changes -- Encrypted storage with integrity checksums - -**Developer Experience** -- Completely transparent to end users -- No manual cache management required -- Automatic background operation - -### Cache Storage Strategy - -We implemented a dual-storage approach for maximum reliability: - -**Primary: OS Keychain Storage** -- macOS: Keychain Services -- Windows: Credential Manager -- Linux: Secret Service API -- Benefits: Native OS security, encrypted at rest, user-scoped access - -**Fallback: Encrypted File Storage** -- AES-256-GCM encryption with derived keys -- Stored in `~/.deploystack/device-cache.enc` -- File permissions restricted to user only (0o600) -- Used when keychain access fails or is unavailable - -### Cache Validation System - -**Hardware Signature Validation** -- Lightweight hardware signature (not full fingerprint) -- Detects major hardware changes without expensive operations -- Automatically invalidates cache when hardware changes detected - -**Integrity Protection** -- SHA256 checksums with random salts prevent tampering -- Cache version tracking for schema evolution -- Automatic cleanup of corrupted or invalid cache entries - -**Time-Based Expiration** -- 
30-day cache lifetime for security -- Automatic renewal during normal usage -- Configurable expiration for different deployment scenarios - -## Device Detection Implementation - -### Hardware Fingerprinting Process - -**Network Interface Collection** -- Enumerate all network interfaces -- Extract MAC addresses from physical interfaces -- Filter out virtual and temporary interfaces -- Handle cross-platform interface naming differences - -**System Information Gathering** -- Operating system type and version -- System architecture (x64, arm64, etc.) -- Hostname and system identifiers -- Node.js runtime version for compatibility - -**Fingerprint Generation** -- Combine hardware identifiers in deterministic order -- Apply cryptographic hashing (SHA256) -- Generate stable, unique device identifier -- Ensure consistency across reboots and minor system changes - -### Lightweight Hardware Signatures - -For cache validation, we use a much faster "hardware signature" instead of full fingerprinting: - -**Why Separate Signatures?** -- Full fingerprinting: 3+ seconds, comprehensive hardware analysis -- Hardware signature: \<100ms, basic system identifiers -- Signature detects major changes (new hardware, different machine) -- Signature allows minor changes (software updates, network changes) - -**Signature Components** -- Primary MAC address of main network interface -- System hostname and basic OS identifiers -- Minimal set of stable hardware characteristics -- Fast to compute, sufficient for cache validation - -## Security Architecture - -### Threat Model Considerations - -**Cache Tampering Protection** -- SHA256 checksums with random salts -- Integrity validation on every cache access -- Automatic invalidation of corrupted cache -- Secure key derivation for encryption - -**Hardware Change Detection** -- Automatic cache invalidation when hardware changes -- Prevents cache reuse on different machines -- Detects both major and minor hardware modifications -- Balances security 
with usability - -**Storage Security** -- OS keychain provides encrypted storage -- Fallback encryption uses industry-standard AES-256-GCM -- File permissions restrict access to user only -- No plaintext device information stored - -### Privacy Considerations - -**Minimal Data Collection** -- Only collect device information necessary for functionality -- No tracking or analytics data in device cache -- User control over device naming and identification -- Clear data retention and cleanup policies - -**Data Isolation** -- Device cache is user-scoped and isolated -- No cross-user cache sharing or access -- Secure cleanup when users are removed -- Audit trail separate from cached data diff --git a/_DEPRECATED/gateway/enterprise-management.mdx b/_DEPRECATED/gateway/enterprise-management.mdx deleted file mode 100644 index e77463c..0000000 --- a/_DEPRECATED/gateway/enterprise-management.mdx +++ /dev/null @@ -1,303 +0,0 @@ ---- -title: Enterprise MCP Management -description: How the Gateway transforms MCP servers into enterprise governance tools with toggleable controls -sidebar: Enterprise Management -icon: Building2 ---- - -import { Card, Cards } from 'fumadocs-ui/components/card'; -import { Building2, ToggleLeft, Eye, Shield } from 'lucide-react'; - -# Enterprise MCP Management - -The DeployStack Gateway transforms individual MCP servers into enterprise governance tools, presenting each server as a toggleable tool with comprehensive management capabilities for organizational control. - -## Business Context - -### The Enterprise Challenge -Traditional MCP implementations expose individual tools from multiple servers, creating a complex landscape that's difficult to govern at scale. 
Enterprise organizations need: - -- **Visibility**: Clear overview of which MCP servers are available and active -- **Control**: Ability to enable/disable entire MCP servers based on policy -- **Governance**: Centralized management with audit trails -- **Compliance**: Team-based access controls and usage monitoring - -### DeployStack Solution -The Gateway addresses these challenges by presenting **MCP servers as tools** rather than exposing individual server tools, enabling enterprise governance while maintaining developer productivity. - -## Architecture Overview - - - } - title="Server-as-Tool Model" - > - Each MCP server appears as a single toggleable tool with rich metadata - - - } - title="Management Actions" - > - Enable, disable, and status operations for operational control - - - } - title="Enterprise Visibility" - > - Rich descriptions and metadata from secure catalog integration - - - } - title="Policy Enforcement" - > - Team-based access controls with centralized governance - - - -## Tool Transformation - -### From Individual Tools to Server Management -**Traditional MCP Approach:** -```json -{ - "tools": [ - {"name": "brightdata__search", "description": "Search the web"}, - {"name": "brightdata__scrape", "description": "Scrape webpage content"}, - {"name": "calculator__add", "description": "Add two numbers"}, - {"name": "calculator__multiply", "description": "Multiply numbers"} - ] -} -``` - -**DeployStack Enterprise Approach:** -```json -{ - "tools": [ - { - "name": "brightdata-mcp", - "description": "brightdata-mcp MCP server - Web scraping and data collection", - "inputSchema": { - "type": "object", - "properties": { - "action": { - "type": "string", - "enum": ["enable", "disable", "status"] - } - } - } - }, - { - "name": "calculator-server", - "description": "calculator-server MCP server - Mathematical operations and calculations", - "inputSchema": { - "type": "object", - "properties": { - "action": { - "type": "string", - "enum": ["enable", 
"disable", "status"] - } - } - } - } - ] -} -``` - -## Management Actions - -### Enable Action -**Purpose**: Activate an MCP server for use -**Usage**: `{"action": "enable"}` - -**Process:** -1. Validates server configuration from team catalog -2. Spawns MCP server process with injected credentials -3. Establishes stdio communication channel -4. Returns operational status and process information - -**Response Example:** -```json -{ - "server": "brightdata-mcp", - "action": "enabled", - "status": "running", - "message": "brightdata-mcp MCP server has been enabled and is running" -} -``` - -### Disable Action -**Purpose**: Deactivate a running MCP server -**Usage**: `{"action": "disable"}` - -**Process:** -1. Locates running MCP server process -2. Gracefully terminates process with 5-second timeout -3. Cleans up resources and communication channels -4. Confirms successful shutdown - -**Response Example:** -```json -{ - "server": "brightdata-mcp", - "action": "disabled", - "status": "stopped", - "message": "brightdata-mcp MCP server has been disabled" -} -``` - -### Status Action (Default) -**Purpose**: Retrieve comprehensive server information -**Usage**: `{"action": "status"}` or no action parameter - -**Information Provided:** -- Current operational status (running/stopped) -- Server description from enterprise catalog -- Runtime environment details -- Performance metrics (uptime, message count, error count) -- Process health information - -**Response Example:** -```json -{ - "server": "brightdata-mcp", - "action": "status_check", - "status": "running", - "description": "Web scraping and data collection platform", - "runtime": "nodejs", - "message": "brightdata-mcp MCP server is running", - "uptime": 1847293, - "messageCount": 42, - "errorCount": 0 -} -``` - -## Enterprise Benefits - -### Centralized Governance -- **Policy Enforcement**: Administrators control which MCP servers are available per team -- **Access Control**: Team-based permissions determine server 
availability -- **Audit Trail**: All enable/disable actions logged for compliance -- **Resource Management**: Centralized control over computational resources - -### Developer Experience -- **Simplified Interface**: Developers see clean server names instead of complex tool hierarchies -- **Rich Metadata**: Comprehensive descriptions help developers understand capabilities -- **Operational Control**: Developers can manage server lifecycle as needed -- **Status Transparency**: Clear visibility into server health and performance - -### Operational Excellence -- **Resource Optimization**: Servers only run when needed, reducing resource consumption -- **Error Isolation**: Server-level management isolates issues to specific services -- **Performance Monitoring**: Built-in metrics for operational visibility -- **Graceful Degradation**: Individual server failures don't impact other services - -## Metadata Integration - -### Catalog-Driven Descriptions -Server descriptions are pulled from the enterprise catalog stored securely: - -```typescript -// From team configuration -const installation = teamConfig.installations.find( - inst => inst.installation_name === serverName -); - -const description = installation?.server?.description || ''; - -// Resulting tool description -const toolDescription = `${serverName} MCP server${description ? ` - ${description}` : ''}`; -``` - -### Rich Server Information -Each server tool includes: -- **Installation Name**: Clean, human-readable identifier -- **Description**: Business context from enterprise catalog -- **Runtime**: Technical environment (nodejs, python, go, etc.) 
-- **Team Context**: Access permissions and policies -- **Operational Metrics**: Performance and health data - -## Security and Compliance - -### Credential Management -- **Secure Injection**: Credentials injected at process spawn time -- **No Exposure**: Developers never see or handle credentials directly -- **Centralized Control**: All credentials managed through enterprise catalog -- **Audit Trail**: Credential usage tracked for compliance - -### Access Control -- **Team-Based**: Server availability determined by team membership -- **Policy-Driven**: Enterprise policies control server access -- **Role-Based**: Different permissions for different team roles -- **Centralized Management**: All access control managed through cloud control plane - -### Monitoring and Compliance -- **Usage Tracking**: All server interactions logged and monitored -- **Performance Metrics**: Operational data for capacity planning -- **Error Reporting**: Centralized error tracking and alerting -- **Compliance Reporting**: Audit trails for regulatory requirements - -## Implementation Workflow - -### Tool Discovery Flow -1. **Client Request**: Development tool calls `tools/list` -2. **Server Enumeration**: Gateway iterates through team's MCP server configurations -3. **Metadata Enrichment**: Descriptions pulled from secure catalog -4. **Tool Generation**: Each server becomes a management tool -5. **Response**: Clean list of server management tools returned - -### Tool Execution Flow -1. **Action Request**: Client calls server tool with management action -2. **Server Identification**: Gateway maps tool name to server configuration -3. **Action Processing**: Enable/disable/status action executed -4. **Process Management**: Server processes spawned/terminated as needed -5. **Response**: Operational status and metadata returned - -## Developer Workflow - -### Typical Usage Pattern -1. **Discovery**: Developer calls `tools/list` to see available MCP servers -2. 
**Status Check**: Calls server tool with `status` action to understand current state -3. **Activation**: Uses `enable` action to start needed MCP servers -4. **Work**: Utilizes MCP server capabilities through other tools/interfaces -5. **Cleanup**: Uses `disable` action to stop servers when done - -### VS Code Integration -In VS Code, developers see: -``` -🔧 Available Tools: -├── brightdata-mcp - brightdata-mcp MCP server - Web scraping and data collection -├── calculator-server - calculator-server MCP server - Mathematical operations -└── github-integration - github-integration MCP server - GitHub API access -``` - -Each tool can be toggled on/off with simple actions, providing enterprise governance with developer-friendly controls. - -## Developer Tool Discovery - -### CLI-Based Exploration -Before enabling MCP servers through the enterprise management interface, developers can explore available tools using the CLI tool discovery feature: - -**Command**: `deploystack mcp --tools ` - -**Purpose**: Allows developers to understand what capabilities each MCP server provides before activation, enabling informed decisions about which servers to enable for their workflow. - -**Benefits**: -- **Preview Capabilities**: See all available tools and their descriptions without starting the server -- **Parameter Understanding**: Review required and optional parameters for each tool -- **Informed Decisions**: Choose the right MCP servers based on actual tool availability -- **Development Planning**: Plan workflows around available tool capabilities - -### Integration with Enterprise Management -The CLI tool discovery complements the enterprise management approach: - -1. **Discovery Phase**: Developer uses `deploystack mcp --tools` to explore server capabilities -2. **Planning Phase**: Developer identifies which servers provide needed functionality -3. **Activation Phase**: Developer enables specific servers through enterprise management tools -4. 
**Utilization Phase**: Developer uses the activated servers' capabilities in their workflow - -This workflow ensures developers make informed decisions about server activation while maintaining enterprise governance and control. - -The enterprise management layer transforms complex MCP server ecosystems into manageable, governable, and developer-friendly tools that meet both organizational requirements and developer productivity needs. diff --git a/_DEPRECATED/gateway/index.mdx b/_DEPRECATED/gateway/index.mdx deleted file mode 100644 index 047ea12..0000000 --- a/_DEPRECATED/gateway/index.mdx +++ /dev/null @@ -1,217 +0,0 @@ ---- -title: Gateway Development -description: Developer documentation for the DeployStack Gateway - the local secure proxy that manages MCP servers and credentials for enterprise teams. -sidebar: Gateway -icon: Terminal ---- - -import { Card, Cards } from 'fumadocs-ui/components/card'; -import { Terminal, Code2, Settings, Shield, Zap, Users, Rocket } from 'lucide-react'; - -# DeployStack Gateway Development - -The DeployStack Gateway is the local secure proxy that connects developers to their team's MCP servers through a centralized control plane. It acts as a smart process manager and credential vault, running MCP server processes as persistent background services while enforcing access policies from the cloud. 
- -## Architecture Overview - -The Gateway implements a sophisticated Control Plane / Data Plane architecture with comprehensive transport support: - -- **Control Plane**: Authenticates with `cloud.deploystack.io` to download team configurations and access policies -- **Data Plane**: Manages local MCP server processes with stdio, SSE, and Streamable HTTP transport protocols -- **Security Layer**: Injects credentials securely into process environments without exposing them to developers -- **Session Management**: Handles secure SSE connections with cryptographic session IDs for VS Code compatibility -- **Transport Layer**: Supports both legacy SSE transport and modern Streamable HTTP transport for maximum client compatibility - -## Core Features - - - } - title="Triple Transport Support" - > - Supports stdio transport for CLI tools, SSE transport for VS Code compatibility, and Streamable HTTP for modern MCP clients - - - } - title="Secure Credential Injection" - > - Injects API tokens and credentials directly into process environments without developer exposure - - - } - title="Individual Tool Exposure" - > - Exposes individual MCP tools with namespacing (e.g., brightdata-search_engine) for direct use in development environments - - - } - title="Session Management" - > - Cryptographically secure session handling with automatic cleanup for persistent connections - - - } - title="Unified Proxy" - > - Single HTTP endpoint supporting multiple client types with intelligent request routing - - - } - title="Team-Based Access" - > - Enforces team-based access control policies downloaded from the cloud control plane - - - } - title="Tool Caching System" - > - Team-aware caching enables instant gateway startup and automatic tool discovery on team switching - - - -## Development Setup - -### Prerequisites - -- Node.js (v18 or higher) -- npm (v8 or higher) -- TypeScript development environment -- A DeployStack account at [cloud.deploystack.io](https://cloud.deploystack.io) - 
-### Local Development - -```bash -# Navigate to the gateway service -cd services/gateway - -# Install dependencies -npm install - -# Start development server -npm run dev - -# Build for production -npm run build - -# Start production build -npm start -``` - -## Key Components - -### Authentication Module -Handles secure authentication with the DeployStack cloud control plane and manages access tokens. - -### Configuration Sync -Downloads and synchronizes team MCP server configurations, including process spawn commands and environment variables. - -### Process Manager -Manages the lifecycle of MCP server processes, including: -- On-demand process spawning -- Stdio communication handling -- Process cleanup and resource management -- Environment variable injection - -### HTTP Proxy Server -Exposes multiple endpoints for different client types: -- **GET /sse**: SSE connection establishment for VS Code and legacy clients -- **POST /message**: Session-based JSON-RPC for SSE clients -- **POST /mcp**: Streamable HTTP endpoint for modern MCP clients -- **GET /health**: Health check endpoint for monitoring - -### Session Manager -Handles secure SSE connections with: -- Cryptographically secure session ID generation -- Session lifecycle management and cleanup -- Connection state tracking and validation -- Automatic timeout and resource management - -### Enterprise Management Layer -Transforms MCP servers into enterprise governance tools: -- Each MCP server appears as a toggleable tool -- Enable/disable/status actions for operational control -- Rich metadata from secure catalog integration -- Team-based access policy enforcement - -### Security Layer -Ensures credentials are handled securely: -- Encrypted storage of downloaded configurations -- Secure environment variable injection -- No credential exposure to developer environment -- Session-based authentication for persistent connections - -## Configuration Format - -The Gateway works with MCP server configurations in this 
format: - -```json -{ - "name": "brightdata", - "command": "npx", - "args": ["@brightdata/mcp"], - "env": { - "API_TOKEN": "secure-token-from-vault" - } -} -``` - -## Development Workflow - -1. **Authentication**: Gateway authenticates with cloud control plane -2. **Config Download**: Downloads team's MCP server configurations -3. **Persistent Process Startup**: Starts all configured MCP servers as background processes when gateway launches -4. **HTTP Server**: Starts local HTTP server with multiple endpoints immediately available: - - SSE endpoint: `localhost:9095/sse` (for VS Code and legacy clients) - - Messages endpoint: `localhost:9095/message` (for session-based JSON-RPC) - - MCP endpoint: `localhost:9095/mcp` (for modern Streamable HTTP clients) - - Health endpoint: `localhost:9095/health` (for monitoring) -5. **Request Handling**: Receives MCP requests from development tools and intelligently routes to appropriate transport -6. **Process Management**: Maintains persistent background processes as described in [Gateway Process Management](/development/gateway/process-management). -7. **Credential Injection**: Securely injects environment variables into running processes at startup -8. **Tool Routing**: Routes namespaced tool calls to persistent MCP servers via stdio transport -9. **Transport Selection**: Automatically detects client capabilities and uses appropriate transport (SSE or Streamable HTTP) - -For detailed information about the caching system, see [Gateway Caching System](/development/gateway/caching-system). 
- -## Language Support - -The Gateway is language-agnostic and supports MCP servers written in: - -- **Node.js**: `npx`, `node` commands -- **Python**: `python`, `pip`, `pipenv` commands -- **Go**: Compiled binary execution -- **Rust**: Compiled binary execution -- **Any Language**: Via appropriate runtime commands - -## Security Considerations - -### Credential Management -- Credentials are never written to disk in plain text -- Environment variables are injected directly into spawned processes -- No credential exposure to the developer's shell environment - -### Process Isolation -- Each MCP server runs in its own isolated process -- Process cleanup ensures no resource leaks -- Automatic process termination after idle periods - -### Network Security -- Local HTTP server only binds to localhost -- No external network exposure by default -- Secure communication with cloud control plane - -## Contributing - -The Gateway is actively under development. Key areas for contribution: - -- **Process Management**: Improving spawn/cleanup logic -- **Security**: Enhancing credential handling -- **Performance**: Optimizing stdio communication -- **Platform Support**: Adding Windows/Linux compatibility -- **Error Handling**: Robust error recovery diff --git a/_DEPRECATED/gateway/mcp.mdx b/_DEPRECATED/gateway/mcp.mdx deleted file mode 100644 index 73391e0..0000000 --- a/_DEPRECATED/gateway/mcp.mdx +++ /dev/null @@ -1,165 +0,0 @@ ---- -title: Gateway MCP Configuration Management -description: How the DeployStack Gateway CLI downloads, processes, and securely stores MCP server configurations for teams -sidebar: MCP Configuration -icon: Bot ---- - -# Gateway MCP Configuration Management - -The DeployStack Gateway CLI automatically manages MCP (Model Context Protocol) server configurations for teams, downloading installation data from the backend API and storing it securely for local process management. 
- -## Overview - -The Gateway implements a sophisticated MCP configuration system that: - -- **Downloads** team MCP installations from the backend API -- **Processes** raw API data into Gateway-ready server configurations -- **Stores** both raw and processed data securely using OS-level storage -- **Manages** team context switching with automatic config updates - -## API Integration - -### Legacy Team-Based Endpoint -The Gateway can fetch MCP installations from the legacy team-based endpoint: -``` -GET /api/teams/{teamId}/mcp/installations -``` - -### Modern Three-Tier Gateway Endpoint -For optimal performance and device-specific configurations, the Gateway uses the modern three-tier endpoint: -``` -GET /api/gateway/me/mcp-configurations?hardware_id={hardwareId} -``` - -This endpoint automatically merges Template + Team + User configurations and returns ready-to-use server configurations with device-specific user arguments and environment variables. For detailed information about this endpoint, see the [Backend API Documentation](/development/backend/api). - -### Response Structure -The API returns team MCP installations with this interface: -```typescript -interface MCPInstallationsResponse { - success: boolean; - data: MCPInstallation[]; -} -``` - -## Data Storage Architecture - -### Dual Storage Approach -The Gateway stores **both** raw API data and processed configurations: - -1. **Raw Installations** - Complete API response for audit and debugging -2. 
**Processed Server Configs** - Gateway-ready configurations for process spawning - -### Storage Interface -```typescript -interface TeamMCPConfig { - team_id: string; - team_name: string; - installations: MCPInstallation[]; // Raw API data - servers: MCPServerConfig[]; // Processed configs - last_updated: string; -} -``` - -### Secure Storage -- **Primary**: OS Keychain (macOS Keychain, Windows Credential Manager, Linux Secret Service) -- **Fallback**: AES-256-CBC encrypted files -- **Key Format**: `${userEmail}-${teamId}` for team isolation - -## Configuration Processing - -The Gateway transforms raw API installations into executable server configurations: - -### Runtime Detection -- **Node.js**: `npx @package-name` -- **Python**: `python -m package_name` -- **Go**: Direct binary execution -- **Custom**: Uses `installation_methods` from API - -### Environment Variable Merging -1. Server default environment variables -2. User-customized overrides from `user_environment_variables` -3. Secure injection at process spawn time - -## Team Context Integration - -### Automatic Management -- **Login**: Downloads default team's MCP configuration -- **Team Switch**: Clears old config, downloads new team's config -- **Logout**: Clears all stored MCP configurations - -### Configuration Lifecycle -1. API authentication and team selection -2. MCP installations download via API -3. Data validation and filtering -4. Configuration processing and transformation -5. Secure storage with team isolation -6. 
Runtime access for process management - -## Developer Commands - -### Configuration Management -- `deploystack mcp --status` - Show current configuration status -- `deploystack mcp --refresh` - Force refresh from API -- `deploystack mcp --clear` - Clear stored configuration -- `deploystack mcp --test` - Run processing validation tests - -### Debug Information -The `deploystack mcp` command shows raw stored data including: -- Complete team information -- Processed server configurations -- Raw API installation data -- Environment variables (with sensitive data masking) - -## Security Considerations - -### Data Isolation -- Each team's configuration stored with unique keys -- No cross-team data access possible -- Automatic cleanup on team changes - -### Credential Protection -- Environment variables injected at runtime only -- No plain text storage of sensitive data -- OS-level keychain integration for maximum security - -## Tool Discovery and Caching - -Beyond configuration management, the Gateway implements an advanced tool discovery system that automatically identifies and caches individual tools from each MCP server. This system operates seamlessly with the configuration management to provide: - -### Automatic Discovery -- **Team Switching**: Tools are automatically discovered from all servers when switching teams -- **Configuration Updates**: Tool cache is refreshed when server configurations change -- **Manual Refresh**: Developers can explicitly refresh tools using CLI commands - -### Team-Aware Caching -- **Isolated Storage**: Each team's discovered tools are cached separately -- **Fast Startup**: Gateway starts instantly using cached tool information -- **Fallback Support**: Cached tools remain available even when servers are temporarily unavailable - -For comprehensive details about the tool discovery and caching system, see [Gateway Caching System](/development/gateway/caching-system). 
- -## Developer Commands - -### Configuration Management -- `deploystack mcp --status` - Show current configuration status -- `deploystack mcp --refresh` - Force refresh from API -- `deploystack mcp --clear` - Clear stored configuration - -### Tool Discovery -- `deploystack mcp --tools ` - Discover and display tools from a specific MCP server (requires running gateway) -- `deploystack teams --switch ` - Switch teams with automatic tool discovery - -**Note**: The `--tools` command only works when the gateway is running (`deploystack start`), as it communicates with already-running MCP server processes rather than spawning them on-demand. - -## Integration Points - -The stored MCP configurations are consumed by: - -- **Process Manager** - Spawns MCP server processes using stored configs as described in [Process Management](/development/gateway/process-management) -- **HTTP Proxy** - Routes requests to appropriate MCP servers using cached tool information -- **Environment Injection** - Securely provides credentials to spawned processes -- **Tool Discovery System** - Uses configurations to discover and cache available tools as detailed in [Gateway Caching System](/development/gateway/caching-system) - -This system ensures that the Gateway has immediate access to team-specific MCP server configurations while maintaining security and team isolation throughout the development workflow. diff --git a/_DEPRECATED/gateway/meta.json b/_DEPRECATED/gateway/meta.json deleted file mode 100644 index f72ea6f..0000000 --- a/_DEPRECATED/gateway/meta.json +++ /dev/null @@ -1,9 +0,0 @@ -{ - "title": "Gateway Development", - "description": "Documentation for DeployStack Gateway Development", - "icon": "Plug", - "root": false, - "pages": [ - "..." 
- ] -} diff --git a/_DEPRECATED/gateway/oauth.mdx b/_DEPRECATED/gateway/oauth.mdx deleted file mode 100644 index c610dcc..0000000 --- a/_DEPRECATED/gateway/oauth.mdx +++ /dev/null @@ -1,367 +0,0 @@ ---- -title: Gateway OAuth Implementation -description: OAuth2 client implementation for CLI authentication with DeployStack backend -sidebar: OAuth -icon: Shield ---- - -# Gateway OAuth Implementation - -The DeployStack Gateway implements an OAuth2 client for secure CLI authentication with the DeployStack backend. This enables users to authenticate via their browser and use the CLI with proper access tokens. - -## Architecture Overview - -The gateway acts as an OAuth2 client implementing the **Authorization Code flow with PKCE** (Proof Key for Code Exchange) for enhanced security. The implementation consists of: - -- **OAuth2 Client** - Handles the complete authorization flow -- **Callback Server** - Temporary HTTP server for receiving authorization codes -- **API Client** - Makes authenticated requests to backend APIs -- **Credential Storage** - Secure token storage and retrieval - -## OAuth2 Flow Process - -### 1. 
Authorization Request - -When a user runs the login command, the CLI: - -- Generates a cryptographically secure PKCE code verifier (128 random bytes) -- Creates a SHA256 code challenge from the verifier -- Generates a random state parameter for CSRF protection -- Builds the authorization URL with all required OAuth2 parameters -- Opens the user's default browser to the authorization endpoint -- Starts a temporary callback server on localhost port 8976 - -The authorization URL includes: -- `response_type=code` for authorization code flow -- `client_id=deploystack-gateway-cli` for client identification -- `redirect_uri=http://localhost:8976/oauth/callback` for callback handling -- Requested scopes (see [OAuth Scope Management](#oauth-scope-management) below) -- PKCE parameters: `code_challenge` and `code_challenge_method=S256` -- Random `state` parameter for security - -### 2. User Authorization - -The browser opens to the backend's consent page where the user: - -- Reviews the requested permissions and scopes -- Sees security warnings about CLI access -- Can approve or deny the authorization request -- Is redirected back to the CLI's callback server upon decision - -### 3. Callback Handling - -The temporary callback server: - -- Listens only on localhost for security -- Validates the callback path (`/oauth/callback`) -- Extracts the authorization code and state parameters -- Validates the state parameter matches the original request -- Displays a success or error page to the user -- Automatically shuts down after receiving the callback - -### 4. 
Token Exchange with Device Registration - -After receiving the authorization code, the CLI: - -- Detects device information (hostname, OS, hardware fingerprint) -- Exchanges the code for access and refresh tokens -- Includes the PKCE code verifier for verification -- **Automatically registers the device** during token exchange -- Validates the token response from the backend -- Fetches user information using the new access token -- Stores credentials securely for future use - -#### Automatic Device Registration - -During the token exchange process, the gateway automatically registers the current device with the backend for security and management purposes: - -**Device Information Collected:** -- `device_name`: User-friendly name (defaults to hostname) -- `hostname`: System hostname -- `hardware_id`: Unique hardware fingerprint based on MAC addresses and system info -- `os_type`: Operating system (macOS, Windows, Linux) -- `os_version`: OS version string -- `arch`: System architecture (x64, arm64, etc.) -- `node_version`: Node.js version for compatibility tracking -- `user_agent`: CLI version and platform information - -**Security Benefits:** -- Device registration happens only during authenticated login sessions -- **No separate device registration endpoints exist** - this prevents unauthorized device registration and enhances security -- Hardware fingerprinting provides unique device identification -- Enables device management and access control in the backend -- Eliminates the need for manual device registration API calls - -**Process Flow:** -1. Gateway detects current device information using system APIs -2. Device info is included in the OAuth2 token request -3. Backend validates the token request and registers the device -4. Device information is returned in the token response -5. 
Gateway logs successful device registration to the user (e.g., "📱 Device registered: MacBook-Pro.local") - -**Error Handling:** -If device registration fails during token exchange: -- The OAuth2 login process continues successfully -- User authentication is not affected -- Device context may be limited for some features -- Error is logged but doesn't break the login flow - -## PKCE Security Implementation - -The gateway implements PKCE (Proof Key for Code Exchange) following RFC 7636: - -- **Code Verifier**: 128 random bytes encoded as base64url -- **Code Challenge**: SHA256 hash of the verifier, base64url encoded -- **Challenge Method**: Always uses `S256` (SHA256) -- **State Validation**: Cryptographically secure random state parameter - -PKCE provides security benefits: -- Prevents authorization code interception attacks -- No client secret required (suitable for public clients) -- Protects against malicious applications - -## Client Configuration - -The gateway is pre-registered with the backend as: - -- **Client ID**: `deploystack-gateway-cli` -- **Client Type**: Public client (no secret required) -- **Redirect URIs**: `http://localhost:8976/oauth/callback`, `http://127.0.0.1:8976/oauth/callback` -- **Allowed Scopes**: See source code at `services/gateway/src/utils/auth-config.ts` -- **PKCE**: Required with SHA256 method -- **Token Lifetime**: 1 week access tokens, 30 day refresh tokens - -## Command Integration - -### Login Command - -The login command orchestrates the complete OAuth2 flow: - -- Checks if the user is already authenticated -- Displays "already logged in" message if credentials are valid -- Initiates the OAuth2 flow if authentication is needed -- Handles browser opening and callback server management -- Stores credentials securely upon successful authentication -- Provides clear success confirmation with user email - -### Authenticated Commands - -Commands like `whoami`, `teams`, and `start` use stored credentials: - -- Check authentication 
status before proceeding -- Display helpful error messages if not authenticated -- Use Bearer token authentication for API requests -- Automatically refresh expired tokens when possible -- Handle token expiration gracefully - -## Error Handling - -The OAuth implementation includes comprehensive error handling: - -### Error Types - -- **TIMEOUT**: OAuth callback not received within time limit -- **ACCESS_DENIED**: User denied the authorization request -- **BROWSER_ERROR**: Failed to open browser automatically -- **NETWORK_ERROR**: Network connectivity issues -- **STORAGE_ERROR**: Failed to store credentials securely -- **TOKEN_EXPIRED**: Access token has expired -- **INVALID_TOKEN**: Token format or signature invalid -- **INVALID_GRANT**: Authorization code or refresh token invalid - -### User Guidance - -Each error type provides specific user guidance: -- Timeout errors suggest retrying the command -- Access denied errors explain the approval requirement -- Browser errors offer manual URL opening -- Network errors suggest connectivity checks -- Storage errors indicate keychain permission issues - -## Browser Integration - -The CLI provides seamless browser integration: - -- **Automatic Opening**: Uses the system's default browser -- **Cross-Platform**: Works on Windows, macOS, and Linux -- **Fallback Handling**: Displays manual URL if auto-open fails -- **User Feedback**: Clear messages about browser actions -- **Security Warnings**: Alerts for development server usage - -## Token Management - -### Token Refresh - -The gateway automatically handles token refresh: - -- Monitors token expiration with 5-minute buffer -- Attempts refresh before tokens expire -- Uses refresh tokens for seamless re-authentication -- Falls back to full re-authentication if refresh fails -- Updates stored credentials with new tokens - -### Token Validation - -Before each API request, the gateway: - -- Checks token expiration locally -- Validates token format and structure -- Handles 401 
responses with automatic refresh -- Provides clear error messages for invalid tokens - -## Development vs Production - -The OAuth client adapts to different environments: - -### Development Mode -- Uses HTTP for localhost callback server -- Accepts self-signed certificates for development -- Displays security warnings for non-production servers -- Provides detailed error information for debugging - -### Production Mode -- Enforces HTTPS for all communications -- Validates SSL certificates strictly -- Uses secure callback URLs -- Limits error information exposure - -## Integration with Backend - -The gateway OAuth client integrates with the [backend OAuth2 server](/development/backend/oauth2-server): - -- **Client Registration**: Pre-registered with known client ID -- **PKCE Support**: Uses SHA256 method as required by backend -- **Scope Validation**: Requests only backend-supported scopes -- **Token Format**: Handles backend's custom JWT-like token format -- **Error Responses**: Processes standard OAuth2 error responses -- **Endpoint Discovery**: Uses standard OAuth2 endpoint paths -- **Device Registration**: Automatic device registration during token exchange - -### Device Management Integration - -The gateway's device registration integrates seamlessly with the backend's device management system: - -**Backend Integration Points:** -- **OAuth2 Token Endpoint**: Extended to accept optional `device_info` in token requests -- **Device Service**: Uses existing `DeviceService.registerOrUpdateDevice()` method -- **Database Storage**: Device information stored in the `devices` table -- **User Association**: Devices automatically linked to the authenticated user - -**Token Request Enhancement:** -The gateway includes device information in the OAuth2 token request: -```json -{ - "grant_type": "authorization_code", - "code": "authorization_code_here", - "redirect_uri": "http://localhost:8976/oauth/callback", - "client_id": "deploystack-gateway-cli", - "code_verifier": 
"pkce_verifier_here", - "device_info": { - "device_name": "MacBook-Pro.local", - "hostname": "MacBook-Pro.local", - "hardware_id": "a1b2c3d4e5f6789012345678901234ab", - "os_type": "macOS", - "os_version": "14.2.1", - "arch": "arm64", - "node_version": "v20.10.0", - "user_agent": "DeployStack-CLI/1.0.0 (darwin; arm64)" - } -} -``` - -**Token Response Enhancement:** -When device registration succeeds, the backend includes device information in the token response: -```json -{ - "access_token": "...", - "token_type": "Bearer", - "expires_in": 3600, - "refresh_token": "...", - "scope": "mcp:read account:read...", - "device": { - "id": "550e8400-e29b-41d4-a716-446655440000", - "device_name": "MacBook-Pro.local", - "is_active": true, - "is_trusted": true, - "created_at": "2025-08-23T10:20:30Z" - } -} -``` - -**Security Design:** -- Device registration only occurs during authenticated OAuth2 flows -- **No separate device creation endpoints exist** - this architectural decision prevents unauthorized device registration and eliminates potential security vulnerabilities -- Hardware fingerprinting ensures unique device identification across multiple login sessions -- Device information is validated using JSON schema before processing -- Gateway automatically handles device lookup using hardware fingerprints without requiring manual registration - -For comprehensive information about device management and hardware fingerprinting, see the [Device Management Documentation](/device-management). 
- -## Security Considerations - -The OAuth implementation follows security best practices: - -- **PKCE Required**: All authorization requests use PKCE -- **State Validation**: Prevents CSRF attacks -- **Localhost Binding**: Callback server only accepts local connections -- **Timeout Protection**: All operations have reasonable timeouts -- **Secure Storage**: Credentials stored using OS keychain -- **No Secrets**: Public client design eliminates secret management - -For detailed security implementation including credential storage, token expiration, and local file security, see the [Gateway Security Guide](/development/gateway/security). - -## OAuth Scope Management - -The gateway requests specific OAuth scopes during authentication to access backend APIs. Scope configuration must stay synchronized between the gateway and backend. - -### Current Scopes - -For the current list of supported scopes, check the source code at: -- **Gateway scopes**: `services/gateway/src/utils/auth-config.ts` in the `scopes` array -- **Backend validation**: `services/backend/src/services/oauth/authorizationService.ts` in the `validateScope()` method - -### Adding New Scopes - -When the backend adds support for a new OAuth scope, you must update the gateway configuration: - -1. **Add the scope** to the `scopes` array in `services/gateway/src/utils/auth-config.ts` -2. **Add a description** to the `SCOPE_DESCRIPTIONS` object in the same file -3. **Test the login flow** to ensure the new scope is requested and granted - -Example: -```typescript -// In services/gateway/src/utils/auth-config.ts -scopes: [ - 'mcp:read', - 'mcp:categories:read', - 'your-new-scope', // Add new scope here - // ... other scopes -], - -// And add description -export const SCOPE_DESCRIPTIONS: Record = { - 'mcp:read': 'Access your MCP server installations and configurations', - 'your-new-scope': 'Description of what this scope allows', // Add description - // ... 
other descriptions -}; -``` - -### Scope Synchronization - -**Critical**: The gateway and backend must have matching scope configurations: -- If backend supports a scope but gateway doesn't request it, users won't get that permission -- If gateway requests a scope but backend doesn't support it, authentication will fail - -Always coordinate scope changes between both services. - -## Testing OAuth Flow - -During development, the OAuth flow can be tested: - -1. Start the backend in development mode -2. Build the gateway CLI -3. Run the login command with development URL -4. Complete the browser authorization flow -5. Verify authentication with the whoami command - -The OAuth implementation provides a secure, user-friendly authentication experience that follows industry standards while integrating seamlessly with the DeployStack backend. diff --git a/_DEPRECATED/gateway/process-management.mdx b/_DEPRECATED/gateway/process-management.mdx deleted file mode 100644 index 37991f9..0000000 --- a/_DEPRECATED/gateway/process-management.mdx +++ /dev/null @@ -1,227 +0,0 @@ ---- -title: Gateway Process Management -description: How the DeployStack Gateway manages MCP server processes with persistent background processes, secure credential injection, and enterprise governance -sidebar: Process Management -icon: Cpu ---- - -import { Card, Cards } from 'fumadocs-ui/components/card'; -import { Zap, Shield, Monitor, RefreshCw, AlertTriangle, Users } from 'lucide-react'; - -# Gateway Process Management - -The DeployStack Gateway implements sophisticated process management to handle MCP server lifecycles with enterprise-grade security, performance, and governance. Each MCP server runs as a persistent background process with secure credential injection and continuous availability. 
- -## Architecture Overview - -The Gateway's process management system operates on a **persistent background process** model, similar to Claude Desktop, where all configured MCP server processes are started when the gateway launches and run continuously until shutdown. This approach provides instant tool availability and eliminates the latency associated with process spawning during development workflows. - -## Core Concepts - - - } - title="Persistent Background Processes" - > - All configured MCP servers start with the gateway and run continuously, providing instant tool availability - - - } - title="Secure Credential Injection" - > - API tokens and credentials are injected directly into process environments without developer exposure - - - } - title="Runtime State Management" - > - Comprehensive tracking of running processes with health monitoring and team isolation - - - } - title="Graceful Lifecycle Management" - > - Proper MCP shutdown sequence following protocol specifications for clean termination - - - } - title="State Comparison & Recovery" - > - Compares expected vs actual running processes with automatic recovery mechanisms - - - } - title="Team Context Switching" - > - Seamless switching between teams with complete process lifecycle management - - - -## Selective Restart Capability - -The Gateway supports **selective restart** functionality, allowing individual MCP servers to be managed without requiring a full gateway restart. This feature dramatically improves configuration update performance and eliminates downtime for unchanged servers. 
- -### Key Features - -- **Individual Server Control**: Add, remove, or restart specific MCP servers via HTTP API -- **Change Detection**: Automatically detects added, removed, and modified server configurations -- **Fallback Safety**: Falls back to full restart if selective operations fail -- **Zero Downtime**: Unchanged servers continue running during configuration updates - -### API Endpoints - -The Gateway exposes HTTP endpoints for selective server management: - -- `POST /api/mcp/servers` - Add new MCP servers to running gateway -- `DELETE /api/mcp/servers/:serverName` - Remove specific servers -- `POST /api/mcp/servers/:serverName/restart` - Restart individual servers - -### Implementation Services - -- **Selective Restart Service**: Handles HTTP communication with running gateway processes -- **Configuration Change Service**: Detects configuration differences and orchestrates selective operations -- **Process Manager Integration**: Provides individual server lifecycle control capabilities - -## Process Lifecycle - -### Gateway Startup Phase -When the Gateway starts (`deploystack start`), all configured MCP servers for the selected team are launched simultaneously: - -- **Team Configuration Loading**: Downloads and validates team MCP server configurations -- **Bulk Process Spawning**: Starts all configured MCP servers as background processes -- **Runtime Detection**: Automatic detection of Node.js, Python, Go, or custom runtime requirements -- **Environment Preparation**: Secure injection of team-specific credentials and configuration -- **MCP Protocol Handshake**: Establishes JSON-RPC communication with 30-second timeout for package downloads -- **Runtime State Registration**: Adds all successfully started processes to the runtime state manager - -### Continuous Operation Phase -During normal operation, all MCP servers run continuously in the background: - -- **Persistent Availability**: All tools are immediately available without process spawning delays -- 
**Request Routing**: Direct routing of tool calls to already-running MCP server processes -- **Health Monitoring**: Continuous monitoring of process status, uptime, and responsiveness -- **State Comparison**: Regular comparison of expected vs actual running processes -- **Error Logging**: Proper distinction between informational stderr output and actual errors - -### Team Context Switching -When switching teams, the Gateway performs complete process lifecycle management: - -- **Graceful Shutdown**: Stops all MCP servers for the current team following MCP protocol -- **Configuration Refresh**: Downloads new team's MCP server configurations -- **Process Restart**: Starts all MCP servers for the new team -- **State Synchronization**: Updates runtime state to reflect the new team context - -### Gateway Shutdown Phase -When the Gateway stops (`deploystack stop` or Ctrl+C), processes are terminated gracefully: - -- **MCP Protocol Compliance**: Follows proper MCP shutdown sequence (close stdin → wait → SIGTERM → wait → SIGKILL) -- **Parallel Shutdown**: All processes are stopped concurrently for faster shutdown -- **Resource Cleanup**: Ensures all file descriptors and system resources are properly released -- **State Cleanup**: Clears runtime state and removes process tracking information - -## Security Model - -### Credential Isolation -The Gateway implements a **zero-exposure credential model** where: - -- Credentials are never written to disk in plain text -- Environment variables are injected directly into spawned processes -- No credential access from the developer's shell environment -- Automatic credential rotation when team configurations change - -### Process Isolation -Each MCP server runs in complete isolation with: - -- **Separate Process Space**: No shared memory or resources between MCP servers -- **Independent Environments**: Each process has its own environment variable set -- **Resource Boundaries**: CPU and memory limits to prevent resource exhaustion -- 
**Network Isolation**: Controlled network access based on server requirements - -## Enterprise Governance - -### Tool-Level Management -The Gateway transforms traditional MCP servers into enterprise-manageable tools by presenting each server as: - -- **Enable/Disable Controls**: Administrators can control which MCP servers are available -- **Status Monitoring**: Real-time visibility into process health and performance -- **Usage Analytics**: Tracking of tool usage patterns and resource consumption -- **Access Policies**: Team-based access control enforcement - -### Operational Controls -Enterprise administrators gain operational control through: - -- **Centralized Configuration**: All MCP server configurations managed through the cloud control plane -- **Policy Enforcement**: Automatic enforcement of team-based access policies -- **Audit Logging**: Comprehensive logging of all process management activities -- **Resource Management**: Monitoring and control of system resource usage - -## Performance Optimization - -### Resource Efficiency -The Gateway optimizes resource usage through the persistent background process model: - -- **Continuous Operation**: All processes run continuously, eliminating spawn/cleanup overhead -- **Shared Process Pool**: Multiple tool requests reuse the same persistent MCP server processes -- **Memory Stability**: Consistent memory usage patterns with no spawn/cleanup cycles -- **CPU Optimization**: Direct request routing to running processes minimizes CPU overhead - -### Response Time Optimization -Instant response times are achieved through: - -- **Zero Latency**: Tools are immediately available from already-running processes -- **Parallel Processing**: Concurrent handling of multiple tool requests across persistent processes -- **Persistent Connections**: Maintained stdio connections eliminate connection establishment overhead -- **Cache-as-Manifest**: Cached tool information serves as configuration manifest for instant startup - -## 
Error Handling and Recovery - -### Failure Detection -The Gateway monitors for various failure scenarios: - -- **Process Crashes**: Automatic detection of terminated or crashed processes -- **Communication Failures**: Identification of broken stdio communication channels -- **Timeout Conditions**: Detection of unresponsive processes -- **Resource Exhaustion**: Monitoring for memory or CPU limit violations - -### Recovery Strategies -When failures are detected, the Gateway implements: - -- **Automatic Restart**: Immediate restart of crashed processes with exponential backoff -- **Fallback Mechanisms**: Graceful degradation when processes are unavailable -- **Error Reporting**: Detailed error reporting to developers and administrators -- **Circuit Breaker**: Temporary disabling of problematic processes to prevent cascading failures - -## Integration Points - -The process management system integrates with other Gateway components: - -- **[MCP Configuration Management](/development/gateway/mcp)**: Uses team configurations to determine spawn parameters -- **[Caching System](/development/gateway/caching-system)**: Coordinates with tool discovery and caching mechanisms -- **[Project Structure](/development/gateway/structure)**: Implements the architecture defined in the core modules -- **HTTP Proxy Server**: Provides process information for request routing decisions - -## Monitoring and Observability - -### Process Metrics -The Gateway tracks comprehensive metrics including: - -- **Process Count**: Number of active MCP server processes -- **Resource Usage**: CPU, memory, and file descriptor consumption -- **Request Throughput**: Number of requests processed per process -- **Error Rates**: Frequency and types of process errors -- **Response Times**: Latency metrics for tool requests - -### Health Indicators -Key health indicators monitored include: - -- **Process Responsiveness**: Time to respond to health check requests -- **Memory Growth**: Detection of memory leaks or 
excessive memory usage -- **Error Patterns**: Identification of recurring error conditions -- **Resource Limits**: Proximity to configured resource boundaries - -This process management system ensures that the DeployStack Gateway can reliably handle enterprise workloads while maintaining the security, performance, and governance requirements of modern development teams. diff --git a/_DEPRECATED/gateway/security.mdx b/_DEPRECATED/gateway/security.mdx deleted file mode 100644 index 2148544..0000000 --- a/_DEPRECATED/gateway/security.mdx +++ /dev/null @@ -1,374 +0,0 @@ ---- -title: Gateway Security -description: Security implementation and best practices for the DeployStack Gateway CLI -sidebar: Security -icon: Lock ---- - -# Gateway Security - -The DeployStack Gateway implements multiple layers of security to protect user credentials, ensure secure communication, and maintain system integrity. This document covers the security architecture and implementation details. - -## Credential Storage Security - -### OS Keychain Integration - -The gateway uses the **Zowe Secrets SDK** for cross-platform secure credential storage, providing native integration with each operating system's secure storage mechanism: - -**Platform-specific storage:** -- **macOS**: Keychain Access using the Security.framework -- **Windows**: Credential Manager using CredWrite/CredRead APIs -- **Linux**: Secret Service API using libsecret - -The keychain integration stores credentials with the service name `deploystack-gateway` and uses the user's email address as the account identifier. 
This approach leverages the operating system's built-in security features including: - -- Hardware-backed encryption where available -- User authentication requirements for access -- Automatic credential isolation between users -- Integration with system security policies - -### Encrypted File Fallback - -When OS keychain access is unavailable or fails, credentials are stored in encrypted files as a secure fallback: - -**Encryption Details:** -- **Algorithm**: AES-256-CBC encryption -- **Key Derivation**: Fixed key with padding (development approach) -- **Initialization Vector**: Random 16-byte IV generated per encryption -- **Storage Format**: `IV:encrypted_data` in hexadecimal encoding - -**File Security:** -- **Location**: `~/.deploystack/credentials.enc` -- **Permissions**: `0o600` (owner read/write only) -- **Directory Permissions**: `0o700` (owner access only) - -### Account Management - -The gateway maintains a secure account tracking system: - -**Account List:** -- **Location**: `~/.deploystack/accounts.json` -- **Content**: Array of user email addresses (no sensitive data) -- **Purpose**: Enables credential discovery from keychain storage -- **Format**: JSON array with most recent accounts first - -**Security Considerations:** -- Contains only email addresses, no tokens or passwords -- Used for keychain credential lookup -- Automatically maintained during login/logout operations -- Cleaned up when credentials are cleared - -## Token Security - -### Access Token Format - -Access tokens use a custom JWT-like format designed for the DeployStack backend: - -**Token Structure:** -``` -<random_token>.<base64_payload> 
-``` - -**Components:** -- **Random Token**: 512-bit cryptographically secure random value -- **Payload**: Base64-encoded JSON containing user info, scopes, and expiration -- **Database Storage**: Argon2 hash of the complete token for verification - -**Security Features:** -- No client-side signature verification required -- Embedded user information reduces database lookups -- Cryptographically secure random component -- Server-side hash verification prevents tampering - -### Token Expiration - -**Access Tokens**: 1 week (604,800 seconds) -- Provides reasonable balance between security and usability -- Reduces frequent re-authentication during development -- Long enough for typical CLI usage patterns -- Short enough to limit exposure if compromised - -**Refresh Tokens**: 30 days -- Enables seamless token renewal -- Longer lifetime for better user experience -- Stored securely alongside access tokens -- Automatically used for token refresh - -### Token Validation - -The gateway implements comprehensive token validation: - -**Local Validation:** -- Checks token expiration with 5-minute buffer -- Validates token format and structure -- Prevents unnecessary API calls with expired tokens - -**Server Validation:** -- Backend verifies token hash using Argon2 -- Checks database expiration timestamps -- Validates user permissions and scopes - -## Network Security - -### HTTPS Enforcement - -The gateway enforces secure communication: - -**Production Requirements:** -- All API communications must use HTTPS -- SSL certificate validation is strictly enforced -- Self-signed certificates are rejected -- Insecure HTTP connections are blocked - -**Development Flexibility:** -- Localhost connections allow HTTP for development -- Self-signed certificates accepted for local testing -- Security warnings displayed for non-production servers -- Clear distinction between development and production modes - -### Request Security - -All API requests include comprehensive security headers: - 
-**Standard Headers:** -- **Authorization**: Bearer token authentication -- **Content-Type**: Proper content type specification -- **User-Agent**: Identifies the CLI client and version - -**Security Measures:** -- Bearer token authentication for all authenticated requests -- Proper content type validation -- Request timeout protection -- Automatic retry logic with exponential backoff - -### Callback Server Security - -The temporary OAuth callback server implements multiple security layers: - -**Network Security:** -- **Binding**: Only accepts connections from localhost/127.0.0.1 -- **Port**: Fixed port 8976 for consistency -- **Protocol**: HTTP (acceptable for localhost) - -**Request Validation:** -- **Path Validation**: Only `/oauth/callback` path is handled -- **Parameter Validation**: Required OAuth parameters are verified -- **State Validation**: CSRF protection through state parameter - -**Lifecycle Management:** -- **Auto-cleanup**: Server automatically shuts down after callback -- **Timeout Protection**: Configurable timeout (default: 5 minutes) -- **Resource Cleanup**: Proper cleanup of server resources - -## OAuth2 Security (PKCE) - -The gateway implements PKCE (Proof Key for Code Exchange) following RFC 7636: - -### Code Verifier Generation - -**Specifications:** -- **Length**: 128 characters (96 random bytes base64url encoded) -- **Entropy**: Cryptographically secure random generation -- **Format**: Base64url encoding without padding -- **Uniqueness**: New verifier generated for each authentication - -### Code Challenge Generation - -**Process:** -- **Input**: Code verifier string -- **Hashing**: SHA256 hash of the verifier -- **Encoding**: Base64url encoding of the hash -- **Method**: Always uses `S256` (SHA256) - -### State Parameter Security - -**Generation:** -- **Length**: 32 random bytes base64url encoded -- **Purpose**: CSRF protection -- **Validation**: Strict comparison with received state -- **Storage**: Temporarily stored during OAuth flow - 
-**PKCE Security Benefits:** -- Prevents authorization code interception attacks -- Eliminates need for client secrets in public clients -- Provides cryptographic proof of authorization request origin -- Protects against malicious applications - -## Error Handling Security - -### Secure Error Messages - -The gateway implements secure error handling principles: - -**User-Facing Messages:** -- Generic error descriptions to avoid information disclosure -- Helpful guidance without revealing system internals -- No exposure of tokens, credentials, or sensitive data -- Clear action items for users to resolve issues - -**Error Categories:** -- **Authentication Errors**: Login and token-related issues -- **Network Errors**: Connectivity and communication problems -- **Storage Errors**: Credential storage and retrieval issues -- **Authorization Errors**: Permission and scope-related problems - -### Timeout Protection - -All network operations include timeout protection: - -**Timeout Types:** -- **OAuth Callback**: 5-minute default timeout for user authorization -- **API Requests**: Reasonable timeouts for backend communication -- **Token Refresh**: Quick timeout for refresh operations -- **Browser Opening**: Timeout for automatic browser launch - -**Security Benefits:** -- Prevents indefinite resource consumption -- Limits exposure time for temporary servers -- Provides clear failure modes -- Enables graceful error recovery - -## File System Security - -### Directory Permissions - -The gateway creates secure directories for credential storage: - -**Directory Structure:** -- **Base Directory**: `~/.deploystack/` -- **Permissions**: `0o700` (owner read/write/execute only) -- **Creation**: Automatic creation with secure permissions -- **Platform Compatibility**: Works across Windows, macOS, and Linux - -### File Permissions - -**Credential Files:** -- **Encrypted Credentials**: `0o600` (owner read/write only) -- **Account List**: `0o644` (owner write, others read - no sensitive 
data) -- **Temporary Files**: Secure permissions and automatic cleanup - -### Secure File Cleanup - -Credential removal includes comprehensive cleanup: - -**Cleanup Process:** -- **Keychain Removal**: Credentials removed from OS keychain -- **File Deletion**: Encrypted files securely deleted -- **Account List**: Account entries removed from tracking -- **Directory Cleanup**: Empty directories removed when appropriate - -**Security Considerations:** -- Multiple cleanup attempts for reliability -- Graceful handling of partial failures -- No sensitive data left in temporary files -- Proper error handling during cleanup - -## Development vs Production Security - -### Environment Detection - -The gateway automatically detects and adapts to different environments: - -**Development Mode Indicators:** -- URLs containing `localhost` -- Non-HTTPS protocols for local servers -- Development-specific configuration options - -**Production Mode Requirements:** -- HTTPS enforcement for all communications -- Strict SSL certificate validation -- Limited error information exposure -- Enhanced security warnings - -### Security Warnings - -The CLI provides appropriate security warnings: - -**Development Warnings:** -- Alerts when connecting to non-production servers -- Warnings about HTTP usage in development -- Reminders about development-only features - -**Production Safeguards:** -- Blocks insecure connections -- Validates server certificates -- Limits debug information exposure - -## Security Best Practices - -### 1. Credential Protection -- Never log credentials or tokens in plain text -- Use OS keychain as primary storage mechanism -- Encrypt fallback storage with strong encryption -- Restrict file permissions to owner-only access -- Implement secure credential cleanup - -### 2. 
Network Security -- Enforce HTTPS in production environments -- Validate SSL certificates strictly -- Use secure headers in all requests -- Implement proper request timeouts -- Handle network errors gracefully - -### 3. OAuth2 Security -- Always use PKCE for authorization code flow -- Validate state parameters to prevent CSRF attacks -- Use cryptographically secure random values -- Implement proper token refresh logic -- Handle authorization errors appropriately - -### 4. Error Handling -- Avoid exposing sensitive data in error messages -- Log detailed errors for debugging (server-side only) -- Provide helpful user guidance without revealing internals -- Implement proper timeout handling -- Use structured error codes for programmatic handling - -### 5. Process Security -- Exit cleanly after operations complete -- Clean up temporary resources properly -- Handle interruption signals gracefully -- Validate all user inputs -- Implement proper resource management - -For OAuth2 flow details and implementation specifics, see the [Gateway OAuth Guide](/development/gateway/oauth). 
- -## Security Auditing - -### Credential Audit - -**File System Checks:** -- Verify credential directory permissions (`~/.deploystack/`) -- Check encrypted file permissions (`credentials.enc`) -- Validate account list format (`accounts.json`) - -**Keychain Verification:** -- Check for stored credentials in OS keychain -- Verify service name and account identifiers -- Validate keychain access permissions - -### Network Security Audit - -**Connection Monitoring:** -- Monitor HTTPS usage in production -- Verify SSL certificate validation -- Check for secure header usage - -**Certificate Validation:** -- Verify SSL certificate chains -- Check certificate expiration dates -- Validate certificate authority trust - -### Security Monitoring - -**Authentication Events:** -- Monitor login success and failure rates -- Track token refresh patterns -- Identify unusual authentication behavior - -**Error Analysis:** -- Review authentication error patterns -- Monitor network connectivity issues -- Analyze credential storage problems - -The gateway's security implementation follows industry best practices and provides multiple layers of protection for user credentials and system integrity. 
diff --git a/_DEPRECATED/gateway/session-management.mdx b/_DEPRECATED/gateway/session-management.mdx deleted file mode 100644 index 36d2e95..0000000 --- a/_DEPRECATED/gateway/session-management.mdx +++ /dev/null @@ -1,320 +0,0 @@ ---- -title: Session Management -description: Cryptographically secure session lifecycle management for SSE and Streamable HTTP connections -sidebar: Session Management -icon: Key ---- - -import { Card, Cards } from 'fumadocs-ui/components/card'; -import { Key, Clock, Shield, Trash2 } from 'lucide-react'; - -# Session Management - -The DeployStack Gateway implements a robust session management system that provides cryptographically secure session handling for both persistent SSE connections and optional Streamable HTTP sessions while ensuring automatic cleanup and resource management. - -## Architecture Overview - -The session management system consists of multiple components working together to provide secure connections across different transport protocols: - -- **SessionManager**: Handles session lifecycle, validation, and SSE stream management -- **SSEHandler**: Manages Server-Sent Events connections and message routing -- **StreamableHTTPHandler**: Manages Streamable HTTP connections with optional session support -- **Transport Layer**: Intelligent routing between SSE and Streamable HTTP based on client capabilities - -## Core Components - - - } - title="Cryptographic Security" - > - 256-bit entropy session IDs with base64url encoding for maximum security - - - } - title="Lifecycle Management" - > - Automatic session creation, validation, activity tracking, and timeout handling - - - } - title="Connection Validation" - > - Session-bound SSE streams with comprehensive validation and error handling - - - } - title="Automatic Cleanup" - > - Resource cleanup on disconnect, timeout, or error conditions - - - -## Session ID Generation - -### Cryptographic Properties -- **Algorithm**: Node.js `crypto.randomBytes(32)` -- **Entropy**: 256 bits 
(32 bytes) of cryptographically secure randomness -- **Encoding**: Base64url for URL safety and compatibility -- **Format**: `L8B-xaw3HBZEftyo-JCrHoGWb_iikRZiwGfp9B71-GA` - -### Security Features -- **Unpredictability**: Cryptographically secure random number generation -- **Collision Resistance**: 2^256 possible values make collisions virtually impossible -- **URL Safety**: Base64url encoding ensures compatibility in query parameters -- **No Sequential Patterns**: Each session ID is completely independent - -### Validation Process -```typescript -private validateSessionId(sessionId: string): boolean { - if (!sessionId || typeof sessionId !== 'string') return false; - if (sessionId.length < 32) return false; - if (!/^[A-Za-z0-9_-]+$/.test(sessionId)) return false; - return true; -} -``` - -## Session Lifecycle - -### 1. Creation Phase -**Triggers**: -- SSE connection establishment via `GET /sse` -- Optional session creation for Streamable HTTP via `POST /mcp` with session headers - -**Process:** -1. Generate cryptographically secure session ID -2. Create session object with metadata -3. Associate with SSE stream (for SSE transport) or track session state (for Streamable HTTP) -4. Schedule automatic cleanup timer -5. Send endpoint event to client (SSE) or return session headers (Streamable HTTP) - -**Session Object:** -```typescript -interface SessionInfo { - id: string; - createdAt: number; - lastActivity: number; - sseStream: ServerResponse; - clientInfo?: { name: string; version: string }; - mcpInitialized: boolean; - requestCount: number; - errorCount: number; -} -``` - -### 2. Active Phase -**Duration**: Until timeout or disconnect - -**Activities:** -- **Activity Tracking**: Updated on every JSON-RPC request -- **Request Counting**: Incremented for each message processed -- **Error Tracking**: Incremented on processing failures -- **Client Info Storage**: MCP client metadata stored during initialization - -### 3. 
Cleanup Phase -**Triggers:** -- Client disconnect (`close` event) -- Connection error (`error` event) -- Stream finish (`finish` event) -- 30-minute inactivity timeout - -**Process:** -1. Close SSE stream if still open -2. Remove session from active sessions map -3. Log cleanup completion -4. Free associated resources - -## Connection Management - -### SSE Stream Handling -The session manager maintains direct references to SSE streams for efficient message delivery: - -```typescript -sendToSession(sessionId: string, event: { id?: string; event?: string; data: string }): boolean { - const session = this.sessions.get(sessionId); - if (!session || session.sseStream.destroyed) { - return false; - } - - try { - let sseData = ''; - if (event.id) sseData += `id: ${event.id}\n`; - if (event.event) sseData += `event: ${event.event}\n`; - sseData += `data: ${event.data}\n\n`; - - session.sseStream.write(sseData); - return true; - } catch (error) { - this.cleanupSession(sessionId); - return false; - } -} -``` - -### Connection State Tracking -- **Stream Health**: Monitors SSE stream status and handles disconnects -- **Activity Monitoring**: Tracks last activity timestamp for timeout detection -- **Error Handling**: Graceful handling of connection failures and cleanup -- **Resource Management**: Prevents memory leaks through automatic cleanup - -## Security Considerations - -### Session Security -- **Unpredictable IDs**: Impossible to guess or enumerate session IDs -- **Time-Limited**: Automatic expiration prevents indefinite access -- **Connection-Bound**: Sessions tied to specific SSE connections -- **Validation**: Comprehensive validation on every request - -### Timeout Management -- **Inactivity Timeout**: 30 minutes of inactivity triggers cleanup -- **Automatic Scheduling**: Cleanup scheduled at session creation -- **Activity Extension**: Timeout reset on each valid request -- **Resource Protection**: Prevents accumulation of stale sessions - -### Error Handling -- 
**Graceful Degradation**: Connection errors don't crash the system -- **Automatic Recovery**: Failed connections cleaned up automatically -- **Error Isolation**: Session errors don't affect other sessions -- **Logging**: Comprehensive error logging for debugging - -## Performance Optimization - -### Memory Management -- **Efficient Storage**: Sessions stored in Map for O(1) lookup -- **Automatic Cleanup**: Prevents memory leaks through timeout handling -- **Resource Tracking**: Monitors session count and resource usage -- **Garbage Collection**: Proper cleanup enables efficient garbage collection - -### Connection Efficiency -- **Persistent Connections**: SSE streams maintained for duration of session -- **Minimal Overhead**: Lightweight session objects with essential data only -- **Fast Lookup**: Session validation and retrieval optimized for speed -- **Batch Operations**: Efficient handling of multiple concurrent sessions - -## Monitoring and Debugging - -### Session Statistics -The session manager provides comprehensive statistics for monitoring: - -```typescript -getStatus() { - return { - activeCount: this.sessions.size, - sessions: Array.from(this.sessions.values()).map(session => ({ - id: session.id, - createdAt: session.createdAt, - lastActivity: session.lastActivity, - uptime: Date.now() - session.createdAt, - requestCount: session.requestCount, - errorCount: session.errorCount, - clientInfo: session.clientInfo, - mcpInitialized: session.mcpInitialized - })) - }; -} -``` - -### Logging and Observability -- **Session Creation**: Logged with session ID for tracking -- **Activity Updates**: Request and error counts tracked -- **Cleanup Events**: Cleanup reasons and timing logged -- **Error Conditions**: Detailed error logging for troubleshooting - -## Transport-Specific Session Handling - -### SSE Transport Sessions -SSE transport requires persistent sessions for connection management: - -- **Mandatory Sessions**: All SSE connections must have associated 
sessions -- **Stream Binding**: Sessions are bound to specific SSE streams -- **Real-time Communication**: Messages sent via SSE events in real-time -- **Connection Lifecycle**: Session lifecycle tied to SSE connection state - -### Streamable HTTP Transport Sessions -Streamable HTTP transport supports optional sessions for enhanced functionality: - -- **Optional Sessions**: Sessions can be used but are not required -- **Stateless Operation**: Supports both stateless and session-based operation -- **Header-Based**: Session IDs passed via `Mcp-Session-Id` header -- **Flexible Lifecycle**: Sessions can span multiple HTTP requests - -## Integration Points - -### SSE Handler Integration -The session manager works closely with the SSE handler: - -```typescript -// Session creation during SSE establishment -const sessionId = this.sessionManager.createSession(reply.raw); - -// Message routing through sessions -this.sseHandler.sendMessage(sessionId, response); - -// Error handling via sessions -this.sseHandler.sendError(sessionId, errorResponse); -``` - -### Streamable HTTP Handler Integration -The session manager provides optional session support for Streamable HTTP: - -```typescript -// Optional session validation for Streamable HTTP -const sessionId = request.headers['mcp-session-id']; -if (sessionId) { - const session = this.sessionManager.getSession(sessionId); - if (session) { - this.sessionManager.updateActivity(sessionId); - } -} - -// Stateless operation when no session provided -if (!sessionId) { - // Handle request without session context -} -``` - -### HTTP Proxy Integration -Session validation across both transports in the HTTP proxy: - -```typescript -// Transport-aware session handling -if (isSSETransport) { - // SSE requires session validation - const session = this.sessionManager.getSession(sessionId); - if (!session) { - throw new Error('Invalid session for SSE transport'); - } - this.sessionManager.updateActivity(sessionId); -} else if (isStreamableHTTP 
&& sessionId) { - // Streamable HTTP optional session support - const session = this.sessionManager.getSession(sessionId); - if (session) { - this.sessionManager.updateActivity(sessionId); - } -} -``` - -## Best Practices - -### Session Lifecycle -- **Immediate Creation**: Sessions created immediately on SSE connection -- **Activity Tracking**: Update activity on every valid request -- **Graceful Cleanup**: Always clean up resources on session end -- **Error Handling**: Handle all error conditions gracefully - -### Security Practices -- **Validate Always**: Validate session ID on every request -- **Time Limits**: Enforce reasonable session timeouts -- **Resource Limits**: Monitor and limit concurrent sessions if needed -- **Audit Trail**: Log session activities for security monitoring - -### Performance Practices -- **Efficient Lookup**: Use Map for O(1) session lookup -- **Minimal Data**: Store only essential session data -- **Cleanup Scheduling**: Schedule cleanup to prevent resource leaks -- **Error Recovery**: Implement robust error recovery mechanisms - -The session management system provides a secure, efficient, and robust foundation for persistent SSE connections while maintaining enterprise-grade security and operational requirements. diff --git a/_DEPRECATED/gateway/sse-transport.mdx b/_DEPRECATED/gateway/sse-transport.mdx deleted file mode 100644 index 64f7ec6..0000000 --- a/_DEPRECATED/gateway/sse-transport.mdx +++ /dev/null @@ -1,219 +0,0 @@ ---- -title: SSE Transport Implementation -description: Server-Sent Events transport layer for VS Code compatibility and dual-endpoint architecture -sidebar: SSE Transport -icon: Radio ---- - -import { Card, Cards } from 'fumadocs-ui/components/card'; -import { Radio, MessageSquare, Shield, Zap } from 'lucide-react'; - -# SSE Transport Implementation - -The DeployStack Gateway implements Server-Sent Events (SSE) transport to provide VS Code compatibility through a clean dual-endpoint architecture. 
- -## Architecture Overview - -The Gateway uses a **dual-endpoint architecture** for SSE-based communication: - -- **GET /sse**: Establishes SSE connection and returns session endpoint -- **POST /message**: Handles JSON-RPC requests with session context - -## Core Components - - - } - title="SSE Handler" - > - Manages Server-Sent Events connections, event formatting, and message routing - - - } - title="Session Manager" - > - Handles cryptographically secure session lifecycle with automatic cleanup - - - } - title="Dual Endpoints" - > - Supports both SSE and traditional HTTP clients with intelligent routing - - - } - title="Real-time Communication" - > - Persistent connections enable real-time bidirectional communication - - - -## Connection Flow - -### 1. SSE Connection Establishment -```http -GET /sse HTTP/1.1 -Accept: text/event-stream -``` - -**Response:** -``` -HTTP/1.1 200 OK -Content-Type: text/event-stream -Cache-Control: no-cache -Connection: keep-alive - -event: endpoint -data: /message?session=L8B-xaw3HBZEftyo-JCrHoGWb_iikRZiwGfp9B71-GA -``` - -### 2. 
Session-Based JSON-RPC -```http -POST /message?session=L8B-xaw3HBZEftyo-JCrHoGWb_iikRZiwGfp9B71-GA -Content-Type: application/json - -{ - "jsonrpc": "2.0", - "id": 1, - "method": "initialize", - "params": { - "clientInfo": {"name": "vscode", "version": "1.0.0"}, - "protocolVersion": "2025-03-26" - } -} -``` - -**HTTP Response:** -```json -{"status": "accepted", "messageId": 1} -``` - -**SSE Response:** -``` -id: msg-1753710728979-95czkmmq8 -event: message -data: {"jsonrpc":"2.0","id":1,"result":{"serverInfo":{"name":"deploystack-gateway","version":"1.0.0"},"protocolVersion":"2025-03-26","capabilities":{"tools":{"listChanged":false}}}} -``` - -## Session Management - -### Session ID Generation -- **Algorithm**: Cryptographically secure random bytes (32 bytes = 256 bits) -- **Encoding**: Base64url for URL safety -- **Format**: `L8B-xaw3HBZEftyo-JCrHoGWb_iikRZiwGfp9B71-GA` -- **Validation**: Length and character set validation - -### Session Lifecycle -1. **Creation**: Generated on SSE connection establishment -2. **Validation**: Verified on each JSON-RPC request -3. **Activity Tracking**: Updated on every message -4. **Timeout**: 30-minute inactivity timeout -5. 
**Cleanup**: Automatic resource cleanup on disconnect - -### Security Features -- **Cryptographic Security**: 256-bit entropy prevents session prediction -- **Automatic Expiration**: Sessions expire after 30 minutes of inactivity -- **Connection Validation**: Session tied to specific SSE stream -- **Resource Cleanup**: Automatic cleanup prevents memory leaks - -## Message Routing - -### Supported Methods -The SSE transport handles all standard MCP protocol methods: - -- **initialize**: Gateway initialization with capabilities -- **notifications/initialized**: Client initialization confirmation -- **tools/list**: Returns available MCP servers as toggleable tools -- **tools/call**: Executes MCP server management actions -- **resources/list**: Returns empty resources (handled locally) -- **resources/templates/list**: Returns empty templates (handled locally) -- **prompts/list**: Returns empty prompts (handled locally) - -### Error Handling -Errors are sent via SSE with proper JSON-RPC error format: - -``` -id: err-1753710744580-061x9gi8x -event: error -data: {"jsonrpc":"2.0","error":{"code":-32603,"message":"Internal server error","data":"Server not available"},"id":2} -``` - -## VS Code Integration - -### Expected Client Behavior -1. **Connection**: Client connects to `http://localhost:9095/sse` via SSE -2. **Endpoint Discovery**: Receives session endpoint via `endpoint` event -3. **Initialization**: Sends `initialize` request to session endpoint -4. **Tool Discovery**: Calls `tools/list` to discover available MCP servers -5. 
**Tool Management**: Uses `tools/call` to enable/disable/status MCP servers - -### Configuration -VS Code MCP client configuration: -```json -{ - "mcpServers": { - "deploystack": { - "url": "http://localhost:9095/sse" - } - } -} -``` - -## Performance Considerations - -### Connection Management -- **Keep-Alive**: Persistent SSE connections reduce connection overhead -- **Heartbeat**: Optional heartbeat messages maintain connection health -- **Timeout Handling**: Automatic cleanup prevents resource exhaustion - -### Memory Management -- **Session Cleanup**: Automatic cleanup on disconnect or timeout -- **Stream Management**: Proper SSE stream lifecycle management -- **Error Recovery**: Graceful handling of connection failures - -### Client Detection -The Gateway detects SSE clients based on: -- **Accept Header**: `text/event-stream` indicates SSE client -- **User-Agent**: VS Code, Cursor, or other MCP clients -- **Request Method**: GET for SSE establishment, POST for session-based messaging - -## Implementation Details - -### SSE Event Format -All SSE events follow this structure: -``` -id: <event-id> -event: <event-type> -data: <json-payload> - -``` - -### Event Types -- **endpoint**: Session endpoint URL -- **message**: JSON-RPC response -- **error**: JSON-RPC error response -- **notification**: Server notifications - -### Connection Cleanup -Cleanup triggers include: -- Client disconnect (`close` event) -- Connection error (`error` event) -- Stream finish (`finish` event) -- Session timeout (30 minutes) - -## Security Considerations - -### Session Security -- **Unpredictable IDs**: Cryptographically secure generation -- **Time-Limited**: Automatic expiration prevents indefinite access -- **Connection-Bound**: Sessions tied to specific SSE connections - -### Network Security -- **Localhost Only**: Server binds only to localhost interface -- **No External Access**: No exposure to external networks -- **CORS Configuration**: Restricted to authorized origins - -The SSE transport implementation provides a 
robust, secure, and performant foundation for VS Code integration with clean dual-endpoint architecture. diff --git a/_DEPRECATED/gateway/structure.mdx b/_DEPRECATED/gateway/structure.mdx deleted file mode 100644 index cae4dd9..0000000 --- a/_DEPRECATED/gateway/structure.mdx +++ /dev/null @@ -1,134 +0,0 @@ ---- -title: Gateway Project Structure -description: Directory structure and architecture of the DeployStack Gateway CLI -sidebar: Project Structure -icon: FolderTree ---- - -# Gateway Project Structure - -The DeployStack Gateway is structured as a TypeScript CLI application using Commander.js with a modular architecture designed for maintainability and extensibility. - -## Directory Overview - -```bash -services/gateway/ -├── src/ # Source code -│ ├── index.ts # CLI entry point and command registration -│ ├── commands/ # Command implementations -│ │ ├── login.ts # Authentication with cloud.deploystack.io -│ │ ├── start.ts # Start the gateway server -│ │ ├── refresh.ts # Root-level refresh command -│ │ └── ... # Other CLI commands -│ ├── core/ # Core business logic -│ │ ├── auth/ # Authentication handling -│ │ ├── server/ # HTTP proxy server with SSE support -│ │ ├── process/ # MCP process management -│ │ ├── mcp/ # MCP configuration management -│ │ └── config/ # Configuration utilities -│ ├── services/ # Shared business services -│ │ ├── refresh-service.ts # Shared MCP configuration refresh logic -│ │ ├── server-start-service.ts # Centralized server startup logic -│ │ └── ... # Other shared services -│ ├── utils/ # Shared utilities -│ │ ├── logger.ts # Centralized logging -│ │ └── ... 
# Other utilities -│ └── types/ # TypeScript type definitions -├── bin/gateway.js # Executable entry point -├── dist/ # Compiled JavaScript (gitignored) -├── tests/ # Test suite -├── package.json # Dependencies and scripts -├── tsconfig.json # TypeScript configuration -└── README.md # Gateway-specific documentation -``` - -## Key Design Decisions - -### Modular Architecture -The codebase is organized into distinct modules: -- **Commands**: User-facing CLI commands -- **Core**: Business logic separated by domain -- **Services**: Shared business services for cross-command functionality -- **Utils**: Reusable utilities and helpers - -### Process Management -The `process/` module handles the complexity of: -- Managing persistent background MCP server processes -- Runtime state tracking and team isolation -- Managing stdio communication with running processes -- Injecting environment variables securely at startup -- Graceful process lifecycle management following MCP protocol - -### Security First -- Credentials are never stored in plain text -- All sensitive data is encrypted at rest -- Environment injection happens at runtime only - -### Developer Experience -- Intuitive command structure (`deploystack login`, `deploystack start`, `deploystack mcp`) -- Rich CLI feedback with colors and progress indicators -- Clear error messages with actionable solutions -- MCP server management and tool discovery capabilities - -## Module Responsibilities - -### Commands Layer -Each command file exports a function that registers itself with Commander.js: -```typescript -export function registerLoginCommand(program: Command) { - program - .command('login') - .description('Authenticate with DeployStack cloud') - .action(async () => { - // Implementation - }); -} -``` - -### Core Modules - -**auth/**: Handles OAuth flow and token management -- Secure storage of access tokens -- Automatic token refresh -- Session management - -**server/**: HTTP proxy server with dual transport support -- 
**proxy.ts**: Dual-endpoint routing (GET /sse for SSE connections, POST /message for session-based JSON-RPC) -- **session-manager.ts**: Cryptographically secure session lifecycle management -- **sse-handler.ts**: Server-Sent Events implementation for VS Code compatibility - -**process/**: MCP server process lifecycle -- Persistent background process management -- Runtime state tracking with team isolation -- Stdio transport implementation for continuous communication -- Graceful lifecycle management following MCP protocol -- Enterprise management layer (MCP servers as toggleable tools) - -**mcp/**: Configuration management and processing -- Team configuration synchronization with cloud control plane -- Raw API data storage and processed config generation -- Secure credential injection and environment variable management -- MCP server tool discovery and capability exploration -- Team-aware tool caching system as detailed in [Caching System](/development/gateway/caching-system) -- Installation method processing for correct server spawning - -**services/**: Shared business services for cross-command functionality -- **refresh-service.ts**: Centralized MCP configuration refresh logic used by both `deploystack refresh` and `deploystack mcp --refresh` commands -- Eliminates code duplication while maintaining identical behavior across commands -- Provides consistent error handling and user feedback - -**utils/**: Shared utilities and centralized services -- **tool-discovery-manager.ts**: Centralized tool discovery eliminating code duplication across commands -- Logging, configuration, and encryption utilities -- Progress indicators and error handling - -**config/**: Configuration utilities and defaults -- Default gateway settings and validation -- Configuration file management -- Environment-specific overrides - -### Build Output -The TypeScript code is compiled to CommonJS for maximum compatibility: -- Source maps for debugging -- Minified for production -- External 
dependencies preserved diff --git a/_DEPRECATED/gateway/teams.mdx b/_DEPRECATED/gateway/teams.mdx deleted file mode 100644 index b31e18a..0000000 --- a/_DEPRECATED/gateway/teams.mdx +++ /dev/null @@ -1,140 +0,0 @@ ---- -title: Team Context in Gateway CLI -description: Understanding team-scoped operations and MCP server installations in the DeployStack Gateway CLI -sidebar: Team Context -icon: Users ---- - -# Team Context in Gateway CLI - -The DeployStack Gateway CLI is fundamentally **team-centric**. All MCP server installations and operations are scoped to the currently selected team, reflecting the architectural design where teams serve as isolated workspaces for deployment resources. - -## Team Selection Architecture - -### Secure Storage Location - -Team selection is stored securely alongside authentication credentials using: -- **Primary**: OS keychain (macOS Keychain, Windows Credential Manager, Linux Secret Service) -- **Fallback**: Encrypted file at `~/.deploystack/credentials.enc` - -The selected team information is part of the `StoredCredentials` interface: - -```typescript -interface StoredCredentials { - // ... other credential fields - selectedTeam?: { - id: string; // Team ID for API operations - name: string; // Team name for display - }; -} -``` - -### Automatic Default Selection - -When users authenticate via `deploystack login`, the CLI automatically: - -1. Fetches user's teams from `/api/teams/me` -2. Identifies the default team (`is_default: true`) -3. Sets it as the selected team in secure storage -4. Confirms selection to the user - -### Team Switching - -Users can change their active team context using: - -```bash -deploystack teams --switch -``` - -This updates the stored team selection, affecting all subsequent CLI operations. 
- -## MCP Server Installation Scope - -### Database Architecture - -MCP server installations are stored in the `mcpServerInstallations` table with team-based foreign keys: - -```sql -mcpServerInstallations: - - team_id (FK to teams.id) -- Scopes installation to specific team - - server_id (FK to mcpServers.id) -- References the MCP server definition - - user_environment_variables -- Team-specific encrypted credentials -``` - -### Team-Scoped Operations - -All MCP-related CLI operations operate within the selected team context: - -- **Credential Injection**: Environment variables are team-specific -- **Server Availability**: Only the team's installed servers are accessible -- **Configuration Sync**: The Gateway downloads only the selected team's configurations -- **Process Management**: Spawned MCP processes use team-scoped credentials - -> **MCP Configuration Management**: For detailed information about how the Gateway downloads, processes, and stores MCP server configurations from the backend API, see the [Gateway MCP Configuration documentation](/development/gateway/mcp). 
- -### Cross-Team Isolation - -The architecture ensures complete isolation between teams: - -- Team A cannot access Team B's MCP server installations -- Credentials are encrypted per team context -- No cross-team data leakage in local processes - -## CLI Implementation Details - -### Storage Methods - -The `CredentialStorage` class provides team selection methods: - -- `updateSelectedTeam(teamId, teamName)` - Updates selected team -- `getSelectedTeam()` - Retrieves current selection -- Team data is persisted with other authentication credentials - -### Team-Aware Commands - -Key commands that depend on team context: - -- `deploystack start` - Starts the gateway for the selected team's MCP servers -- `deploystack teams` - Shows selection status and switching options -- Future MCP management commands will operate on the selected team - -### API Integration - -Team context affects backend communication: - -- All MCP-related API calls include team context -- Configuration sync requests are team-scoped -- Credential retrieval is filtered by team membership - -## Developer Guidelines - -### Working with Team Context - -When developing CLI features that interact with MCP servers: - -1. **Always check team selection** before MCP operations -2. **Use team ID for API calls** (not just team name) -3. **Scope local storage** by team when caching configurations -4. **Validate team access** before exposing functionality - -### Future Considerations - -The team context system is designed to support: - -- Multi-team development workflows -- Team-specific MCP server catalogs -- Role-based access to different tool sets -- Enterprise governance and audit trails - -For complete team management information, see the [Teams documentation](/teams). 
- -## Error Handling - -CLI commands should gracefully handle team context issues: - -- **No team selected**: Prompt user to select a team -- **Invalid team**: Guide user to available teams -- **Team access revoked**: Require re-authentication -- **Team deleted**: Clear selection and prompt for new team - -This team-centric design ensures that the Gateway CLI operates as a secure, isolated workspace aligned with organizational boundaries while maintaining a smooth developer experience. diff --git a/_DEPRECATED/gateway/tech-stack.mdx b/_DEPRECATED/gateway/tech-stack.mdx deleted file mode 100644 index 1564320..0000000 --- a/_DEPRECATED/gateway/tech-stack.mdx +++ /dev/null @@ -1,264 +0,0 @@ ---- -title: Gateway Tech Stack -description: CLI framework and npm packages used in the DeployStack Gateway -sidebar: Tech Stack -icon: Package ---- - -# Gateway Tech Stack - -The DeployStack Gateway is built with a carefully selected set of Node.js packages that prioritize developer experience, security, and performance. - -## Core Framework - -### Commander.js -Our CLI framework of choice for building the gateway's command-line interface. - -**Why Commander?** -- Battle-tested by major CLIs (Vue CLI, Create React App) -- Excellent TypeScript support -- Simple yet powerful API -- Extensive documentation and community - -### Fastify -High-performance HTTP server framework for the proxy server implementation. - -**Why Fastify?** -- Excellent TypeScript support with built-in type definitions -- High performance with low overhead -- Rich plugin ecosystem for middleware -- Built-in JSON schema validation -- Comprehensive logging and error handling - -## UI and Feedback - -### Chalk -Terminal string styling for colorful and readable output. - -**Features:** -- Semantic color methods for different message types -- Support for 256 colors and Truecolor -- Auto-detects color support -- Respects NO_COLOR environment variable - -### Ora -Elegant terminal spinners for long-running operations. 
- -**Use Cases:** -- Authentication flows -- Configuration syncing -- Process spawning feedback -- Network operations - -### CLI-Progress -Customizable progress bars for detailed operation feedback. - -**Features:** -- Single and multi-bar support -- Customizable formats and styles -- Ideal for file operations and bulk processing - -## Interactive Components - -### Inquirer.js -Interactive command line prompts for user input. - -**Prompt Types:** -- Text input for credentials -- Password input with masking -- Selection lists for configuration options -- Confirmations for destructive operations - -## Development Tools - -### TypeScript -Full TypeScript support for type safety and better developer experience. - -**Benefits:** -- Type safety catches errors at compile time -- Better IDE support with autocomplete -- Self-documenting code through types -- Easier refactoring - -### tsx -Run TypeScript files directly without compilation during development. - -### Build Tool - tsup -Fast TypeScript bundler powered by esbuild. - -**Why tsup?** -- Lightning fast builds using esbuild -- Zero config with sensible defaults -- Built-in TypeScript support -- Generates CommonJS and ESM outputs - -**Configuration Example:** -```typescript -export default defineConfig({ - entry: ['src/index.ts'], - format: ['cjs'], - target: 'node16', - clean: true, - sourcemap: true, -}); -``` - -## Utility Libraries - -### File System Operations - -**fs-extra** -Enhanced file system module with promise support and extra methods. -- Includes all standard fs methods -- Adds useful methods like `copy`, `remove`, `ensureDir` -- Promise-based API for cleaner async code -- Essential for team-aware tool caching system - -**glob** -File pattern matching using shell-style wildcards. -- Find files matching patterns like `*.ts` or `src/**/*.js` -- Essential for batch operations - -### Process Management - -**execa** -Better child process execution for spawning MCP servers. 
-- Improved error handling -- Promise-based interface -- Better Windows support -- Automatic escaping of arguments - -**ps-tree** -Process tree management for proper cleanup. -- Find all child processes of a parent -- Ensure clean shutdown of spawned MCP servers - -### Configuration - -**cosmiconfig** -Flexible configuration file loader. -- Searches for config in multiple formats (.json, .yml, .js) -- Supports `.deploystackrc`, `deploystack.config.js`, package.json -- Follows common patterns used by ESLint, Prettier, etc. - -**dotenv** -Environment variable loading from .env files. -- Load configuration from `.env` files -- Support for different environments (.env.local, .env.production) - -### Security - -**keytar** -Native OS keychain integration for secure credential storage. -- macOS: Keychain Access -- Windows: Credential Manager -- Linux: Secret Service API -- No plain text passwords on disk - -**crypto-js** -Additional encryption for sensitive data. -- AES encryption for config files -- Secure hashing for verification - -**crypto (Node.js built-in)** -Native cryptographic functionality for session management. -- Cryptographically secure random bytes generation -- Session ID generation with 256-bit entropy -- Base64url encoding for URL-safe session identifiers - -### Networking - -**axios** -Feature-rich HTTP client for cloud API communication. -- Interceptors for auth token injection -- Automatic retry logic -- Request/response transformation - -**http-proxy** -HTTP proxy for routing MCP requests to appropriate servers. -- Route requests based on MCP server name -- Inject authentication headers -- Handle stdio-to-HTTP translation - -## Testing Stack - -**vitest** -Fast unit testing framework with native TypeScript support. -- Compatible with Jest API -- Built-in TypeScript support -- Extremely fast execution - -**supertest** -HTTP assertion library for testing the proxy server. 
-- Test HTTP endpoints -- Assert response status, headers, and body -- Works seamlessly with vitest - -**msw (Mock Service Worker)** -API mocking for integration tests. -- Mock cloud API responses -- Test error scenarios -- Intercept HTTP requests - -## Why This Stack? - -### 1. **Developer Experience** -- Commander provides intuitive command structure -- Chalk + Ora + CLI-Progress create rich, informative output -- TypeScript ensures type safety and better IDE support - -### 2. **Security First** -- Keytar integrates with OS keychains (macOS Keychain, Windows Credential Manager, Linux Secret Service) -- Crypto-js for additional encryption layers -- No plain text credential storage - -### 3. **Performance** -- tsup/esbuild for fast builds -- Minimal dependencies for quick startup -- Lazy loading of heavy operations - -### 4. **Cross-Platform** -- All packages support Windows, macOS, and Linux -- Platform-specific features handled gracefully - -### 5. **Enterprise Ready** -- Comprehensive error handling -- Detailed logging capabilities -- Extensible architecture - -## Installation - -All dependencies are managed through npm: - -```bash -cd services/gateway -npm install -``` - -## Development Workflow - -```bash -# Development with hot reload -npm run dev - -# Run TypeScript directly -npm run start:dev - -# Build for production -npm run build - -# Run tests -npm test -``` - -## Package Selection Criteria - -When adding new packages, we consider: - -1. **Security**: Regular updates, no known vulnerabilities -2. **Maintenance**: Active development, responsive maintainers -3. **Size**: Minimal impact on CLI startup time -4. **Compatibility**: Works across all target platforms -5. **TypeScript**: First-class TypeScript support preferred - -This tech stack provides a solid foundation for building a secure, performant, and user-friendly CLI that meets enterprise requirements while maintaining excellent developer experience. 
diff --git a/_DEPRECATED/gateway/testing.mdx b/_DEPRECATED/gateway/testing.mdx deleted file mode 100644 index ef12d8b..0000000 --- a/_DEPRECATED/gateway/testing.mdx +++ /dev/null @@ -1,110 +0,0 @@ ---- -title: Gateway Testing -description: Testing commands and workflows for the DeployStack Gateway -sidebar: Testing -icon: TestTube ---- - -# Gateway Testing - -The DeployStack Gateway includes testing infrastructure for ensuring reliability and quality of the CLI application. - -## Test Commands - -### Unit Tests -```bash -npm run test:unit -``` -Currently displays a placeholder message as tests are not yet implemented. - -### Linting -```bash -npm run lint -``` -Runs ESLint with automatic fixing of common issues. Essential for maintaining code quality. - -### Build Verification -```bash -npm run build -``` -Compiles TypeScript to JavaScript and verifies the build process. - -## Development Workflow - -### Local Development -```bash -npm run dev -``` -Starts the gateway in development mode with hot reload using `ts-node-dev`. - -### Manual Testing -```bash -npm run link -``` -Links the local gateway for testing CLI commands globally. - -After linking, test commands directly: -```bash -deploystack version -deploystack status -deploystack --help -``` - -## Release Testing - -### Pre-release Checks -```bash -npm run release -``` -Runs linting checks before creating a release through `release-it`. 
- -### CI/CD Testing -The GitHub Actions workflow automatically runs: -- Build verification -- Linting checks -- Unit tests (when implemented) - -## Testing Strategy - -### CLI-Specific Testing -- **Command validation**: Ensure all commands parse correctly -- **Output formatting**: Verify chalk styling and user messages -- **Error handling**: Test failure scenarios and exit codes -- **Cross-platform**: Validate behavior on Windows, macOS, and Linux - -### Integration Points -- **Authentication flows**: Test login/logout workflows -- **Configuration management**: Verify config file operations -- **Process management**: Test MCP server spawning and cleanup -- **Proxy functionality**: Validate HTTP proxy routing - -## Future Testing Implementation - -The gateway will include comprehensive testing using: -- **vitest** for unit testing -- **supertest** for HTTP endpoint testing -- **msw** for API mocking -- Cross-platform testing in CI/CD - -## Development Tips - -### Quick Validation -```bash -# Check command structure -deploystack --help - -# Verify version info -deploystack version - -# Test error handling -deploystack invalid-command -``` - -### Build and Test Cycle -```bash -npm run lint # Fix code style issues -npm run build # Verify compilation -npm run link # Test locally -``` - -This testing approach ensures the gateway maintains high quality while remaining focused on the essential CLI functionality. 
\ No newline at end of file diff --git a/app/[[...slug]]/page.tsx b/app/[[...slug]]/page.tsx deleted file mode 100644 index 8984065..0000000 --- a/app/[[...slug]]/page.tsx +++ /dev/null @@ -1,129 +0,0 @@ -import type { Metadata } from 'next'; -import { DocsLayout } from 'fumadocs-ui/layouts/docs'; -import { DocsPage, DocsBody } from 'fumadocs-ui/page'; -import { notFound } from 'next/navigation'; -import { source } from '@/lib/source'; -import { generatePageMetadata, getCanonicalUrl } from '@/lib/seo-utils'; -import { getFinalPageTitle } from '@/lib/h1-extractor'; -import { readFile } from 'fs/promises'; -import { getMDXComponents } from '@/mdx-components'; -import { docsOptions } from '../layout.config'; -import { generateTechArticleSchema, generateBreadcrumbSchema, combineSchemas } from '@/lib/structured-data'; - -export default async function Page({ - params, -}: { - params: Promise<{ slug?: string[] }>; -}) { - const { slug } = await params; - const page = source.getPage(slug); - - if (!page) { - notFound(); - } - - const MDX = page.data.body; - - // Generate structured data for all pages with content - let structuredData = ''; - if (slug && slug.length > 0) { - const slugString = slug.join('/'); - const url = `https://deploystack.io/docs/${slugString}`; - - // Get the final title (same logic as in generateMetadata) - let finalTitle = page.data.title; - try { - const filePath = page.file.path; - const absolutePath = `./docs/${filePath}`; - const rawContent = await readFile(absolutePath, 'utf-8'); - finalTitle = getFinalPageTitle(rawContent, page.data.title); - } catch (error) { - finalTitle = page.data.title; - } - - const articleSchema = generateTechArticleSchema({ - title: finalTitle, - description: page.data.description, - slug, - url, - }); - - const breadcrumbSchema = generateBreadcrumbSchema(slug); - structuredData = combineSchemas(articleSchema, breadcrumbSchema); - } - - // Always use the unified source pageTree that includes all sections - // Instead 
of switching between different trees, show all sections together - const pageTree = source.pageTree; - - // Always use DocsLayout with sidebar for all pages including root - return ( - <> - {structuredData && ( -