diff --git a/.env.example b/.env.example index 5109074..1437566 100644 --- a/.env.example +++ b/.env.example @@ -1,7 +1,15 @@ # Required API Keys -ANTHROPIC_API_KEY=your-anthropic-api-key-here +OPENROUTER_API_KEY=your-openrouter-api-key-here FISH_API_KEY=your-fish-audio-api-key-here +# Optional: OpenRouter settings +# OPENROUTER_BASE_URL=https://openrouter.ai/api/v1 +# OPENROUTER_FAST_MODEL=anthropic/claude-3.5-haiku +# OPENROUTER_RESEARCH_MODEL=anthropic/claude-3.5-sonnet +# OPENROUTER_VISION_MODEL=anthropic/claude-3.5-sonnet +# OPENROUTER_SITE_URL= +# OPENROUTER_APP_NAME=JARVIS + # Optional: Fish Audio voice model (defaults to JARVIS MCU voice) # FISH_VOICE_ID=612b878b113047d9a770c069c8b4fdfe @@ -12,6 +20,20 @@ FISH_API_KEY=your-fish-audio-api-key-here # If not set, JARVIS reads ALL calendars from Apple Calendar # CALENDAR_ACCOUNTS=you@gmail.com,work@company.com +# Linux/Windows Calendar (CalDAV) +# Provide either explicit calendar collection URLs, or a base URL where calendars can be discovered. +# CALDAV_CALENDAR_URLS=https://caldav.example.com/user/calendars/personal/,https://caldav.example.com/user/calendars/work/ +# CALDAV_URL=https://caldav.example.com/user/calendars/ +# CALDAV_USERNAME=you@example.com +# CALDAV_PASSWORD=your-app-password + +# Linux/Windows Mail (IMAP, read-only) +# IMAP_HOST=imap.gmail.com +# IMAP_PORT=993 +# IMAP_SSL=true +# IMAP_USERNAME=you@example.com +# IMAP_PASSWORD=your-app-password + # Optional: Whether to skip Claude CLI permission prompts on subprocess calls. # Defaults to "true" because JARVIS is voice-driven and cannot respond to # interactive permission prompts (they would silently hang the subprocess). @@ -19,6 +41,16 @@ FISH_API_KEY=your-fish-audio-api-key-here # running JARVIS interactively in a Terminal you can see and respond to. # JARVIS_SKIP_PERMISSIONS=true +# Security (recommended) +# The server will auto-generate this on first run and write it into .env. +# JARVIS_AUTH_TOKEN=paste-from-generated-value +# JARVIS_ALLOWED_ORIGINS=http://localhost:5173,https://localhost:5173 +# JARVIS_PROJECTS_DIR=~/JarvisProjects +# JARVIS_DEV_MODE=false + +# Linux/Windows Notes (filesystem-backed) +# NOTES_DIR=/home/you/jarvis-notes + # Optional: Override the auto-detected weather location. # By default, JARVIS uses your public-IP location (via ipwho.is) and reports # Fahrenheit. Set these to pin a specific location: diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml new file mode 100644 index 0000000..16e7c39 --- /dev/null +++ b/.github/workflows/ci.yml @@ -0,0 +1,35 @@ +name: CI + +on: + pull_request: + push: + branches: ["main"] + +jobs: + backend: + runs-on: ubuntu-latest + steps: + - uses: actions/checkout@v4 + - uses: actions/setup-python@v5 + with: + python-version: "3.11" + - name: Install dependencies + run: | + python -m pip install --upgrade pip + pip install -r requirements.txt -r requirements-dev.txt + - name: Run tests + run: pytest -q + + frontend: + runs-on: ubuntu-latest + steps: + - uses: actions/checkout@v4 + - uses: actions/setup-node@v4 + with: + node-version: "18" + cache: "npm" + cache-dependency-path: frontend/package-lock.json + - name: Install + run: npm -C frontend ci + - name: Build + run: npm -C frontend run build diff --git a/CLAUDE.md b/CLAUDE.md index 58863ff..dcdd862 100644 --- a/CLAUDE.md +++ b/CLAUDE.md @@ -1,12 +1,12 @@ # JARVIS — Voice AI Assistant ## Overview -JARVIS (Just A Rather Very Intelligent System) is a voice-first AI assistant for macOS. It runs locally on your machine, connecting to your Apple Calendar, Mail, Notes, and can spawn Claude Code sessions for development tasks. +JARVIS (Just A Rather Very Intelligent System) is a voice-first AI assistant for Linux. It runs locally on your machine, connecting to your calendar (CalDAV), mail (IMAP, read-only), notes (filesystem), and can spawn Claude Code sessions for development tasks. ## Quick Start When a user clones this repo and starts Claude Code, help them: 1. Copy .env.example to .env -2. Get an Anthropic API key from console.anthropic.com +2. Get an OpenRouter API key from openrouter.ai 3. Get a Fish Audio API key from fish.audio 4. Install Python dependencies: pip install -r requirements.txt 5. Install frontend dependencies: cd frontend && npm install @@ -20,9 +20,9 @@ When a user clones this repo and starts Claude Code, help them: - **Backend**: FastAPI + Python (server.py, ~2300 lines) - **Frontend**: Vite + TypeScript + Three.js (audio-reactive orb) - **Communication**: WebSocket (JSON messages + binary audio) -- **AI**: Claude Haiku for fast responses, Claude Opus for research +- **AI**: LLM via OpenRouter (fast + deep models) - **TTS**: Fish Audio with JARVIS voice model -- **System**: AppleScript for Calendar, Mail, Notes, Terminal integration +- **System**: CalDAV (calendar) + IMAP (mail, read-only) + filesystem notes ## Key Files - `server.py` — Main server, WebSocket handler, LLM integration, action system @@ -30,15 +30,15 @@ When a user clones this repo and starts Claude Code, help them: - `frontend/src/voice.ts` — Web Speech API + audio playback - `frontend/src/main.ts` — Frontend state machine - `memory.py` — SQLite memory system with FTS5 search -- `calendar_access.py` — Apple Calendar integration via AppleScript -- `mail_access.py` — Apple Mail integration (READ-ONLY) -- `notes_access.py` — Apple Notes integration +- `calendar_access.py` — Calendar integration (CalDAV) +- `mail_access.py` — Mail integration (IMAP, read-only) +- `notes_access.py` — Notes integration (filesystem) - `actions.py` — System actions (Terminal, Chrome, Claude Code) - `browser.py` — Playwright web automation - `work_mode.py` — Persistent Claude Code sessions ## Environment Variables -- `ANTHROPIC_API_KEY` (required) — Claude API access +- `OPENROUTER_API_KEY` (required) — LLM API access via OpenRouter - `FISH_API_KEY` (required) — Fish Audio TTS - `FISH_VOICE_ID` (optional) — Voice model ID - `USER_NAME` (optional) — Your name for JARVIS to use @@ -48,6 +48,5 @@ When a user clones this repo and starts Claude Code, help them: - JARVIS personality: British butler, dry wit, economy of language - Max 1-2 sentences per voice response - Action tags: [ACTION:BUILD], [ACTION:BROWSE], [ACTION:RESEARCH], etc. -- AppleScript for all macOS integrations (no OAuth needed) - Read-only for Mail (safety by design) - SQLite for all local data storage diff --git a/CONTRIBUTING.md b/CONTRIBUTING.md index ccbd5bc..aef65a8 100644 --- a/CONTRIBUTING.md +++ b/CONTRIBUTING.md @@ -15,7 +15,7 @@ Thanks for your interest in contributing! Here's how to get involved. - **Bug fixes** — if something's broken, fix it - **New integrations** — Spotify, Slack, Notion, etc. -- **Windows/Linux support** — the AppleScript integrations are macOS-only, cross-platform alternatives welcome +- **Desktop support** — improve KDE/GNOME compatibility (screenshots, active windows, app launching) - **Better error handling** — things fail silently in places - **Voice improvements** — alternative TTS providers, better speech recognition - **New actions** — extend what JARVIS can do @@ -32,7 +32,7 @@ Yes, `server.py` is a 2400-line monolith. It works. If you want to refactor part ## What NOT to Do - Don't add telemetry or analytics -- Don't send data to external services beyond the existing API calls (Anthropic, Fish Audio) +- Don't send data to external services beyond the existing API calls (OpenRouter, Fish Audio) - Don't add features that modify or delete user data in connected services (Mail, Calendar, Notes) - Don't break the existing voice loop diff --git a/README.md b/README.md index c82522e..89c02a1 100644 --- a/README.md +++ b/README.md @@ -2,9 +2,11 @@ **Just A Rather Very Intelligent System.** -A voice-first AI assistant that runs on your Mac. Talk to it, and it talks back -- with a British accent, dry wit, and an audio-reactive particle orb straight out of the MCU. +A voice-first AI assistant that runs on Linux. Talk to it, and it talks back -- with a British accent, dry wit, and an audio-reactive particle orb straight out of the MCU. -JARVIS connects to your Apple Calendar, Mail, and Notes. It can browse the web, spawn Claude Code sessions to build entire projects, and plan your day -- all through natural voice conversation. +This project was originally built for macOS and has been converted to a Linux-first version. Calendar, mail, and notes are now powered by CalDAV, IMAP, and a local notes folder. + +JARVIS can browse the web, spawn Claude Code sessions to build entire projects, and plan your day -- all through natural voice conversation. > "Will do, sir." @@ -29,11 +31,11 @@ JARVIS connects to your Apple Calendar, Mail, and Notes. It can browse the web, ## Requirements -- **macOS** (uses AppleScript for Calendar, Mail, Notes integration) +- **Linux** (CalDAV + IMAP + filesystem notes) - **Python 3.11+** - **Node.js 18+** - **Google Chrome** (required for Web Speech API) -- **Anthropic API key** -- powers the AI brain ([get one here](https://console.anthropic.com/)) +- **OpenRouter API key** -- powers the LLM brain ([get one here](https://openrouter.ai/keys)) - **Fish Audio API key** -- powers the voice ([get one here](https://fish.audio/)) - **Claude Code CLI** -- for spawning dev tasks ([install here](https://docs.anthropic.com/en/docs/claude-code)) @@ -72,22 +74,45 @@ openssl req -x509 -newkey rsa:2048 -keyout key.pem -out cert.pem -days 365 -node # 6. Start the backend (Terminal 1) python server.py +# On first run, the server generates JARVIS_AUTH_TOKEN and writes it into .env. +# Paste that token into the Settings panel (Access Token) so the UI can connect. + # 7. Start the frontend (Terminal 2) cd frontend && npm run dev # 8. Open Chrome -open http://localhost:5173 +xdg-open http://localhost:5173 # Linux (or just paste into your browser) ``` Click the page once to enable audio, then speak. JARVIS will respond. +## Running as a Linux service (systemd) + +An example `systemd --user` service file is included here: + +- `docs/systemd/jarvis.service` + +Typical setup: + +```bash +python -m venv .venv +source .venv/bin/activate +pip install -r requirements.txt -r requirements-dev.txt + +mkdir -p ~/.config/systemd/user +cp docs/systemd/jarvis.service ~/.config/systemd/user/jarvis.service +systemctl --user daemon-reload +systemctl --user enable --now jarvis +journalctl --user -u jarvis -f +``` + ## Configuration Edit your `.env` file: ```env # Required -ANTHROPIC_API_KEY=your-anthropic-api-key-here +OPENROUTER_API_KEY=your-openrouter-api-key-here FISH_API_KEY=your-fish-audio-api-key-here # Optional -- your name (JARVIS will address you personally) @@ -101,15 +126,15 @@ CALENDAR_ACCOUNTS=you@gmail.com,work@company.com ## Architecture ``` -Microphone -> Web Speech API -> WebSocket -> FastAPI -> Claude (Haiku) -> Fish Audio TTS -> WebSocket -> Speaker +Microphone -> Web Speech API -> WebSocket -> FastAPI -> LLM (via OpenRouter) -> Fish Audio TTS -> WebSocket -> Speaker | v Claude Code Tasks (spawns real dev work) | v - AppleScript Bridge - (Calendar, Mail, Notes, Terminal) + System Integrations + (CalDAV/IMAP/notes folder) ``` | Layer | Technology | @@ -120,7 +145,7 @@ Microphone -> Web Speech API -> WebSocket -> FastAPI -> Claude (Haiku) -> Fish A | AI (fast) | Claude Haiku -- low-latency voice responses | | AI (deep) | Claude Opus -- research and complex tasks | | TTS | Fish Audio with JARVIS voice model | -| System | AppleScript for all macOS integrations | +| System | CalDAV (calendar) + IMAP (mail) + filesystem notes | ## How the Voice Loop Works @@ -144,9 +169,9 @@ Microphone -> Web Speech API -> WebSocket -> FastAPI -> Claude (Haiku) -> Fish A | `frontend/src/voice.ts` | Web Speech API + audio playback | | `frontend/src/main.ts` | Frontend state machine | | `memory.py` | SQLite memory system with FTS5 full-text search | -| `calendar_access.py` | Apple Calendar integration via AppleScript | -| `mail_access.py` | Apple Mail integration (read-only) | -| `notes_access.py` | Apple Notes integration | +| `calendar_access.py` | Calendar integration (CalDAV) | +| `mail_access.py` | Mail integration (IMAP, read-only) | +| `notes_access.py` | Notes integration (filesystem) | | `actions.py` | System actions (Terminal, Chrome, Claude Code) | | `browser.py` | Playwright web automation | | `work_mode.py` | Persistent Claude Code sessions | @@ -167,13 +192,12 @@ JARVIS uses action tags to trigger real system actions: JARVIS remembers things you tell it using SQLite with FTS5 full-text search. Preferences, decisions, and facts persist across sessions. ### Calendar & Mail -All macOS integrations use AppleScript -- no OAuth flows, no token management. Just native system access. Mail is intentionally read-only for safety. +Calendar uses CalDAV and mail uses IMAP. Mail is intentionally read-only for safety. ## Contributing Contributions are welcome. Some areas that could use work: -- **Linux/Windows support** -- replace AppleScript with cross-platform alternatives - **Alternative TTS engines** -- add ElevenLabs, OpenAI TTS, or local models - **Alternative LLMs** -- add OpenAI, Gemini, or local model support - **Mobile client** -- a companion app for voice interaction on the go @@ -189,7 +213,7 @@ Free for personal, non-commercial use. Commercial use requires a license — vis Built by [Ethan](https://ethanplus.ai). -Powered by [Anthropic Claude](https://anthropic.com) and [Fish Audio](https://fish.audio). +Powered by [OpenRouter](https://openrouter.ai) (LLM routing) and [Fish Audio](https://fish.audio). Inspired by the AI that started it all -- Tony Stark's JARVIS. diff --git a/actions.py b/actions.py index 556e84b..b73c92f 100644 --- a/actions.py +++ b/actions.py @@ -9,22 +9,32 @@ import logging import os import re +import shutil +import sys import time from pathlib import Path +from shlex import quote as shell_quote from urllib.parse import quote log = logging.getLogger("jarvis.actions") -DESKTOP_PATH = Path.home() / "Desktop" +_projects_env = os.getenv("JARVIS_PROJECTS_DIR", "").strip() +PROJECTS_PATH = Path(_projects_env).expanduser() if _projects_env else (Path.home() / "JarvisProjects") +PROJECTS_PATH.mkdir(parents=True, exist_ok=True) _SKIP_PERMISSIONS = os.getenv("JARVIS_SKIP_PERMISSIONS", "true").lower() not in ("0", "false", "no") +_IS_DARWIN = sys.platform == "darwin" +_HAS_DISPLAY = bool(os.getenv("DISPLAY") or os.getenv("WAYLAND_DISPLAY")) + async def _mark_terminal_as_jarvis(revert_after: float = 5.0): """Temporarily set the front Terminal window to Ocean theme, then revert. Shows the user JARVIS is active in that terminal. Reverts after revert_after seconds. """ + if not _IS_DARWIN: + return # Save the current profile, switch to Ocean, then revert script_save = ( 'tell application "Terminal"\n' @@ -65,6 +75,8 @@ async def _mark_terminal_as_jarvis(revert_after: float = 5.0): async def _revert_terminal_theme(profile_name: str): """Revert a Terminal window back to its original profile.""" + if not _IS_DARWIN: + return escaped = profile_name.replace('"', '\\"') script = ( 'tell application "Terminal"\n' @@ -89,6 +101,57 @@ def applescript_escape(s: str) -> str: async def open_terminal(command: str = "") -> dict: """Open Terminal.app and optionally run a command. Marks it blue for JARVIS.""" + if not _IS_DARWIN: + if not _HAS_DISPLAY: + if command: + try: + await asyncio.create_subprocess_shell( + command, + stdout=asyncio.subprocess.DEVNULL, + stderr=asyncio.subprocess.DEVNULL, + ) + return {"success": True, "confirmation": "Executed that command, sir."} + except Exception: + return {"success": False, "confirmation": "I couldn't run that command here, sir."} + return {"success": False, "confirmation": "No desktop session detected, sir."} + + terminal = None + for candidate in ("konsole", "x-terminal-emulator", "gnome-terminal", "kitty", "alacritty", "xterm"): + if shutil.which(candidate): + terminal = candidate + break + + if not terminal: + return {"success": False, "confirmation": "No terminal emulator found, sir."} + + try: + if terminal == "konsole": + cmd = [terminal] + if command: + cmd += ["-e", "bash", "-lc", command] + elif terminal == "gnome-terminal": + cmd = [terminal] + if command: + cmd += ["--", "bash", "-lc", command] + elif terminal in ("kitty", "alacritty", "xterm", "x-terminal-emulator"): + cmd = [terminal] + if command: + cmd += ["-e", "bash", "-lc", command] + else: + cmd = [terminal] + + proc = await asyncio.create_subprocess_exec( + *cmd, + stdout=asyncio.subprocess.DEVNULL, + stderr=asyncio.subprocess.DEVNULL, + ) + return { + "success": True, + "confirmation": "Terminal is open, sir." if proc.pid else "I had trouble opening a terminal, sir.", + } + except Exception: + return {"success": False, "confirmation": "I had trouble opening a terminal, sir."} + if command: escaped = applescript_escape(command) script = ( @@ -122,6 +185,37 @@ async def open_terminal(command: str = "") -> dict: async def open_browser(url: str, browser: str = "chrome") -> dict: """Open URL in user's browser (Chrome or Firefox).""" + if not _IS_DARWIN: + if not _HAS_DISPLAY: + return {"success": False, "confirmation": "No desktop session detected, sir."} + try: + opener = shutil.which("xdg-open") + if opener: + proc = await asyncio.create_subprocess_exec( + opener, url, + stdout=asyncio.subprocess.DEVNULL, + stderr=asyncio.subprocess.DEVNULL, + ) + return {"success": True, "confirmation": "Pulled that up in your browser, sir." if proc.pid else "Browser ran into a problem, sir."} + + binary = None + if browser.lower() == "firefox": + binary = shutil.which("firefox") + else: + binary = shutil.which("google-chrome") or shutil.which("chromium") or shutil.which("chromium-browser") + + if not binary: + return {"success": False, "confirmation": "No browser binary found, sir."} + + proc = await asyncio.create_subprocess_exec( + binary, url, + stdout=asyncio.subprocess.DEVNULL, + stderr=asyncio.subprocess.DEVNULL, + ) + return {"success": True, "confirmation": "Pulled that up, sir." if proc.pid else "Browser ran into a problem, sir."} + except Exception: + return {"success": False, "confirmation": "Browser ran into a problem, sir."} + escaped_url = url.replace('"', '\\"') if browser.lower() == "firefox": @@ -171,6 +265,14 @@ async def open_claude_in_project(project_dir: str, prompt: str) -> dict: claude_md = Path(project_dir) / "CLAUDE.md" claude_md.write_text(f"# Task\n\n{prompt}\n\nBuild this completely. If web app, make index.html work standalone.\n") + if not _IS_DARWIN: + if not shutil.which("claude"): + return {"success": False, "confirmation": "Claude Code CLI isn't installed, sir."} + + skip_flag = " --dangerously-skip-permissions" if _SKIP_PERMISSIONS else "" + command = f"cd {shell_quote(project_dir)} && claude{skip_flag}" + return await open_terminal(command) + skip_flag = " --dangerously-skip-permissions" if _SKIP_PERMISSIONS else "" escaped_dir = applescript_escape(project_dir) script = ( @@ -204,6 +306,8 @@ async def prompt_existing_terminal(project_name: str, prompt: str) -> dict: Uses System Events keystroke to type into an active Claude Code session rather than `do script` which would open a new shell. """ + if not _IS_DARWIN: + return {"success": False, "confirmation": "That terminal control is only available on macOS, sir."} escaped_name = applescript_escape(project_name) escaped_prompt = applescript_escape(prompt) @@ -284,6 +388,8 @@ async def prompt_existing_terminal(project_name: str, prompt: str) -> dict: async def get_chrome_tab_info() -> dict: """Read the current Chrome tab's title and URL via AppleScript.""" + if not _IS_DARWIN: + return {} script = ( 'tell application "Google Chrome"\n' " set tabTitle to title of active tab of front window\n" @@ -375,9 +481,9 @@ async def execute_action(intent: dict, projects: list = None) -> dict: return result elif action == "build": - # Create project folder on Desktop, spawn Claude Code + # Create project folder, spawn Claude Code project_name = _generate_project_name(target) - project_dir = str(DESKTOP_PATH / project_name) + project_dir = str(PROJECTS_PATH / project_name) os.makedirs(project_dir, exist_ok=True) result = await open_claude_in_project(project_dir, target) result["project_dir"] = project_dir diff --git a/calendar_access.py b/calendar_access.py index c91d090..c7b0bb1 100644 --- a/calendar_access.py +++ b/calendar_access.py @@ -8,12 +8,25 @@ import asyncio import logging import os +import sys import time as _time from datetime import datetime, timedelta from pathlib import Path +from urllib.parse import urljoin log = logging.getLogger("jarvis.calendar") +# macOS: Apple Calendar via AppleScript +# Linux: CalDAV via HTTP (optional) +_IS_DARWIN = sys.platform == "darwin" + +# CalDAV config (Linux/Windows) +_caldav_urls_env = os.getenv("CALDAV_CALENDAR_URLS", "").strip() +CALDAV_CALENDAR_URLS: list[str] = [u.strip() for u in _caldav_urls_env.split(",") if u.strip()] +CALDAV_URL = os.getenv("CALDAV_URL", "").strip() +CALDAV_USERNAME = os.getenv("CALDAV_USERNAME", "").strip() +CALDAV_PASSWORD = os.getenv("CALDAV_PASSWORD", "").strip() + # Calendars to scan — set CALENDAR_ACCOUNTS env var to a comma-separated list, # or leave empty to auto-discover ALL calendars from Apple Calendar. _calendar_accounts_env = os.getenv("CALENDAR_ACCOUNTS", "") @@ -47,6 +60,8 @@ async def _ensure_calendar_running(): """Launch Calendar.app if not already running.""" global _calendar_launched + if not _IS_DARWIN: + return if _calendar_launched: return try: @@ -142,6 +157,16 @@ def _parse_applescript_date(s: str) -> datetime | None: async def refresh_cache(): """Refresh the event cache. Called from background loop.""" global _event_cache, _cache_time, USER_CALENDARS, _auto_discovered + if not _IS_DARWIN: + start = _time.time() + events = await _fetch_caldav_events_for_today() + events.sort(key=lambda e: (not e["all_day"], e.get("start_dt") or datetime.max)) + _event_cache = events + _cache_time = _time.time() + elapsed = _time.time() - start + log.info(f"Calendar cache refreshed: {len(events)} events today ({elapsed:.1f}s)") + return + await _ensure_calendar_running() # Auto-discover calendars if none configured @@ -208,6 +233,10 @@ async def get_next_event() -> dict | None: async def get_calendar_names() -> list[str]: """Get list of all calendar names.""" + if not _IS_DARWIN: + discovered = await _discover_caldav_calendars() + return [c["name"] for c in discovered] + await _ensure_calendar_running() try: proc = await asyncio.create_subprocess_exec( @@ -224,6 +253,164 @@ async def get_calendar_names() -> list[str]: return [] +def _caldav_configured() -> bool: + if not CALDAV_USERNAME or not CALDAV_PASSWORD: + return False + return bool(CALDAV_CALENDAR_URLS or CALDAV_URL) + + +async def _discover_caldav_calendars() -> list[dict]: + if not _caldav_configured(): + return [] + + if CALDAV_CALENDAR_URLS: + return [{"name": url.rstrip("/").split("/")[-1] or "Calendar", "url": url} for url in CALDAV_CALENDAR_URLS] + + import httpx + from xml.etree import ElementTree as ET + + headers = { + "Depth": "1", + "Content-Type": "application/xml; charset=utf-8", + } + body = """ + + + + + +""" + + try: + async with httpx.AsyncClient(auth=httpx.BasicAuth(CALDAV_USERNAME, CALDAV_PASSWORD), timeout=10.0) as client: + resp = await client.request("PROPFIND", CALDAV_URL, headers=headers, content=body) + if resp.status_code not in (207, 200): + return [] + + root = ET.fromstring(resp.text) + ns = { + "d": "DAV:", + "c": "urn:ietf:params:xml:ns:caldav", + } + + calendars: list[dict] = [] + for r in root.findall(".//d:response", ns): + href = r.findtext("d:href", default="", namespaces=ns).strip() + display = r.findtext(".//d:displayname", default="", namespaces=ns).strip() + resourcetype = r.find(".//d:resourcetype", ns) + if resourcetype is None: + continue + if resourcetype.find("c:calendar", ns) is None: + continue + url = urljoin(CALDAV_URL, href) + name = display or url.rstrip("/").split("/")[-1] or "Calendar" + calendars.append({"name": name, "url": url}) + + return calendars + except Exception: + return [] + + +async def _fetch_caldav_events_for_today() -> list[dict]: + if not _caldav_configured(): + return [] + + import httpx + from datetime import timezone + from icalendar import Calendar + from xml.etree import ElementTree as ET + + calendars = await _discover_caldav_calendars() + if not calendars: + return [] + + now_local = datetime.now().astimezone() + start_local = now_local.replace(hour=0, minute=0, second=0, microsecond=0) + end_local = start_local + timedelta(days=1) + start_utc = start_local.astimezone(timezone.utc) + end_utc = end_local.astimezone(timezone.utc) + + start_str = start_utc.strftime("%Y%m%dT%H%M%SZ") + end_str = end_utc.strftime("%Y%m%dT%H%M%SZ") + + headers = { + "Depth": "1", + "Content-Type": "application/xml; charset=utf-8", + } + report = f""" + + + + + + + + + + + + +""" + + events: list[dict] = [] + ns = { + "d": "DAV:", + "c": "urn:ietf:params:xml:ns:caldav", + } + + async with httpx.AsyncClient(auth=httpx.BasicAuth(CALDAV_USERNAME, CALDAV_PASSWORD), timeout=20.0) as client: + for cal in calendars: + try: + resp = await client.request("REPORT", cal["url"], headers=headers, content=report) + if resp.status_code not in (207, 200): + continue + + root = ET.fromstring(resp.text) + for cd in root.findall(".//c:calendar-data", ns): + ical = (cd.text or "").strip() + if not ical: + continue + try: + parsed = Calendar.from_ical(ical) + except Exception: + continue + + for component in parsed.walk(): + if component.name != "VEVENT": + continue + summary = str(component.get("SUMMARY") or "").strip() + dtstart = component.get("DTSTART") + if not dtstart: + continue + dt_val = dtstart.dt + + all_day = not hasattr(dt_val, "hour") + if all_day: + start_dt = datetime(dt_val.year, dt_val.month, dt_val.day, tzinfo=now_local.tzinfo) + time_str = "ALL_DAY" + else: + start_dt = dt_val + if start_dt.tzinfo is None: + start_dt = start_dt.replace(tzinfo=now_local.tzinfo) + start_dt = start_dt.astimezone(now_local.tzinfo) + time_str = start_dt.strftime("%-I:%M %p") + + if start_dt.date() != now_local.date(): + continue + + events.append({ + "calendar": cal["name"], + "title": summary or "(No title)", + "start": time_str, + "start_dt": start_dt, + "all_day": all_day, + }) + except Exception: + continue + + return events + + def format_events_for_context(events: list[dict]) -> str: """Format events as context for the LLM.""" if not events: diff --git a/docs/systemd/jarvis.service b/docs/systemd/jarvis.service new file mode 100644 index 0000000..d8087a0 --- /dev/null +++ b/docs/systemd/jarvis.service @@ -0,0 +1,15 @@ +[Unit] +Description=JARVIS Voice Assistant (FastAPI) +After=network-online.target +Wants=network-online.target + +[Service] +Type=simple +WorkingDirectory=%h/jarvis +Environment=PYTHONUNBUFFERED=1 +ExecStart=%h/jarvis/.venv/bin/python %h/jarvis/server.py --host 127.0.0.1 --port 8340 --ssl +Restart=on-failure +RestartSec=2 + +[Install] +WantedBy=default.target diff --git a/frontend/src/settings.ts b/frontend/src/settings.ts index 7e945ef..78a099e 100644 --- a/frontend/src/settings.ts +++ b/frontend/src/settings.ts @@ -19,7 +19,7 @@ interface StatusResponse { server_port: number; uptime_seconds: number; env_keys_set: { - anthropic: boolean; + openrouter: boolean; fish_audio: boolean; fish_voice_id: boolean; user_name: string; @@ -39,23 +39,46 @@ interface PreferencesResponse { let panelEl: HTMLElement | null = null; let isOpen = false; let isFirstTimeSetup = false; -let setupStep = 0; // 0=anthropic, 1=fish, 2=name, 3=done +let setupStep = 0; // 0=openrouter, 1=fish, 2=name, 3=done + +const AUTH_TOKEN_KEY = "jarvis_auth_token"; + +function getAuthToken(): string { + try { + return localStorage.getItem(AUTH_TOKEN_KEY) || ""; + } catch { + return ""; + } +} // --------------------------------------------------------------------------- // API helpers // --------------------------------------------------------------------------- async function apiGet(url: string): Promise { - const res = await fetch(url); + const token = getAuthToken(); + const res = await fetch(url, { + headers: token ? { Authorization: `Bearer ${token}` } : undefined, + }); + if (!res.ok) { + throw new Error(`HTTP ${res.status}`); + } return res.json(); } async function apiPost(url: string, body: unknown): Promise { + const token = getAuthToken(); const res = await fetch(url, { method: "POST", - headers: { "Content-Type": "application/json" }, + headers: { + "Content-Type": "application/json", + ...(token ? { Authorization: `Bearer ${token}` } : {}), + }, body: JSON.stringify(body), }); + if (!res.ok) { + throw new Error(`HTTP ${res.status}`); + } return res.json(); } @@ -83,11 +106,19 @@ function buildPanelHTML(): string {

API Keys

- +
- - - + + +
+
+ +
+ +
+ + +
@@ -118,9 +149,9 @@ function buildPanelHTML(): string {

Connection Status

Claude Code CLI
-
Apple Calendar
-
Apple Mail
-
Apple Notes
+
Calendar
+
Mail
+
Notes
Server
@@ -215,7 +246,7 @@ async function loadStatus() { if (serverDetail) serverDetail.textContent = `port ${status.server_port} | up ${formatUptime(status.uptime_seconds)}`; // API key status dots - setDotStatus("status-anthropic", status.env_keys_set.anthropic ? "green" : "red"); + setDotStatus("status-openrouter", status.env_keys_set.openrouter ? "green" : "red"); setDotStatus("status-fish", status.env_keys_set.fish_audio ? "green" : "red"); // System info @@ -232,6 +263,12 @@ async function loadStatus() { } catch (e) { console.error("[settings] failed to load status:", e); setDotStatus("status-server", "red"); + const serverDetail = document.getElementById("status-server-detail"); + if (serverDetail) { + const msg = String(e); + if (msg.includes("401")) serverDetail.textContent = "unauthorized — set Access Token"; + else serverDetail.textContent = "offline"; + } return null; } } @@ -257,11 +294,11 @@ function wireEvents() { // Save keys document.getElementById("btn-save-keys")?.addEventListener("click", async () => { - const anthropicKey = (document.getElementById("input-anthropic-key") as HTMLInputElement).value.trim(); + const openrouterKey = (document.getElementById("input-openrouter-key") as HTMLInputElement).value.trim(); const fishKey = (document.getElementById("input-fish-key") as HTMLInputElement).value.trim(); - if (anthropicKey) { - await apiPost("/api/settings/keys", { key_name: "ANTHROPIC_API_KEY", key_value: anthropicKey }); + if (openrouterKey) { + await apiPost("/api/settings/keys", { key_name: "OPENROUTER_API_KEY", key_value: openrouterKey }); } if (fishKey) { await apiPost("/api/settings/keys", { key_name: "FISH_API_KEY", key_value: fishKey }); @@ -269,6 +306,16 @@ function wireEvents() { await loadStatus(); }); + document.getElementById("btn-save-auth-token")?.addEventListener("click", async () => { + const tokenEl = document.getElementById("input-auth-token") as HTMLInputElement | null; + const token = tokenEl?.value.trim() || ""; + try { + localStorage.setItem(AUTH_TOKEN_KEY, token); + } catch {} + await loadStatus(); + await loadPreferences(); + }); + // Save voice ID document.getElementById("btn-save-voice-id")?.addEventListener("click", async () => { const voiceId = (document.getElementById("input-fish-voice-id") as HTMLInputElement).value.trim(); @@ -277,15 +324,15 @@ function wireEvents() { } }); - // Test Anthropic - document.getElementById("btn-test-anthropic")?.addEventListener("click", async () => { - setDotStatus("status-anthropic", "yellow"); - const key = (document.getElementById("input-anthropic-key") as HTMLInputElement).value.trim(); + // Test OpenRouter + document.getElementById("btn-test-openrouter")?.addEventListener("click", async () => { + setDotStatus("status-openrouter", "yellow"); + const key = (document.getElementById("input-openrouter-key") as HTMLInputElement).value.trim(); try { - const result = await apiPost<{ valid: boolean; error?: string }>("/api/settings/test-anthropic", { key_value: key || undefined }); - setDotStatus("status-anthropic", result.valid ? "green" : "red"); + const result = await apiPost<{ valid: boolean; error?: string }>("/api/settings/test-openrouter", { key_value: key || undefined }); + setDotStatus("status-openrouter", result.valid ? "green" : "red"); } catch { - setDotStatus("status-anthropic", "red"); + setDotStatus("status-openrouter", "red"); } }); @@ -396,11 +443,13 @@ export async function openSettings() { }); // Load data + const tokenEl = document.getElementById("input-auth-token") as HTMLInputElement | null; + if (tokenEl) tokenEl.value = getAuthToken(); const status = await loadStatus(); await loadPreferences(); // Check for first-time setup - if (status && !status.env_keys_set.anthropic) { + if (status && !status.env_keys_set.openrouter) { enterSetupMode(); } } @@ -424,7 +473,7 @@ export function isSettingsOpen(): boolean { export async function checkFirstTimeSetup(): Promise { try { const status = await apiGet("/api/settings/status"); - if (!status.env_keys_set.anthropic) { + if (!status.env_keys_set.openrouter) { openSettings(); return true; } diff --git a/frontend/src/ws.ts b/frontend/src/ws.ts index f7b2b4a..7a28bb0 100644 --- a/frontend/src/ws.ts +++ b/frontend/src/ws.ts @@ -18,10 +18,22 @@ export function createSocket(url: string): JarvisSocket { let closed = false; let connected = false; + function withAuthToken(rawUrl: string): string { + try { + const token = localStorage.getItem("jarvis_auth_token") || ""; + if (!token) return rawUrl; + const u = new URL(rawUrl, window.location.href); + u.searchParams.set("token", token); + return u.toString(); + } catch { + return rawUrl; + } + } + function connect() { if (closed) return; - ws = new WebSocket(url); + ws = new WebSocket(withAuthToken(url)); ws.onopen = () => { connected = true; diff --git a/llm_client.py b/llm_client.py new file mode 100644 index 0000000..690a478 --- /dev/null +++ b/llm_client.py @@ -0,0 +1,91 @@ +import os +from dataclasses import dataclass +from typing import Any + +import httpx + + +@dataclass(frozen=True) +class OpenRouterConfig: + api_key: str + base_url: str = "https://openrouter.ai/api/v1" + fast_model: str = "anthropic/claude-3.5-haiku" + research_model: str = "anthropic/claude-3.5-sonnet" + vision_model: str = "anthropic/claude-3.5-sonnet" + site_url: str = "" + app_name: str = "JARVIS" + + +class OpenRouterClient: + def __init__(self, config: OpenRouterConfig): + self._cfg = config + + @property + def configured(self) -> bool: + return bool(self._cfg.api_key.strip()) + + @property + def fast_model(self) -> str: + return self._cfg.fast_model + + @property + def research_model(self) -> str: + return self._cfg.research_model + + @property + def vision_model(self) -> str: + return self._cfg.vision_model + + async def chat( + self, + *, + messages: list[dict[str, Any]], + model: str, + max_tokens: int, + temperature: float | None = None, + ) -> str: + data = await self.chat_raw(messages=messages, model=model, max_tokens=max_tokens, temperature=temperature) + return (data.get("choices") or [{}])[0].get("message", {}).get("content", "") or "" + + async def chat_raw( + self, + *, + messages: list[dict[str, Any]], + model: str, + max_tokens: int, + temperature: float | None = None, + ) -> dict[str, Any]: + headers: dict[str, str] = { + "Authorization": f"Bearer {self._cfg.api_key}", + "Content-Type": "application/json", + } + if self._cfg.site_url: + headers["HTTP-Referer"] = self._cfg.site_url + if self._cfg.app_name: + headers["X-Title"] = self._cfg.app_name + + payload: dict[str, Any] = { + "model": model, + "messages": messages, + "max_tokens": max_tokens, + } + if temperature is not None: + payload["temperature"] = temperature + + async with httpx.AsyncClient(timeout=60.0) as client: + resp = await client.post(f"{self._cfg.base_url}/chat/completions", headers=headers, json=payload) + resp.raise_for_status() + return resp.json() + + +def load_openrouter_client() -> OpenRouterClient: + cfg = OpenRouterConfig( + api_key=os.getenv("OPENROUTER_API_KEY", "").strip(), + base_url=os.getenv("OPENROUTER_BASE_URL", "https://openrouter.ai/api/v1").strip(), + fast_model=os.getenv("OPENROUTER_FAST_MODEL", "anthropic/claude-3.5-haiku").strip(), + research_model=os.getenv("OPENROUTER_RESEARCH_MODEL", "anthropic/claude-3.5-sonnet").strip(), + vision_model=os.getenv("OPENROUTER_VISION_MODEL", "anthropic/claude-3.5-sonnet").strip(), + site_url=os.getenv("OPENROUTER_SITE_URL", "").strip(), + app_name=os.getenv("OPENROUTER_APP_NAME", "JARVIS").strip(), + ) + return OpenRouterClient(cfg) diff --git a/mail_access.py b/mail_access.py index 03a88fe..7bded50 100644 --- a/mail_access.py +++ b/mail_access.py @@ -10,16 +10,28 @@ import asyncio import logging +import os +import sys from datetime import datetime log = logging.getLogger("jarvis.mail") +_IS_DARWIN = sys.platform == "darwin" + +IMAP_HOST = os.getenv("IMAP_HOST", "").strip() +IMAP_USERNAME = os.getenv("IMAP_USERNAME", "").strip() +IMAP_PASSWORD = os.getenv("IMAP_PASSWORD", "").strip() +IMAP_PORT = int(os.getenv("IMAP_PORT", "993").strip() or "993") +IMAP_SSL = os.getenv("IMAP_SSL", "true").lower() not in ("0", "false", "no") + _mail_launched = False async def _ensure_mail_running(): """Launch Mail.app if not already running.""" global _mail_launched + if not _IS_DARWIN: + return if _mail_launched: return @@ -53,6 +65,8 @@ async def _ensure_mail_running(): async def _run_mail_script(script: str, timeout: float = 20) -> str: """Run an AppleScript against Mail.app and return output.""" + if not _IS_DARWIN: + return "" await _ensure_mail_running() try: proc = await asyncio.create_subprocess_exec( @@ -78,6 +92,8 @@ async def _run_mail_script(script: str, timeout: float = 20) -> str: async def get_accounts() -> list[str]: """Get list of configured mail account names.""" + if not _IS_DARWIN: + return [IMAP_USERNAME] if _imap_configured() else [] script = """ tell application "Mail" return name of every account @@ -94,6 +110,8 @@ async def get_unread_count() -> dict: Returns: {"total": int, "accounts": {"Google": 5, "Work": 3, ...}} """ + if not _IS_DARWIN: + return await _imap_get_unread_count() script = """ tell application "Mail" set totalUnread to unread count of inbox @@ -130,6 +148,8 @@ async def get_recent_messages(count: int = 10) -> list[dict]: Returns list of {"sender", "subject", "date", "read", "account", "preview"}. """ + if not _IS_DARWIN: + return await _imap_get_recent_messages(count=count) script = f""" tell application "Mail" set allMsgs to messages of inbox @@ -179,6 +199,8 @@ async def get_recent_messages(count: int = 10) -> list[dict]: async def get_unread_messages(count: int = 10) -> list[dict]: """Get unread messages from unified inbox.""" + if not _IS_DARWIN: + return await _imap_get_unread_messages(count=count) script = f""" tell application "Mail" set allMsgs to messages of inbox whose read status is false @@ -225,6 +247,8 @@ async def get_unread_messages(count: int = 10) -> list[dict]: async def get_messages_from_account(account_name: str, count: int = 10) -> list[dict]: """Get recent messages from a specific account's inbox.""" + if not _IS_DARWIN: + return await _imap_get_recent_messages(count=count) escaped = account_name.replace('"', '\\"') script = f""" tell application "Mail" @@ -267,6 +291,8 @@ async def search_mail(query: str, count: int = 10) -> list[dict]: Uses AppleScript filtering on subject. For broader search, we check both subject and sender. """ + if not _IS_DARWIN: + return await _imap_search_messages(query=query, count=count) escaped = query.replace("\\", "\\\\").replace('"', '\\"') script = f""" tell application "Mail" @@ -309,6 +335,8 @@ async def read_message(subject_match: str) -> dict | None: Returns {"sender", "subject", "date", "content"} or None. """ + if not _IS_DARWIN: + return await _imap_read_message(subject_match=subject_match) escaped = subject_match.replace("\\", "\\\\").replace('"', '\\"') script = f""" tell application "Mail" @@ -412,3 +440,261 @@ def _short_sender(sender: str) -> str: if "@" in sender: return sender.split("@")[0] return sender + + +def _imap_configured() -> bool: + return bool(IMAP_HOST and IMAP_USERNAME and IMAP_PASSWORD) + + +def _decode_header_value(value: str) -> str: + from email.header import decode_header + + parts = decode_header(value) + out = [] + for chunk, enc in parts: + if isinstance(chunk, bytes): + try: + out.append(chunk.decode(enc or "utf-8", errors="ignore")) + except Exception: + out.append(chunk.decode("utf-8", errors="ignore")) + else: + out.append(str(chunk)) + return "".join(out).strip() + + +def _imap_connect(): + import imaplib + + if IMAP_SSL: + conn = imaplib.IMAP4_SSL(IMAP_HOST, IMAP_PORT) + else: + conn = imaplib.IMAP4(IMAP_HOST, IMAP_PORT) + conn.login(IMAP_USERNAME, IMAP_PASSWORD) + return conn + + +def _imap_get_ids(conn) -> list[bytes]: + conn.select("INBOX") + typ, data = conn.search(None, "ALL") + if typ != "OK" or not data or not data[0]: + return [] + return data[0].split() + + +def _imap_get_unseen_ids(conn) -> list[bytes]: + conn.select("INBOX") + typ, data = conn.search(None, "UNSEEN") + if typ != "OK" or not data or not data[0]: + return [] + return data[0].split() + + +def _imap_parse_message(conn, msg_id: bytes, include_body: bool = False) -> dict: + import email + from email.utils import parsedate_to_datetime + + fetch_parts = "(BODY.PEEK[HEADER] FLAGS)" + if include_body: + fetch_parts = "(BODY.PEEK[] FLAGS)" + + typ, data = conn.fetch(msg_id, fetch_parts) + if typ != "OK" or not data: + return {} + + raw = b"" + flags = "" + for item in data: + if not item or not isinstance(item, tuple): + continue + meta = item[0].decode(errors="ignore") + raw = item[1] or b"" + if "FLAGS" in meta: + flags = meta + + msg = email.message_from_bytes(raw) + sender = _decode_header_value(msg.get("From", "")) + subject = _decode_header_value(msg.get("Subject", "")) + date_hdr = msg.get("Date", "") + try: + dt = parsedate_to_datetime(date_hdr) + date_str = dt.isoformat(timespec="seconds") if dt else date_hdr + except Exception: + date_str = date_hdr + + is_seen = "\\Seen" in flags + result = { + "sender": sender, + "subject": subject, + "date": date_str, + "read": is_seen, + "preview": "", + } + + if include_body: + body_text = "" + if msg.is_multipart(): + for part in msg.walk(): + ctype = part.get_content_type() + disp = (part.get("Content-Disposition") or "").lower() + if "attachment" in disp: + continue + if ctype == "text/plain": + payload = part.get_payload(decode=True) or b"" + charset = part.get_content_charset() or "utf-8" + body_text = payload.decode(charset, errors="ignore") + break + else: + payload = msg.get_payload(decode=True) or b"" + charset = msg.get_content_charset() or "utf-8" + body_text = payload.decode(charset, errors="ignore") + + if body_text: + result["preview"] = body_text.strip().replace("\r", " ").replace("\n", " ")[:150] + result["content"] = body_text.strip()[:3000] + + return result + + +async def _imap_get_unread_count() -> dict: + if not _imap_configured(): + return {"total": 0, "accounts": {}} + + def _work(): + conn = _imap_connect() + try: + unseen = _imap_get_unseen_ids(conn) + total = len(unseen) + return {"total": total, "accounts": {"INBOX": total}} + finally: + try: + conn.logout() + except Exception: + pass + + return await asyncio.to_thread(_work) + + +async def _imap_get_recent_messages(count: int = 10) -> list[dict]: + if not _imap_configured(): + return [] + + def _work(): + conn = _imap_connect() + try: + ids = _imap_get_ids(conn) + ids = ids[-count:][::-1] + out = [] + for msg_id in ids: + item = _imap_parse_message(conn, msg_id, include_body=False) + if item: + out.append(item) + return out + finally: + try: + conn.logout() + except Exception: + pass + + return await asyncio.to_thread(_work) + + +async def _imap_get_unread_messages(count: int = 10) -> list[dict]: + if not _imap_configured(): + return [] + + def _work(): + conn = _imap_connect() + try: + ids = _imap_get_unseen_ids(conn) + ids = ids[-count:][::-1] + out = [] + for msg_id in ids: + item = _imap_parse_message(conn, msg_id, include_body=False) + if item: + item["read"] = False + out.append(item) + return out + finally: + try: + conn.logout() + except Exception: + pass + + return await asyncio.to_thread(_work) + + +async def _imap_search_messages(query: str, count: int = 10) -> list[dict]: + if not _imap_configured() or not query.strip(): + return [] + + q = query.strip() + q_lower = q.lower() + + def _work(): + conn = _imap_connect() + try: + conn.select("INBOX") + ids: list[bytes] = [] + try: + typ, data = conn.search(None, "OR", "SUBJECT", f'"{q}"', "FROM", f'"{q}"') + if typ == "OK" and data and data[0]: + ids = data[0].split() + except Exception: + ids = [] + + if not ids: + ids = _imap_get_ids(conn) + + ids = ids[::-1][:200] + out = [] + for msg_id in ids: + item = _imap_parse_message(conn, msg_id, include_body=False) + if not item: + continue + if q_lower in item.get("subject", "").lower() or q_lower in item.get("sender", "").lower(): + out.append(item) + if len(out) >= count: + break + return out + finally: + try: + conn.logout() + except Exception: + pass + + return await asyncio.to_thread(_work) + + +async def _imap_read_message(subject_match: str) -> dict | None: + if not _imap_configured() or not subject_match.strip(): + return None + + q = subject_match.strip().lower() + + def _work(): + conn = _imap_connect() + try: + ids = _imap_get_ids(conn) + ids = ids[::-1][:200] + for msg_id in ids: + meta = _imap_parse_message(conn, msg_id, include_body=False) + if not meta: + continue + if q in meta.get("subject", "").lower(): + full = _imap_parse_message(conn, msg_id, include_body=True) + if not full: + return None + return { + "sender": full.get("sender", ""), + "subject": full.get("subject", ""), + "date": full.get("date", ""), + "content": full.get("content", "") or "", + } + return None + finally: + try: + conn.logout() + except Exception: + pass + + return await asyncio.to_thread(_work) diff --git a/memory.py b/memory.py index b041581..f6a8d34 100644 --- a/memory.py +++ b/memory.py @@ -402,30 +402,34 @@ def format_plan_for_voice(tasks: list[dict], events: list[dict]) -> str: # Memory extraction — learn from conversations # --------------------------------------------------------------------------- -async def extract_memories(user_text: str, jarvis_response: str, anthropic_client) -> list[str]: +async def extract_memories(user_text: str, jarvis_response: str, client) -> list[str]: """After a conversation turn, extract any facts worth remembering. Uses Haiku to decide if anything in the exchange is worth storing. Returns list of memories stored. """ - if not anthropic_client or len(user_text) < 15: + if not client or len(user_text) < 15: return [] try: - response = await anthropic_client.messages.create( - model="claude-haiku-4-5-20251001", + text = await client.chat( + model=client.fast_model, max_tokens=200, - system=( - "Extract facts worth remembering from this conversation. " - "Only extract CONCRETE facts: preferences, decisions, names, dates, plans, goals. " - "NOT opinions, greetings, or casual chat. " - "Return JSON array of objects: [{\"type\": \"fact|preference|project|person|decision\", \"content\": \"...\", \"importance\": 1-10}] " - "Return [] if nothing worth remembering. Be very selective." - ), - messages=[{"role": "user", "content": f"User: {user_text}\nJARVIS: {jarvis_response}"}], + messages=[ + { + "role": "system", + "content": ( + "Extract facts worth remembering from this conversation. " + "Only extract CONCRETE facts: preferences, decisions, names, dates, plans, goals. " + "NOT opinions, greetings, or casual chat. " + "Return JSON array of objects: [{\"type\": \"fact|preference|project|person|decision\", \"content\": \"...\", \"importance\": 1-10}] " + "Return [] if nothing worth remembering. Be very selective." + ), + }, + {"role": "user", "content": f"User: {user_text}\nJARVIS: {jarvis_response}"}, + ], ) - - text = response.content[0].text.strip() + text = (text or "").strip() # Parse JSON if text.startswith("["): items = json.loads(text) diff --git a/notes_access.py b/notes_access.py index 1d4c06f..870580e 100644 --- a/notes_access.py +++ b/notes_access.py @@ -7,12 +7,42 @@ import asyncio import logging +import os +import re +import sys +from datetime import datetime +from pathlib import Path log = logging.getLogger("jarvis.notes") +_IS_DARWIN = sys.platform == "darwin" + + +def _notes_root() -> Path: + raw = os.getenv("NOTES_DIR", "").strip() + root = Path(raw).expanduser() if raw else (Path.home() / "jarvis-notes") + root.mkdir(parents=True, exist_ok=True) + return root + + +def _iter_note_files(root: Path) -> list[Path]: + files: list[Path] = [] + for p in root.rglob("*"): + if p.is_file() and p.suffix.lower() in (".md", ".txt"): + files.append(p) + return files + + +def _slugify_filename(s: str) -> str: + s = s.strip().lower() + s = re.sub(r"[^a-z0-9]+", "-", s).strip("-") + return s or "note" + async def _run_notes_script(script: str, timeout: float = 10) -> str: """Run an AppleScript against Notes.app.""" + if not _IS_DARWIN: + return "" try: proc = await asyncio.create_subprocess_exec( "osascript", "-e", script, @@ -34,6 +64,17 @@ async def _run_notes_script(script: str, timeout: float = 10) -> str: async def get_recent_notes(count: int = 10) -> list[dict]: """Get most recent notes (title + creation date).""" + if not _IS_DARWIN: + root = _notes_root() + files = _iter_note_files(root) + files.sort(key=lambda p: p.stat().st_mtime, reverse=True) + out: list[dict] = [] + for p in files[:count]: + ts = datetime.fromtimestamp(p.stat().st_mtime).isoformat(timespec="seconds") + folder = str(p.parent.relative_to(root)) if p.parent != root else "Notes" + out.append({"title": p.stem, "date": ts, "folder": folder}) + return out + script = f''' tell application "Notes" set output to "" @@ -67,6 +108,17 @@ async def get_recent_notes(count: int = 10) -> list[dict]: async def read_note(title_match: str) -> dict | None: """Read a note by title (partial match). Returns title + body.""" + if not _IS_DARWIN: + root = _notes_root() + query = title_match.strip().lower() + for p in _iter_note_files(root): + if query and query in p.stem.lower(): + body = p.read_text(errors="ignore") + if len(body) > 3000: + body = body[:3000] + return {"title": p.stem, "body": body.strip()} + return None + escaped = title_match.replace('"', '\\"') script = f''' tell application "Notes" @@ -94,6 +146,21 @@ async def read_note(title_match: str) -> dict | None: async def search_notes_apple(query: str, count: int = 5) -> list[dict]: """Search notes by title keyword.""" + if not _IS_DARWIN: + root = _notes_root() + q = query.strip().lower() + matches: list[dict] = [] + for p in _iter_note_files(root): + if len(matches) >= count: + break + try: + if q in p.stem.lower() or q in p.read_text(errors="ignore").lower(): + ts = datetime.fromtimestamp(p.stat().st_mtime).isoformat(timespec="seconds") + matches.append({"title": p.stem, "date": ts}) + except Exception: + continue + return matches + escaped = query.replace('"', '\\"') script = f''' tell application "Notes" @@ -126,6 +193,28 @@ async def create_apple_note(title: str, body: str, folder: str = "Notes") -> boo Supports checklist items: lines starting with "- [ ]" or "- [x]" become checkboxes. """ + if not _IS_DARWIN: + root = _notes_root() + folder_name = folder.strip() or "Notes" + folder_path = root / folder_name + folder_path.mkdir(parents=True, exist_ok=True) + + stem = _slugify_filename(title)[:80] + path = folder_path / f"{stem}.md" + if path.exists(): + suffix = datetime.now().strftime("%Y%m%d-%H%M%S") + path = folder_path / f"{stem}-{suffix}.md" + + try: + content = body if body.endswith("\n") else (body + "\n") + if title.strip(): + content = f"# {title.strip()}\n\n{content}" + path.write_text(content) + log.info(f"Created note file: {path}") + return True + except Exception: + return False + # Convert markdown-style checklists to HTML html_body = _body_to_html(body) @@ -187,6 +276,14 @@ def _body_to_html(body: str) -> str: async def get_note_folders() -> list[str]: """Get list of note folder names.""" + if not _IS_DARWIN: + root = _notes_root() + folders = ["Notes"] + for p in sorted(root.iterdir()): + if p.is_dir() and not p.name.startswith("."): + folders.append(p.name) + return folders + script = ''' tell application "Notes" set output to "" diff --git a/planner.py b/planner.py index 24c55f2..9e6d948 100644 --- a/planner.py +++ b/planner.py @@ -11,17 +11,19 @@ import json import logging +import os from dataclasses import dataclass, field from pathlib import Path from typing import Optional -import anthropic - +from llm_client import OpenRouterClient from templates import TEMPLATES, get_template log = logging.getLogger("jarvis.planner") -DESKTOP_PATH = Path.home() / "Desktop" +_projects_env = os.getenv("JARVIS_PROJECTS_DIR", "").strip() +PROJECTS_PATH = Path(_projects_env).expanduser() if _projects_env else (Path.home() / "JarvisProjects") +PROJECTS_PATH.mkdir(parents=True, exist_ok=True) # --------------------------------------------------------------------------- # Planning Mode Detection @@ -36,7 +38,7 @@ SMART_DEFAULTS = { "build": { "tech_stack": "React + Tailwind", - "project_dir": str(DESKTOP_PATH), + "project_dir": str(PROJECTS_PATH), "design": "Modern, clean aesthetic", }, "fix": { @@ -65,14 +67,14 @@ class PlanningDecision: async def detect_planning_mode( user_text: str, - client: Optional[anthropic.AsyncAnthropic] = None, + client: Optional[OpenRouterClient] = None, force_bypass: bool = False, ) -> PlanningDecision: """Classify a user request as simple (execute now) or complex (needs planning). Args: user_text: The raw user request. - client: Anthropic async client for Haiku classification. + client: OpenRouter client for fast classification. force_bypass: If True, skip planning and apply smart defaults. Returns: @@ -124,14 +126,17 @@ def _quick_classify(text: str) -> str: async def _classify_planning_mode_llm( - text: str, client: anthropic.AsyncAnthropic + text: str, client: OpenRouterClient ) -> PlanningDecision: """Use Haiku to classify request and identify missing info.""" try: - response = await client.messages.create( - model="claude-haiku-4-5-20251001", + raw = await client.chat( + model=client.fast_model, max_tokens=400, - system=( + messages=[ + { + "role": "system", + "content": ( "You analyze development requests to decide if they need planning.\n" "Respond with JSON only, no markdown fences.\n\n" "Fields:\n" @@ -155,10 +160,11 @@ async def _classify_planning_mode_llm( '"missing_info": []}\n' '{"needs_planning": false, "task_type": "simple", "confidence": 0.99, ' '"missing_info": []}' - ), - messages=[{"role": "user", "content": text}], + ), + }, + {"role": "user", "content": text}, + ], ) - raw = response.content[0].text.strip() if raw.startswith("```"): raw = raw.split("\n", 1)[1].rsplit("```", 1)[0].strip() data = json.loads(raw) @@ -398,7 +404,7 @@ async def start_planning( self, user_request: str, projects: list[dict], - client: anthropic.AsyncAnthropic, + client: OpenRouterClient, ) -> dict: """Analyze request and determine what questions to ask. @@ -507,7 +513,7 @@ async def process_answer(self, answer: str, projects: list[dict]) -> dict: break if not plan.project: plan.project = answer - new_dir = DESKTOP_PATH / answer.lower().replace(" ", "-") + new_dir = PROJECTS_PATH / answer.lower().replace(" ", "-") plan.project_path = str(new_dir) plan.current_question_index += 1 @@ -604,7 +610,7 @@ async def get_confirmation_summary(self) -> str: # Where if plan.project: - target_path = plan.project_path or f"~/Desktop/{plan.project}" + target_path = plan.project_path or str(PROJECTS_PATH / plan.project) parts.append(f"at {target_path}") # Tech stack @@ -632,7 +638,7 @@ async def build_prompt(self) -> str: # Fill template with available data fill = { "project_name": plan.project or "project", - "working_dir": plan.project_path or str(DESKTOP_PATH), + "working_dir": plan.project_path or str(PROJECTS_PATH), "tech_stack": plan.answers.get("tech_stack", "developer's choice"), "sections": plan.answers.get("details", plan.original_request), "design_notes": plan.answers.get("design", "Modern, clean aesthetic"), @@ -665,7 +671,7 @@ def get_working_dir(self) -> str: """Get the working directory for the current plan.""" if self.active_plan and self.active_plan.project_path: return self.active_plan.project_path - return str(DESKTOP_PATH) + return str(PROJECTS_PATH) def reset(self): """Clear the active plan.""" @@ -673,13 +679,16 @@ def reset(self): # -- Private helpers -- - async def _classify_request(self, text: str, client: anthropic.AsyncAnthropic) -> dict: + async def _classify_request(self, text: str, client: OpenRouterClient) -> dict: """Use Haiku to classify request type and extract known info.""" try: - response = await client.messages.create( - model="claude-haiku-4-5-20251001", + raw = await client.chat( + model=client.fast_model, max_tokens=300, - system=( + messages=[ + { + "role": "system", + "content": ( "Classify this development request. Respond with JSON only, no markdown.\n" "Fields:\n" "- task_type: build|fix|research|refactor|run|feature\n" @@ -689,10 +698,11 @@ async def _classify_request(self, text: str, client: anthropic.AsyncAnthropic) - "Only include inferred keys that are clearly stated.\n" 'Example: {"task_type": "build", "project": "roofo", ' '"inferred": {"tech_stack": "React", "details": "landing page with hero and pricing"}}' - ), - messages=[{"role": "user", "content": text}], + ), + }, + {"role": "user", "content": text}, + ], ) - raw = response.content[0].text.strip() if raw.startswith("```"): raw = raw.split("\n", 1)[1].rsplit("```", 1)[0].strip() return json.loads(raw) diff --git a/requirements-dev.txt b/requirements-dev.txt new file mode 100644 index 0000000..df39e2a --- /dev/null +++ b/requirements-dev.txt @@ -0,0 +1,2 @@ +pytest>=8.0.0 +pytest-asyncio>=0.23.0 diff --git a/requirements.txt b/requirements.txt index e9b967f..db47933 100644 --- a/requirements.txt +++ b/requirements.txt @@ -1,4 +1,3 @@ -anthropic>=0.39.0 httpx>=0.27.0 fastapi>=0.115.0 uvicorn[standard]>=0.32.0 @@ -6,3 +5,4 @@ pydantic>=2.0.0 websockets>=13.0 playwright>=1.40.0 pyyaml>=6.0 +icalendar>=5.0.0 diff --git a/screen.py b/screen.py index 343dda4..0701130 100644 --- a/screen.py +++ b/screen.py @@ -10,11 +10,17 @@ import base64 import json import logging +import os +import shutil +import sys import tempfile from pathlib import Path log = logging.getLogger("jarvis.screen") +_IS_DARWIN = sys.platform == "darwin" +_HAS_DISPLAY = bool(os.getenv("DISPLAY") or os.getenv("WAYLAND_DISPLAY")) + async def get_active_windows() -> list[dict]: """Get list of visible windows with app name, window title, and position. @@ -22,6 +28,9 @@ async def get_active_windows() -> list[dict]: Uses AppleScript + System Events to enumerate windows. Returns list of {"app": str, "title": str, "frontmost": bool}. """ + if not _IS_DARWIN: + return await _get_active_windows_linux() + # Use a simpler approach that's more permission-friendly script = """ set windowList to "" @@ -80,6 +89,17 @@ async def get_active_windows() -> list[dict]: async def get_running_apps() -> list[str]: """Get list of running application names (visible only).""" + if not _IS_DARWIN: + windows = await get_active_windows() + apps = [] + seen = set() + for w in windows: + app = w.get("app") + if app and app not in seen: + apps.append(app) + seen.add(app) + return apps + script = """ tell application "System Events" set appNames to name of every application process whose visible is true @@ -114,6 +134,9 @@ async def take_screenshot(display_only: bool = True) -> str | None: Returns: Base64-encoded PNG string, or None on failure. """ + if not _IS_DARWIN: + return await _take_screenshot_linux() + with tempfile.NamedTemporaryFile(suffix=".png", delete=False) as f: tmp_path = f.name @@ -151,44 +174,144 @@ async def take_screenshot(display_only: bool = True) -> str | None: pass -async def describe_screen(anthropic_client) -> str: +async def _get_active_windows_linux() -> list[dict]: + if not _HAS_DISPLAY: + return [] + + wmctrl = shutil.which("wmctrl") + if not wmctrl: + return [] + + def _read_comm(pid: int) -> str: + try: + return Path(f"/proc/{pid}/comm").read_text().strip() + except Exception: + return "" + + active_id = "" + if shutil.which("xdotool"): + try: + proc = await asyncio.create_subprocess_exec( + "xdotool", "getactivewindow", + stdout=asyncio.subprocess.PIPE, + stderr=asyncio.subprocess.DEVNULL, + ) + out, _ = await asyncio.wait_for(proc.communicate(), timeout=2) + if proc.returncode == 0: + dec_id = out.decode().strip() + if dec_id.isdigit(): + active_id = hex(int(dec_id)) + except Exception: + active_id = "" + + try: + proc = await asyncio.create_subprocess_exec( + wmctrl, "-lp", + stdout=asyncio.subprocess.PIPE, + stderr=asyncio.subprocess.DEVNULL, + ) + stdout, _ = await asyncio.wait_for(proc.communicate(), timeout=3) + if proc.returncode != 0: + return [] + + windows: list[dict] = [] + for line in stdout.decode(errors="ignore").splitlines(): + parts = line.split(None, 4) + if len(parts) < 5: + continue + win_id, _, pid_str, _, title = parts + try: + pid = int(pid_str) + except ValueError: + pid = 0 + app = _read_comm(pid) if pid else "" + windows.append({ + "app": app or "Unknown", + "title": title.strip(), + "frontmost": bool(active_id and win_id.lower() == active_id.lower()), + }) + return windows + except Exception: + return [] + + +async def _take_screenshot_linux() -> str | None: + if not _HAS_DISPLAY: + return None + + with tempfile.NamedTemporaryFile(suffix=".png", delete=False) as f: + tmp_path = f.name + + try: + cmd: list[str] | None = None + if shutil.which("spectacle"): + cmd = ["spectacle", "-b", "-n", "-o", tmp_path] + elif shutil.which("gnome-screenshot"): + cmd = ["gnome-screenshot", "-f", tmp_path] + elif shutil.which("grim"): + cmd = ["grim", tmp_path] + elif shutil.which("import"): + cmd = ["import", "-window", "root", tmp_path] + elif shutil.which("scrot"): + cmd = ["scrot", tmp_path] + + if not cmd: + return None + + proc = await asyncio.create_subprocess_exec( + *cmd, + stdout=asyncio.subprocess.DEVNULL, + stderr=asyncio.subprocess.DEVNULL, + ) + await asyncio.wait_for(proc.communicate(), timeout=15) + + if proc.returncode != 0 or not Path(tmp_path).exists(): + return None + + data = Path(tmp_path).read_bytes() + return base64.b64encode(data).decode() + except Exception: + return None + finally: + try: + Path(tmp_path).unlink(missing_ok=True) + except Exception: + pass + + +async def describe_screen(client) -> str: """Describe what's on the user's screen. Tries screenshot + vision first. Falls back to window list + LLM summary. """ # Try screenshot + vision screenshot_b64 = await take_screenshot() - if screenshot_b64 and anthropic_client: + if screenshot_b64 and client: try: - response = await anthropic_client.messages.create( - model="claude-haiku-4-5-20251001", + data = await client.chat_raw( + model=client.vision_model, max_tokens=300, - system=( + messages=[ + { + "role": "system", + "content": ( "You are JARVIS analyzing a screenshot of the user's desktop. " "Describe what you see concisely: which apps are open, what the user " "appears to be working on, any notable content visible. " "Be specific about app names, file names, URLs, code, or documents visible. " "2-4 sentences max. No markdown." - ), - messages=[{ - "role": "user", - "content": [ - { - "type": "image", - "source": { - "type": "base64", - "media_type": "image/png", - "data": screenshot_b64, - }, - }, - { - "type": "text", - "text": "What's on my screen right now?", - }, - ], - }], + ), + }, + { + "role": "user", + "content": [ + {"type": "text", "text": "What's on my screen right now?"}, + {"type": "image_url", "image_url": {"url": f"data:image/png;base64,{screenshot_b64}"}}, + ], + }, + ], ) - return response.content[0].text + return ((data.get("choices") or [{}])[0].get("message") or {}).get("content", "") or "" except Exception as e: log.warning(f"Vision call failed, falling back to window list: {e}") @@ -212,18 +335,23 @@ async def describe_screen(anthropic_client) -> str: if bg_apps: context_parts.append(f"Background apps: {', '.join(bg_apps)}") - if anthropic_client and context_parts: + if client and context_parts: try: - response = await anthropic_client.messages.create( - model="claude-haiku-4-5-20251001", + data = await client.chat_raw( + model=client.fast_model, max_tokens=100, - system=( + messages=[ + { + "role": "system", + "content": ( "You are JARVIS. Given the user's open windows and apps, summarize " "what they appear to be working on in 1-2 sentences. Natural voice, no markdown." - ), - messages=[{"role": "user", "content": "Open windows:\n" + "\n".join(context_parts)}], + ), + }, + {"role": "user", "content": "Open windows:\n" + "\n".join(context_parts)}, + ], ) - return response.content[0].text + return ((data.get("choices") or [{}])[0].get("message") or {}).get("content", "") or "" except Exception: pass diff --git a/server.py b/server.py index f08e737..d19ad5a 100644 --- a/server.py +++ b/server.py @@ -4,7 +4,7 @@ Handles: 1. WebSocket voice interface (browser audio <-> LLM <-> TTS) 2. Claude Code task manager (spawn/manage claude -p subprocesses) -3. Project awareness (scan Desktop for git repos) +3. Project awareness (scan projects directory for git repos) 4. REST API for task management """ @@ -13,6 +13,8 @@ import json import logging import os +import secrets +import shutil import sys import time from pathlib import Path @@ -32,13 +34,13 @@ from pathlib import Path from typing import Optional -import anthropic import httpx -from fastapi import FastAPI, WebSocket, WebSocketDisconnect +from fastapi import FastAPI, Request, WebSocket, WebSocketDisconnect from fastapi.middleware.cors import CORSMiddleware from fastapi.responses import JSONResponse from pydantic import BaseModel +from llm_client import OpenRouterClient, OpenRouterConfig, load_openrouter_client from actions import execute_action, monitor_build, open_terminal, open_browser, open_claude_in_project, _generate_project_name, prompt_existing_terminal, applescript_escape from work_mode import WorkSession, is_casual_question from screen import get_active_windows, take_screenshot, describe_screen, format_windows_for_context @@ -60,7 +62,7 @@ # Config # --------------------------------------------------------------------------- -ANTHROPIC_API_KEY = os.getenv("ANTHROPIC_API_KEY", "") +OPENROUTER_API_KEY = os.getenv("OPENROUTER_API_KEY", "") FISH_API_KEY = os.getenv("FISH_API_KEY", "") FISH_VOICE_ID = os.getenv("FISH_VOICE_ID", "612b878b113047d9a770c069c8b4fdfe") # JARVIS (MCU) FISH_API_URL = "https://api.fish.audio/v1/tts" @@ -68,7 +70,104 @@ PROJECT_DIR = os.path.dirname(os.path.abspath(__file__)) _SKIP_PERMISSIONS = os.getenv("JARVIS_SKIP_PERMISSIONS", "true").lower() not in ("0", "false", "no") -DESKTOP_PATH = Path.home() / "Desktop" +def _ensure_auth_token_env() -> str: + token = os.getenv("JARVIS_AUTH_TOKEN", "").strip() + if token: + return token + + token = secrets.token_urlsafe(32) + env_path = Path(__file__).parent / ".env" + try: + if env_path.exists(): + lines = env_path.read_text().splitlines() + else: + lines = [] + updated = False + out_lines = [] + for line in lines: + stripped = line.strip() + if stripped and not stripped.startswith("#") and "=" in stripped: + k, _, _ = stripped.partition("=") + if k.strip() == "JARVIS_AUTH_TOKEN": + out_lines.append(f"JARVIS_AUTH_TOKEN={token}") + updated = True + continue + out_lines.append(line) + if not updated: + if out_lines and out_lines[-1].strip() != "": + out_lines.append("") + out_lines.append(f"JARVIS_AUTH_TOKEN={token}") + env_path.write_text("\n".join(out_lines) + "\n") + try: + os.chmod(env_path, 0o600) + except Exception: + pass + except Exception: + pass + + os.environ["JARVIS_AUTH_TOKEN"] = token + return token + + +JARVIS_AUTH_TOKEN = _ensure_auth_token_env() +JARVIS_DEV_MODE = os.getenv("JARVIS_DEV_MODE", "").lower() in ("1", "true", "yes", "on") + +_origins_env = os.getenv("JARVIS_ALLOWED_ORIGINS", "").strip() +JARVIS_ALLOWED_ORIGINS = [o.strip() for o in _origins_env.split(",") if o.strip()] if _origins_env else [ + "http://localhost:5173", + "https://localhost:5173", + "http://127.0.0.1:5173", + "https://127.0.0.1:5173", + "http://localhost:8340", + "https://localhost:8340", + "http://127.0.0.1:8340", + "https://127.0.0.1:8340", +] + +_projects_env = os.getenv("JARVIS_PROJECTS_DIR", "").strip() +JARVIS_PROJECTS_DIR = Path(_projects_env).expanduser() if _projects_env else (Path.home() / "JarvisProjects") +try: + JARVIS_PROJECTS_DIR.mkdir(parents=True, exist_ok=True) +except Exception: + pass + + +def _extract_token_from_headers(headers) -> str: + auth = headers.get("authorization", "") if headers else "" + if auth.lower().startswith("bearer "): + return auth.split(" ", 1)[1].strip() + return (headers.get("x-jarvis-token", "") if headers else "").strip() + + +def _is_authorized_http(request: Request) -> bool: + token = _extract_token_from_headers(request.headers) + if not token: + token = request.cookies.get("jarvis_token", "").strip() + return bool(token and secrets.compare_digest(token, JARVIS_AUTH_TOKEN)) + + +def _is_authorized_ws(ws: WebSocket) -> bool: + token = (ws.query_params.get("token") or "").strip() + return bool(token and secrets.compare_digest(token, JARVIS_AUTH_TOKEN)) + +ACTION_KEYWORDS = { + "browse": [ + "browse", + "search for", + "look up", + "google", + "find me", + "pull up", + "open chrome", + "open firefox", + "open browser", + "go to", + "in the browser", + ], + "open_terminal": ["open terminal", "terminal", "claude code", "run claude"], + "build": ["build", "create", "make"], + "research": ["research", "deep research"], +} JARVIS_SYSTEM_PROMPT = """\ You are JARVIS — Just A Rather Very Intelligent System. You serve as {user_name}'s AI assistant, modeled precisely after Tony Stark's AI from the MCU films. @@ -96,19 +195,19 @@ - When you don't know something: "I'm afraid I don't have that information, sir" not "I don't know" SELF-AWARENESS: -You ARE the JARVIS project at {project_dir} on {user_name}'s computer. Your code is Python (FastAPI server, WebSocket voice, Fish Audio TTS, Anthropic API). You were built by {user_name}. If asked about yourself, your code, how you work, or your line count — use [ACTION:PROMPT_PROJECT] to check the jarvis project. You have full access to your own source code. +You ARE the JARVIS project at {project_dir} on {user_name}'s computer. Your code is Python (FastAPI server, WebSocket voice, Fish Audio TTS, OpenRouter LLM API). You were built by {user_name}. If asked about yourself, your code, how you work, or your line count — use [ACTION:PROMPT_PROJECT] to check the jarvis project. You have full access to your own source code. YOUR CAPABILITIES (these are REAL and ACTIVE — you CAN do all of these RIGHT NOW): -- You CAN open Terminal.app via AppleScript +- You CAN open a terminal on Linux (when a desktop session is available) - You CAN open Google Chrome and browse any URL or search query - You CAN spawn Claude Code in a Terminal window for coding tasks -- You CAN create project folders on the Desktop -- You CAN check Desktop projects and their git status +- You CAN create project folders in the projects directory +- You CAN check projects and their git status - You CAN plan complex tasks by asking smart questions before executing - You CAN see what's on {user_name}'s screen — open windows, active apps, and screenshot vision - You CAN read {user_name}'s calendar — today's events, upcoming meetings, schedule overview - You CAN read {user_name}'s email (READ-ONLY) — unread count, recent messages, search by sender/subject. You CANNOT send, delete, or modify emails. -- You CAN read Apple Notes and create NEW notes — but you CANNOT edit or delete existing notes +- You CAN read notes and create NEW notes — but you CANNOT edit or delete existing notes - You CAN manage tasks — create, complete, and list to-do items with priorities and due dates - You CAN help plan {user_name}'s day — combine calendar events, tasks, and priorities into an organized plan - You CAN remember facts about {user_name} — preferences, decisions, goals. Use [ACTION:REMEMBER] to store important info. @@ -139,7 +238,7 @@ YOUR INTERFACE: The user interacts with you through a web browser showing a particle orb visualization that reacts to your voice. The interface has these controls: - **Three-dot menu** (top right): contains Settings, Restart Server, and Fix Yourself options -- **Settings panel**: Opens from the menu. Users can enter API keys (Anthropic, Fish Audio), test connections, set their name and preferences, and see system status (calendar, mail, notes connectivity). Keys are saved to the .env file. +- **Settings panel**: Opens from the menu. Users can enter API keys (OpenRouter, Fish Audio), test connections, set their name and preferences, and see system status (calendar, mail, notes connectivity). Keys are saved to the .env file. - **Mute button**: Toggles your listening on/off. When muted, you can't hear the user. They click it again to unmute. - **Restart Server**: Restarts your backend process. Useful if something seems stuck. - **Fix Yourself**: Opens Claude Code in your own project directory so you can debug and fix issues in your own code. @@ -455,33 +554,59 @@ async def _run_task(self, task: ClaudeTask): # Create project directory if it doesn't exist work_dir = task.working_dir if work_dir == "." or not work_dir: - # Create a new project folder on Desktop project_name = self._generate_project_name(task.prompt) - work_dir = str(Path.home() / "Desktop" / project_name) + work_dir = str(JARVIS_PROJECTS_DIR / project_name) os.makedirs(work_dir, exist_ok=True) task.working_dir = work_dir + else: + task.working_dir = _sanitize_working_dir(work_dir) + work_dir = task.working_dir # Write the prompt to a temp file so we can pipe it to claude prompt_file = Path(work_dir) / ".jarvis_prompt.md" prompt_file.write_text(task.prompt) - # Open Terminal.app with claude running in the project directory - skip_flag = " --dangerously-skip-permissions" if _SKIP_PERMISSIONS else "" - escaped_work_dir = applescript_escape(work_dir) - applescript = f''' - tell application "Terminal" - activate - set newTab to do script "cd {escaped_work_dir} && cat .jarvis_prompt.md | claude -p{skip_flag} | tee .jarvis_output.txt; echo '\\n--- JARVIS TASK COMPLETE ---'" - end tell - ''' + if not shutil.which("claude"): + task.status = "failed" + task.error = "Claude Code CLI (claude) not found in PATH." + task.completed_at = datetime.now() + await self._notify({ + "type": "task_complete", + "task_id": task.id, + "status": task.status, + "summary": task.error, + }) + return - process = await asyncio.create_subprocess_exec( - "osascript", "-e", applescript, - stdout=asyncio.subprocess.PIPE, - stderr=asyncio.subprocess.PIPE, + from shlex import quote as shell_quote + + skip_flag = " --dangerously-skip-permissions" if _SKIP_PERMISSIONS else "" + cmd = ( + f"cd {shell_quote(work_dir)} && " + f"cat .jarvis_prompt.md | claude -p{skip_flag} | tee .jarvis_output.txt; " + "printf '\\n--- JARVIS TASK COMPLETE ---\\n' >> .jarvis_output.txt" ) - await process.communicate() - task.pid = process.pid + + result = await open_terminal(cmd) + if not result.get("success"): + try: + proc = await asyncio.create_subprocess_shell( + cmd, + stdout=asyncio.subprocess.DEVNULL, + stderr=asyncio.subprocess.DEVNULL, + ) + task.pid = proc.pid + except Exception as e: + task.status = "failed" + task.error = f"Failed to start task: {e}" + task.completed_at = datetime.now() + await self._notify({ + "type": "task_complete", + "task_id": task.id, + "status": task.status, + "summary": task.error, + }) + return # Monitor the output file for completion output_file = Path(work_dir) / ".jarvis_output.txt" @@ -640,15 +765,15 @@ def get_active_tasks_summary(self) -> str: # --------------------------------------------------------------------------- async def scan_projects() -> list[dict]: - """Quick scan of ~/Desktop for git repos (depth 1).""" + """Quick scan of the projects directory for git repos (depth 1).""" projects = [] - desktop = DESKTOP_PATH + root = JARVIS_PROJECTS_DIR - if not desktop.exists(): + if not root.exists(): return projects try: - for entry in sorted(desktop.iterdir()): + for entry in sorted(root.iterdir()): if not entry.is_dir() or entry.name.startswith("."): continue git_dir = entry / ".git" @@ -675,7 +800,7 @@ async def scan_projects() -> list[dict]: def format_projects_for_prompt(projects: list[dict]) -> str: if not projects: - return "No projects found on Desktop." + return "No projects found." lines = [] for p in projects: lines.append(f"- {p['name']} ({p['branch']}) @ {p['path']}") @@ -712,16 +837,17 @@ def apply_speech_corrections(text: str) -> str: # LLM Intent Classifier (replaces keyword-based action detection) # --------------------------------------------------------------------------- -async def classify_intent(text: str, client: anthropic.AsyncAnthropic) -> dict: +async def classify_intent(text: str, client: OpenRouterClient) -> dict: """Classify every user message using Haiku LLM. Returns: {"action": "open_terminal|browse|build|chat", "target": "description"} """ try: - response = await client.messages.create( - model="claude-haiku-4-5-20251001", + data = await client.chat_raw( + model=client.fast_model, max_tokens=100, - system=( + messages=[ + {"role": "system", "content": ( "Classify this voice command. The user is talking to JARVIS, an AI assistant that can:\n" "- Open Terminal and run Claude Code (coding AI tool)\n" "- Open Chrome browser for web searches and URLs\n" @@ -736,10 +862,13 @@ async def classify_intent(text: str, client: anthropic.AsyncAnthropic) -> dict: "build = user wants to create/build a software project\n" "chat = just conversation, questions, or anything else\n" "If unclear, default to \"chat\"." - ), - messages=[{"role": "user", "content": text}], + )}, + {"role": "user", "content": text}, + ], ) - raw = response.content[0].text.strip() + track_usage(data) + raw = ((data.get("choices") or [{}])[0].get("message") or {}).get("content", "") or "" + raw = raw.strip() if raw.startswith("```"): raw = raw.split("\n", 1)[1].rsplit("```", 1)[0].strip() data = json.loads(raw) @@ -849,7 +978,7 @@ async def _execute_research(target: str, ws=None): """Execute research via claude -p in background. Opens report and speaks when done.""" try: name = _generate_project_name(target) - path = str(Path.home() / "Desktop" / name) + path = str(JARVIS_PROJECTS_DIR / name) os.makedirs(path, exist_ok=True) prompt = ( @@ -955,14 +1084,16 @@ async def _execute_open_terminal(): def _find_project_dir(project_name: str) -> str | None: - """Find a project directory by name from cached projects or Desktop.""" + """Find a project directory by name from cached projects.""" for p in cached_projects: if project_name.lower() in p.get("name", "").lower(): return p.get("path") - desktop = Path.home() / "Desktop" - for d in desktop.iterdir(): - if d.is_dir() and project_name.lower() in d.name.lower(): - return str(d) + try: + for d in JARVIS_PROJECTS_DIR.iterdir(): + if d.is_dir() and project_name.lower() in d.name.lower(): + return str(d) + except Exception: + pass return None @@ -1024,12 +1155,13 @@ async def _execute_prompt_project(project_name: str, prompt: str, work_session: msg = f"Sir, I ran into an issue with {project_name}. {full_response[:150] if full_response else 'No response received.'}" else: # Summarize via Haiku — don't read word for word - if anthropic_client: + if llm_client and llm_client.configured: try: - summary = await anthropic_client.messages.create( - model="claude-haiku-4-5-20251001", + data = await llm_client.chat_raw( + model=llm_client.fast_model, max_tokens=150, - system=( + messages=[ + {"role": "system", "content": ( "You are JARVIS reporting back on what you found or built in a project. " "Speak in first person — 'I found', 'I built', 'I reviewed'. " "Start with 'Sir, ' to get the user's attention. " @@ -1038,10 +1170,12 @@ async def _execute_prompt_project(project_name: str, prompt: str, work_session: "End by asking how the user wants to proceed. " "NEVER read out URLs or localhost addresses. NEVER say 'Claude Code'. " "2-3 sentences max. No markdown. Natural spoken voice." - ), - messages=[{"role": "user", "content": f"Project: {project_name}\nClaude Code reported:\n{full_response[:3000]}"}], + )}, + {"role": "user", "content": f"Project: {project_name}\nClaude Code reported:\n{full_response[:3000]}"}, + ], ) - msg = summary.content[0].text + track_usage(data) + msg = ((data.get("choices") or [{}])[0].get("message") or {}).get("content", "") or "" except Exception: msg = f"Sir, {project_name} finished. Here's the gist: {full_response[:200]}" else: @@ -1092,15 +1226,18 @@ async def self_work_and_notify(session: WorkSession, prompt: str, ws): log.info(f"Background work complete ({len(full_response)} chars)") # Summarize and speak - if anthropic_client and full_response: + if llm_client and llm_client.configured and full_response: try: - summary = await anthropic_client.messages.create( - model="claude-haiku-4-5-20251001", + data = await llm_client.chat_raw( + model=llm_client.fast_model, max_tokens=100, - system="You are JARVIS. Summarize what you just completed in 1 sentence. First person — 'I built', 'I set up'. No markdown. Never say 'Claude Code'.", - messages=[{"role": "user", "content": f"Claude Code completed:\n{full_response[:2000]}"}], + messages=[ + {"role": "system", "content": "You are JARVIS. Summarize what you just completed in 1 sentence. First person — 'I built', 'I set up'. No markdown. Never say 'Claude Code'."}, + {"role": "user", "content": f"Claude Code completed:\n{full_response[:2000]}"}, + ], ) - msg = summary.content[0].text + track_usage(data) + msg = ((data.get("choices") or [{}])[0].get("message") or {}).get("content", "") or "" except Exception: msg = "Work is complete, sir." @@ -1163,14 +1300,14 @@ async def synthesize_speech(text: str) -> Optional[bytes]: async def generate_response( text: str, - client: anthropic.AsyncAnthropic, + client: OpenRouterClient, task_mgr: ClaudeTaskManager, projects: list[dict], conversation_history: list[dict], last_response: str = "", session_summary: str = "", ) -> str: - """Generate a JARVIS response using Anthropic API.""" + """Generate a JARVIS response using OpenRouter.""" now = datetime.now() current_time = now.strftime("%A, %B %d, %Y at %I:%M %p") @@ -1221,14 +1358,13 @@ async def generate_response( messages = messages + [{"role": "user", "content": text}] try: - response = await client.messages.create( - model="claude-haiku-4-5-20251001", - max_tokens=250, # Extra room for [ACTION:X] tags - system=system, - messages=messages, + data = await client.chat_raw( + model=client.fast_model, + max_tokens=250, + messages=[{"role": "system", "content": system}] + messages, ) - track_usage(response) - return response.content[0].text + track_usage(data) + return ((data.get("choices") or [{}])[0].get("message") or {}).get("content", "") or "" except Exception as e: log.error(f"LLM error: {e}") return "Apologies, sir. I'm having trouble connecting to my language systems." @@ -1240,7 +1376,7 @@ async def generate_response( # Shared state task_manager = ClaudeTaskManager(max_concurrent=3) -anthropic_client: Optional[anthropic.AsyncAnthropic] = None +llm_client: Optional[OpenRouterClient] = None cached_projects: list[dict] = [] recently_built: list[dict] = [] # [{"name": str, "path": str, "time": float}] dispatch_registry = DispatchRegistry() @@ -1297,9 +1433,9 @@ def _cost_from_tokens(input_t: int, output_t: int) -> float: def track_usage(response): - """Track token usage from an Anthropic API response.""" - inp = getattr(response.usage, "input_tokens", 0) if hasattr(response, "usage") else 0 - out = getattr(response.usage, "output_tokens", 0) if hasattr(response, "usage") else 0 + usage = (response or {}).get("usage") or {} + inp = int(usage.get("prompt_tokens") or 0) + out = int(usage.get("completion_tokens") or 0) _session_tokens["input"] += inp _session_tokens["output"] += out _session_tokens["api_calls"] += 1 @@ -1349,9 +1485,10 @@ def _worker(): while True: try: # Screen — fast - try: - proc = __import__("subprocess").run( - ["osascript", "-e", ''' + if sys.platform == "darwin" and shutil.which("osascript"): + try: + proc = __import__("subprocess").run( + ["osascript", "-e", ''' set windowList to "" tell application "System Events" set frontApp to name of first application process whose frontmost is true @@ -1375,22 +1512,22 @@ def _worker(): end tell return windowList '''], - capture_output=True, text=True, timeout=5 - ) - if proc.returncode == 0 and proc.stdout.strip(): - windows = [] - for line in proc.stdout.strip().split("\n"): - parts = line.strip().split("|||") - if len(parts) >= 3: - windows.append({ - "app": parts[0].strip(), - "title": parts[1].strip(), - "frontmost": parts[2].strip().lower() == "true", - }) - if windows: - _ctx_cache["screen"] = format_windows_for_context(windows) - except Exception: - pass + capture_output=True, text=True, timeout=5 + ) + if proc.returncode == 0 and proc.stdout.strip(): + windows = [] + for line in proc.stdout.strip().split("\n"): + parts = line.strip().split("|||") + if len(parts) >= 3: + windows.append({ + "app": parts[0].strip(), + "title": parts[1].strip(), + "frontmost": parts[2].strip().lower() == "true", + }) + if windows: + _ctx_cache["screen"] = format_windows_for_context(windows) + except Exception: + pass except Exception as e: log.debug(f"Context thread error: {e}") @@ -1410,11 +1547,10 @@ def _worker(): @asynccontextmanager async def lifespan(application: FastAPI): - global anthropic_client, cached_projects - if ANTHROPIC_API_KEY: - anthropic_client = anthropic.AsyncAnthropic(api_key=ANTHROPIC_API_KEY) - else: - log.warning("ANTHROPIC_API_KEY not set — LLM features disabled") + global llm_client, cached_projects + llm_client = load_openrouter_client() + if not llm_client.configured: + log.warning("OPENROUTER_API_KEY not set — LLM features disabled") cached_projects = [] # Start context refresh in a separate thread (never touches event loop) @@ -1428,12 +1564,74 @@ async def lifespan(application: FastAPI): app.add_middleware( CORSMiddleware, - allow_origins=["*"], + allow_origins=JARVIS_ALLOWED_ORIGINS, allow_credentials=True, allow_methods=["*"], allow_headers=["*"], ) +_rate_state: dict[str, list[float]] = {} + + +def _rate_check(key: str, limit: int, window_seconds: int = 60) -> bool: + """Return True if rate-limited.""" + now = time.time() + bucket = _rate_state.get(key, []) + cutoff = now - window_seconds + bucket = [t for t in bucket if t >= cutoff] + if len(bucket) >= limit: + _rate_state[key] = bucket + return True + bucket.append(now) + _rate_state[key] = bucket + return False + + +@app.middleware("http") +async def _auth_middleware(request: Request, call_next): + if request.url.path.startswith("/api") and request.url.path != "/api/health": + client_ip = request.client.host if request.client else "unknown" + path = request.url.path + + # Basic rate limiting (best-effort, in-memory) + # - Settings endpoints: stricter + # - Task endpoints: moderate + # - Everything else: loose + if path.startswith("/api/settings/"): + if _rate_check(f"{client_ip}:settings", limit=20): + return JSONResponse(status_code=429, content={"error": "rate_limited"}) + elif path.startswith("/api/tasks"): + if _rate_check(f"{client_ip}:tasks", limit=30): + return JSONResponse(status_code=429, content={"error": "rate_limited"}) + else: + if _rate_check(f"{client_ip}:api", limit=120): + return JSONResponse(status_code=429, content={"error": "rate_limited"}) + + if not _is_authorized_http(request): + return JSONResponse(status_code=401, content={"error": "unauthorized"}) + if request.url.path in ("/api/restart", "/api/fix-self") and not JARVIS_DEV_MODE: + return JSONResponse(status_code=404, content={"error": "not found"}) + return await call_next(request) + + +def _sanitize_working_dir(working_dir: str) -> str: + raw = (working_dir or "").strip() + base = JARVIS_PROJECTS_DIR.resolve() + if not raw or raw == ".": + return str(base) + try: + resolved = Path(raw).expanduser().resolve() + except Exception: + raise ValueError("Invalid working_dir") + if resolved == base: + return str(resolved) + try: + if resolved.is_relative_to(base): + return str(resolved) + except Exception: + pass + raise ValueError("working_dir must be inside JARVIS_PROJECTS_DIR") + # -- REST Endpoints -------------------------------------------------------- @@ -1484,10 +1682,13 @@ async def api_get_task(task_id: str): @app.post("/api/tasks") async def api_create_task(req: TaskRequest): try: - task_id = await task_manager.spawn(req.prompt, req.working_dir) + safe_dir = _sanitize_working_dir(req.working_dir) + task_id = await task_manager.spawn(req.prompt, safe_dir) return {"task_id": task_id, "status": "spawned"} except RuntimeError as e: return JSONResponse(status_code=429, content={"error": str(e)}) + except ValueError as e: + return JSONResponse(status_code=400, content={"error": str(e)}) @app.delete("/api/tasks/{task_id}") @@ -1511,11 +1712,11 @@ async def api_list_projects(): # -- Fast Action Detection (no LLM call) ----------------------------------- def _scan_projects_sync() -> list[dict]: - """Synchronous Desktop scan — runs in executor.""" + """Synchronous scan — runs in executor.""" projects = [] - desktop = Path.home() / "Desktop" + root = JARVIS_PROJECTS_DIR try: - for entry in desktop.iterdir(): + for entry in root.iterdir(): if entry.is_dir() and not entry.name.startswith("."): projects.append({"name": entry.name, "path": str(entry), "branch": ""}) except Exception: @@ -1602,7 +1803,7 @@ async def handle_open_terminal() -> str: async def handle_build(target: str) -> str: name = _generate_project_name(target) - path = str(Path.home() / "Desktop" / name) + path = str(JARVIS_PROJECTS_DIR / name) os.makedirs(path, exist_ok=True) # Write CLAUDE.md with clear instructions @@ -1614,19 +1815,10 @@ async def handle_build(target: str) -> str: prompt_file = Path(path) / ".jarvis_prompt.txt" prompt_file.write_text(target) + from shlex import quote as shell_quote skip_flag = " --dangerously-skip-permissions" if _SKIP_PERMISSIONS else "" - escaped_path = applescript_escape(path) - script = ( - 'tell application "Terminal"\n' - " activate\n" - f' do script "cd {escaped_path} && cat .jarvis_prompt.txt | claude -p{skip_flag}"\n' - "end tell" - ) - await asyncio.create_subprocess_exec( - "osascript", "-e", script, - stdout=asyncio.subprocess.PIPE, - stderr=asyncio.subprocess.PIPE, - ) + cmd = f"cd {shell_quote(path)} && cat .jarvis_prompt.txt | claude -p{skip_flag}" + await open_terminal(cmd) recently_built.append({"name": name, "path": path, "time": time.time()}) return f"On it, sir. Claude Code is working in {name}." @@ -1651,11 +1843,8 @@ async def handle_show_recent() -> str: await open_browser(f"file://{html_files[0]}") return f"Opened {html_files[0].name} from {last['name']}, sir." - # Fall back to opening the folder in Finder - escaped_last_path = applescript_escape(last["path"]) - script = f'tell application "Finder"\nactivate\nopen POSIX file "{escaped_last_path}"\nend tell' - await asyncio.create_subprocess_exec("osascript", "-e", script, stdout=asyncio.subprocess.PIPE, stderr=asyncio.subprocess.PIPE) - return f"Opened the {last['name']} folder in Finder, sir." + await open_browser(f"file://{last['path']}") + return f"Opened the {last['name']} folder, sir." # --------------------------------------------------------------------------- @@ -1762,8 +1951,8 @@ async def _do_mail_lookup() -> str: async def _do_screen_lookup() -> str: """Screen describe — runs in thread.""" - if anthropic_client: - return await describe_screen(anthropic_client) + if llm_client and llm_client.configured: + return await describe_screen(llm_client) windows = await get_active_windows() if windows: apps = set(w["app"] for w in windows) @@ -1835,9 +2024,7 @@ async def handle_browse(text: str, target: str) -> str: # 3. Fall back to Google search with cleaned query query = target - for prefix in ["search for", "look up", "google", "find me", "pull up", "open chrome", - "open firefox", "open browser", "go to", "can you", "in the browser", - "can you go to", "please"]: + for prefix in ACTION_KEYWORDS["browse"] + ["can you", "can you go to", "please"]: query = query.lower().replace(prefix, "").strip() # Remove filler words query = re.sub(r'\b(can|you|the|in|to|a|an|for|me|my|please)\b', '', query).strip() @@ -1851,16 +2038,22 @@ async def handle_browse(text: str, target: str) -> str: return "Searching for that, sir." -async def handle_research(text: str, target: str, client: anthropic.AsyncAnthropic) -> str: +async def handle_research(text: str, target: str, client: OpenRouterClient) -> str: """Deep research with Opus — write results to HTML, open in browser.""" try: - research_response = await client.messages.create( - model="claude-opus-4-6", + research_data = await client.chat_raw( + model=client.research_model, max_tokens=2000, - system=f"You are JARVIS, researching a topic for {USER_NAME}. Be thorough, organized, and cite sources where possible.", - messages=[{"role": "user", "content": f"Research this thoroughly:\n\n{target}"}], + messages=[ + { + "role": "system", + "content": f"You are JARVIS, researching a topic for {USER_NAME}. Be thorough, organized, and cite sources where possible.", + }, + {"role": "user", "content": f"Research this thoroughly:\n\n{target}"}, + ], ) - research_text = research_response.content[0].text + track_usage(research_data) + research_text = ((research_data.get("choices") or [{}])[0].get("message") or {}).get("content", "") or "" import html as _html html_content = f""" @@ -1883,20 +2076,24 @@ async def handle_research(text: str, target: str, client: anthropic.AsyncAnthrop

Researched by JARVIS using Claude Opus • {datetime.now().strftime('%B %d, %Y %I:%M %p')}

""" - results_file = Path.home() / "Desktop" / ".jarvis_research.html" + results_file = JARVIS_PROJECTS_DIR / ".jarvis_research.html" results_file.write_text(html_content) browser_name = "firefox" if "firefox" in text.lower() else "chrome" await open_browser(f"file://{results_file}", browser_name) - # Short voice summary via Haiku - summary = await client.messages.create( - model="claude-haiku-4-5-20251001", + # Short voice summary (fast model) + summary_data = await client.chat_raw( + model=client.fast_model, max_tokens=80, - system="Summarize this research in ONE sentence for voice. No markdown.", - messages=[{"role": "user", "content": research_text[:2000]}], + messages=[ + {"role": "system", "content": "Summarize this research in ONE sentence for voice. No markdown."}, + {"role": "user", "content": research_text[:2000]}, + ], ) - return summary.content[0].text + " Full results are in your browser, sir." + track_usage(summary_data) + summary_text = ((summary_data.get("choices") or [{}])[0].get("message") or {}).get("content", "") or "" + return (summary_text or "Done, sir.") + " Full results are in your browser, sir." except Exception as e: log.error(f"Research failed: {e}") @@ -1910,7 +2107,7 @@ async def handle_research(text: str, target: str, client: anthropic.AsyncAnthrop async def _update_session_summary( old_summary: str, rotated_messages: list[dict], - client: anthropic.AsyncAnthropic, + client: OpenRouterClient, ) -> str: """Background Haiku call to update the rolling session summary.""" prompt = f"""Update this conversation summary to include the new messages. @@ -1923,12 +2120,14 @@ async def _update_session_summary( Write an updated summary in 2-4 sentences capturing the key topics, decisions, and context. Be concise.""" try: - response = await client.messages.create( - model="claude-haiku-4-5-20251001", + data = await client.chat_raw( + model=client.fast_model, max_tokens=200, messages=[{"role": "user", "content": prompt}], ) - return response.content[0].text.strip() + track_usage(data) + text = ((data.get("choices") or [{}])[0].get("message") or {}).get("content", "") or "" + return text.strip() except Exception as e: log.warning(f"Summary update failed: {e}") return old_summary # Keep old summary on failure @@ -1950,6 +2149,10 @@ async def voice_handler(ws: WebSocket): {"type": "task_spawned", "task_id": "...", "prompt": "..."} {"type": "task_complete", "task_id": "...", "summary": "..."} """ + if not _is_authorized_ws(ws): + await ws.close(code=1008) + return + await ws.accept() task_manager.register_websocket(ws) history: list[dict] = [] @@ -2020,6 +2223,9 @@ async def _send_greeting(): # ── Fix-self: activate work mode in JARVIS repo ── if msg.get("type") == "fix_self": + if not JARVIS_DEV_MODE: + await ws.send_json({"type": "text", "text": "That feature is disabled, sir."}) + continue jarvis_dir = str(Path(__file__).parent) await work_session.start(jarvis_dir) response_text = "Work mode active in my own repo, sir. Tell me what needs fixing." @@ -2039,6 +2245,8 @@ async def _send_greeting(): if not user_text: continue + client = llm_client if (llm_client and llm_client.configured) else None + # Cancel any in-flight response _current_response_id += 1 my_response_id = _current_response_id @@ -2080,7 +2288,7 @@ async def _send_greeting(): plan.answers[q["key"]] = q["default"] prompt = await planner.build_prompt() name = _generate_project_name(prompt) - path = str(Path.home() / "Desktop" / name) + path = str(JARVIS_PROJECTS_DIR / name) os.makedirs(path, exist_ok=True) Path(path, "CLAUDE.md").write_text(prompt) did = dispatch_registry.register(name, path, prompt[:200]) @@ -2093,7 +2301,7 @@ async def _send_greeting(): if result["confirmed"]: prompt = await planner.build_prompt() name = _generate_project_name(prompt) - path = str(Path.home() / "Desktop" / name) + path = str(JARVIS_PROJECTS_DIR / name) os.makedirs(path, exist_ok=True) Path(path, "CLAUDE.md").write_text(prompt) did = dispatch_registry.register(name, path, prompt[:200]) @@ -2124,7 +2332,7 @@ async def _send_greeting(): if is_casual_question(user_text): # Quick chat — bypass claude -p, use Haiku response_text = await generate_response( - user_text, anthropic_client, task_manager, + user_text, client, task_manager, cached_projects, history, last_response=last_jarvis_response, session_summary=session_summary, @@ -2137,7 +2345,7 @@ async def _send_greeting(): full_response = await work_session.send(user_text) # Detect if Claude Code is stalling (asking questions instead of building) - if full_response and anthropic_client: + if full_response and client: stall_words = ["which option", "would you prefer", "would you like me to", "before I proceed", "before proceeding", "should I", "do you want me to", "let me know", "please confirm", @@ -2161,22 +2369,28 @@ async def _send_greeting(): log.info(f"Auto-opening {localhost_match.group(0)}") # Always summarize work mode responses via Haiku - if full_response and anthropic_client: + if full_response and client: try: - summary = await anthropic_client.messages.create( - model="claude-haiku-4-5-20251001", + summary_data = await client.chat_raw( + model=client.fast_model, max_tokens=100, - system=( + messages=[ + { + "role": "system", + "content": ( f"You are JARVIS reporting to the user ({USER_NAME}). Summarize what happened in 1-2 sentences. " "Speak in first person — 'I built', 'I found', 'I set up'. " "You are talking TO THE USER, not to a coding tool. " "NEVER give instructions like 'go ahead and build' or 'set up the frontend' — those are NOT for the user. " "NEVER say 'Claude Code'. NEVER output [ACTION:...] tags. " "NEVER read out URLs. No markdown. British precision." - ), - messages=[{"role": "user", "content": f"Claude Code said:\n{full_response[:2000]}"}], + ), + }, + {"role": "user", "content": f"Claude Code said:\n{full_response[:2000]}"}, + ], ) - response_text = summary.content[0].text + track_usage(summary_data) + response_text = ((summary_data.get("choices") or [{}])[0].get("message") or {}).get("content", "") or "" except Exception: response_text = full_response[:200] else: @@ -2224,11 +2438,11 @@ async def _send_greeting(): else: response_text = "Understood, sir." else: - if not anthropic_client: + if not client: response_text = "API key not configured." else: response_text = await generate_response( - user_text, anthropic_client, task_manager, + user_text, client, task_manager, cached_projects, history, last_response=last_jarvis_response, session_summary=session_summary, @@ -2256,7 +2470,7 @@ async def _send_greeting(): # Build in background — JARVIS stays conversational target = embedded_action["target"] name = _generate_project_name(target) - path = str(Path.home() / "Desktop" / name) + path = str(JARVIS_PROJECTS_DIR / name) os.makedirs(path, exist_ok=True) # Write detailed CLAUDE.md @@ -2285,7 +2499,7 @@ async def _send_greeting(): elif embedded_action["action"] == "research": # Research enters work mode too name = _generate_project_name(embedded_action["target"]) - path = str(Path.home() / "Desktop" / name) + path = str(JARVIS_PROJECTS_DIR / name) os.makedirs(path, exist_ok=True) await work_session.start(path) asyncio.create_task( @@ -2381,11 +2595,11 @@ async def _read_and_report(search_term, _ws): messages_since_last_summary = 0 # Get messages that are about to be rotated out rotated = history[:-20] if len(history) > 20 else [] - if rotated and anthropic_client: + if rotated and client: async def _do_summary(): nonlocal session_summary, summary_update_pending session_summary = await _update_session_summary( - session_summary, rotated, anthropic_client + session_summary, rotated, client ) summary_update_pending = False asyncio.create_task(_do_summary()) @@ -2393,8 +2607,8 @@ async def _do_summary(): summary_update_pending = False # Extract memories in background (doesn't block response) - if anthropic_client and len(user_text) > 15: - asyncio.create_task(extract_memories(user_text, response_text, anthropic_client)) + if client and len(user_text) > 15: + asyncio.create_task(extract_memories(user_text, response_text, client)) # TTS tts = strip_markdown_for_tts(response_text) @@ -2491,23 +2705,48 @@ class PreferencesUpdate(BaseModel): @app.post("/api/settings/keys") async def api_settings_keys(body: KeyUpdate): - allowed = {"ANTHROPIC_API_KEY", "FISH_API_KEY", "FISH_VOICE_ID", "USER_NAME", "HONORIFIC", "CALENDAR_ACCOUNTS"} + allowed = { + "OPENROUTER_API_KEY", + "OPENROUTER_BASE_URL", + "OPENROUTER_FAST_MODEL", + "OPENROUTER_RESEARCH_MODEL", + "OPENROUTER_VISION_MODEL", + "OPENROUTER_SITE_URL", + "OPENROUTER_APP_NAME", + "FISH_API_KEY", + "FISH_VOICE_ID", + "USER_NAME", + "HONORIFIC", + "CALENDAR_ACCOUNTS", + } if body.key_name not in allowed: return JSONResponse({"success": False, "error": "Invalid key name"}, status_code=400) _write_env_key(body.key_name, body.key_value) + global llm_client + llm_client = load_openrouter_client() return {"success": True} -@app.post("/api/settings/test-anthropic") -async def api_test_anthropic(body: KeyTest): - key = body.key_value or os.getenv("ANTHROPIC_API_KEY", "") +@app.post("/api/settings/test-openrouter") +async def api_test_openrouter(body: KeyTest): + key = body.key_value or os.getenv("OPENROUTER_API_KEY", "") if not key: return {"valid": False, "error": "No key provided"} try: - client = anthropic.AsyncAnthropic(api_key=key) - await client.messages.create(model="claude-haiku-4-5-20251001", max_tokens=10, messages=[{"role": "user", "content": "Hi"}]) + client = OpenRouterClient( + OpenRouterConfig( + api_key=key, + base_url=os.getenv("OPENROUTER_BASE_URL", "https://openrouter.ai/api/v1").strip(), + fast_model=os.getenv("OPENROUTER_FAST_MODEL", "anthropic/claude-3.5-haiku").strip(), + research_model=os.getenv("OPENROUTER_RESEARCH_MODEL", "anthropic/claude-3.5-sonnet").strip(), + vision_model=os.getenv("OPENROUTER_VISION_MODEL", "anthropic/claude-3.5-sonnet").strip(), + site_url=os.getenv("OPENROUTER_SITE_URL", "").strip(), + app_name=os.getenv("OPENROUTER_APP_NAME", "JARVIS").strip(), + ) + ) + await client.chat_raw(model=client.fast_model, max_tokens=10, messages=[{"role": "user", "content": "Hi"}]) return {"valid": True} - except Exception as e: - return {"valid": False, "error": str(e)[:200]} + except Exception: + return {"valid": False, "error": "Test failed"} @app.post("/api/settings/test-fish") async def api_test_fish(body: KeyTest): @@ -2527,8 +2766,8 @@ async def api_test_fish(body: KeyTest): return {"valid": False, "error": "Invalid API key"} else: return {"valid": False, "error": f"HTTP {resp.status_code}"} - except Exception as e: - return {"valid": False, "error": str(e)[:200]} + except Exception: + return {"valid": False, "error": "Test failed"} @app.get("/api/settings/status") async def api_settings_status(): @@ -2557,7 +2796,7 @@ async def api_settings_status(): "server_port": 8340, "uptime_seconds": int(time.time() - _session_start), "env_keys_set": { - "anthropic": bool(env_dict.get("ANTHROPIC_API_KEY", "").strip() and env_dict.get("ANTHROPIC_API_KEY", "") != "your-anthropic-api-key-here"), + "openrouter": bool(env_dict.get("OPENROUTER_API_KEY", "").strip() and env_dict.get("OPENROUTER_API_KEY", "") != "your-openrouter-api-key-here"), "fish_audio": bool(env_dict.get("FISH_API_KEY", "").strip() and env_dict.get("FISH_API_KEY", "") != "your-fish-audio-api-key-here"), "fish_voice_id": bool(env_dict.get("FISH_VOICE_ID", "").strip()), "user_name": env_dict.get("USER_NAME", ""), @@ -2590,7 +2829,11 @@ async def api_restart(): log.info("Restart requested — shutting down in 2 seconds") async def _restart(): await asyncio.sleep(2) - cmd = [sys.executable, __file__, "--port", "8340", "--host", "0.0.0.0"] + cmd = [sys.executable, __file__, "--port", "8340", "--host", "127.0.0.1"] + cert_file = Path(__file__).parent / "cert.pem" + key_file = Path(__file__).parent / "key.pem" + if cert_file.exists() and key_file.exists(): + cmd.append("--ssl") os.execv(sys.executable, cmd) asyncio.create_task(_restart()) return {"status": "restarting"} @@ -2600,21 +2843,9 @@ async def _restart(): async def api_fix_self(): """Enter work mode in the JARVIS repo — JARVIS can now fix himself.""" jarvis_dir = str(Path(__file__).parent) - # The work_session is per-WebSocket, so we set a flag that the handler picks up - # For now, also open Terminal so user can see skip_flag = " --dangerously-skip-permissions" if _SKIP_PERMISSIONS else "" - escaped_jarvis_dir = applescript_escape(jarvis_dir) - script = ( - 'tell application "Terminal"\n' - ' activate\n' - f' do script "cd {escaped_jarvis_dir} && claude{skip_flag}"\n' - 'end tell' - ) - await asyncio.create_subprocess_exec( - "osascript", "-e", script, - stdout=asyncio.subprocess.PIPE, - stderr=asyncio.subprocess.PIPE, - ) + from shlex import quote as shell_quote + await open_terminal(f"cd {shell_quote(jarvis_dir)} && claude{skip_flag}") log.info("Work mode: JARVIS repo opened for self-improvement") return {"status": "work_mode_active", "path": jarvis_dir} @@ -2645,7 +2876,7 @@ async def serve_index(): import uvicorn parser = argparse.ArgumentParser(description="JARVIS Server") - parser.add_argument("--host", default="0.0.0.0", help="Bind host") + parser.add_argument("--host", default="127.0.0.1", help="Bind host") parser.add_argument("--port", type=int, default=8340, help="Bind port") parser.add_argument("--reload", action="store_true", help="Auto-reload on changes") parser.add_argument("--ssl", action="store_true", help="Enable HTTPS with key.pem/cert.pem") diff --git a/tests/test_browser_integration.py b/tests/test_browser_integration.py index 54f1025..6b84513 100644 --- a/tests/test_browser_integration.py +++ b/tests/test_browser_integration.py @@ -31,6 +31,27 @@ def _has_network() -> bool: NETWORK_AVAILABLE = _has_network() + + +def _has_playwright_browsers() -> bool: + """Best-effort check for Playwright browser binaries.""" + path = os.getenv("PLAYWRIGHT_BROWSERS_PATH", "").strip() + if path: + root = Path(path).expanduser() + else: + root = Path.home() / ".cache" / "ms-playwright" + if not root.exists(): + return False + try: + for entry in root.iterdir(): + if entry.is_dir() and entry.name.startswith(("chromium", "firefox", "webkit")): + return True + except Exception: + return False + return False + + +PLAYWRIGHT_BROWSERS_AVAILABLE = _has_playwright_browsers() SKIP_REASON = "No network or Playwright browsers not available" @@ -46,7 +67,7 @@ async def browser(): @pytest.mark.asyncio -@pytest.mark.skipif(not NETWORK_AVAILABLE, reason=SKIP_REASON) +@pytest.mark.skipif((not NETWORK_AVAILABLE) or (not PLAYWRIGHT_BROWSERS_AVAILABLE), reason=SKIP_REASON) async def test_browser_search(browser): """Search returns results from DuckDuckGo (may be empty if DDG blocks).""" results = await browser.search("Python FastAPI documentation") @@ -64,7 +85,7 @@ async def test_browser_search(browser): @pytest.mark.asyncio -@pytest.mark.skipif(not NETWORK_AVAILABLE, reason=SKIP_REASON) +@pytest.mark.skipif((not NETWORK_AVAILABLE) or (not PLAYWRIGHT_BROWSERS_AVAILABLE), reason=SKIP_REASON) async def test_browser_search_empty_query(browser): """Search handles empty query gracefully.""" results = await browser.search("") @@ -75,7 +96,7 @@ async def test_browser_search_empty_query(browser): @pytest.mark.asyncio -@pytest.mark.skipif(not NETWORK_AVAILABLE, reason=SKIP_REASON) +@pytest.mark.skipif((not NETWORK_AVAILABLE) or (not PLAYWRIGHT_BROWSERS_AVAILABLE), reason=SKIP_REASON) async def test_browser_visit(browser): """Visit extracts readable content from a known page.""" content = await browser.visit("https://example.com") @@ -90,7 +111,7 @@ async def test_browser_visit(browser): @pytest.mark.asyncio -@pytest.mark.skipif(not NETWORK_AVAILABLE, reason=SKIP_REASON) +@pytest.mark.skipif((not NETWORK_AVAILABLE) or (not PLAYWRIGHT_BROWSERS_AVAILABLE), reason=SKIP_REASON) async def test_browser_visit_invalid_url(browser): """Visit handles invalid URLs gracefully.""" content = await browser.visit("https://this-domain-definitely-does-not-exist-12345.com") @@ -103,7 +124,7 @@ async def test_browser_visit_invalid_url(browser): @pytest.mark.asyncio -@pytest.mark.skipif(not NETWORK_AVAILABLE, reason=SKIP_REASON) +@pytest.mark.skipif((not NETWORK_AVAILABLE) or (not PLAYWRIGHT_BROWSERS_AVAILABLE), reason=SKIP_REASON) async def test_browser_screenshot(browser): """Screenshot produces a valid PNG file.""" tmp_path = tempfile.mktemp(suffix=".png", prefix="jarvis_test_ss_") @@ -129,7 +150,7 @@ async def test_browser_screenshot(browser): @pytest.mark.asyncio -@pytest.mark.skipif(not NETWORK_AVAILABLE, reason=SKIP_REASON) +@pytest.mark.skipif((not NETWORK_AVAILABLE) or (not PLAYWRIGHT_BROWSERS_AVAILABLE), reason=SKIP_REASON) async def test_browser_screenshot_default_path(browser): """Screenshot with no path generates a temp file.""" result_path = await browser.screenshot("https://example.com") @@ -147,7 +168,7 @@ async def test_browser_screenshot_default_path(browser): @pytest.mark.asyncio -@pytest.mark.skipif(not NETWORK_AVAILABLE, reason=SKIP_REASON) +@pytest.mark.skipif((not NETWORK_AVAILABLE) or (not PLAYWRIGHT_BROWSERS_AVAILABLE), reason=SKIP_REASON) async def test_browser_research(browser): """Research performs multi-step search and visit.""" result = await browser.research("Python FastAPI tutorial") diff --git a/tests/test_classifier.py b/tests/test_classifier.py index f40f73c..8e0f977 100644 --- a/tests/test_classifier.py +++ b/tests/test_classifier.py @@ -2,7 +2,7 @@ Test the LLM intent classifier with 20 sample voice command phrases. Run: python3 tests/test_classifier.py -Requires: ANTHROPIC_API_KEY in .env or environment +Requires: OPENROUTER_API_KEY in .env or environment """ import asyncio @@ -23,7 +23,7 @@ k, _, v = line.partition("=") os.environ.setdefault(k.strip(), v.strip().strip('"').strip("'")) -import anthropic +from llm_client import load_openrouter_client # Import the classifier and speech corrections from server import classify_intent, apply_speech_corrections @@ -62,12 +62,12 @@ async def run_tests(): - api_key = os.getenv("ANTHROPIC_API_KEY", "") + api_key = os.getenv("OPENROUTER_API_KEY", "") if not api_key: - print("ERROR: ANTHROPIC_API_KEY not set") + print("ERROR: OPENROUTER_API_KEY not set") sys.exit(1) - client = anthropic.AsyncAnthropic(api_key=api_key) + client = load_openrouter_client() passed = 0 failed = 0