diff --git a/.github/workflows/electron-ci.yml b/.github/workflows/electron-ci.yml
new file mode 100644
index 0000000..c3f8761
--- /dev/null
+++ b/.github/workflows/electron-ci.yml
@@ -0,0 +1,45 @@
+name: Electron CI
+
+on:
+  push:
+    paths:
+      - "electron/**"
+      - ".github/workflows/electron-ci.yml"
+
+  pull_request:
+    paths:
+      - "electron/**"
+      - ".github/workflows/electron-ci.yml"
+
+# Least privilege: every step below only needs to read the repository.
+permissions:
+  contents: read
+
+jobs:
+  electron-checks:
+    name: Install, lint, and build Electron app
+    runs-on: ubuntu-latest
+
+    defaults:
+      run:
+        working-directory: electron
+
+    steps:
+      - name: Checkout repository
+        uses: actions/checkout@v4
+
+      - name: Setup Node.js
+        uses: actions/setup-node@v4
+        with:
+          node-version: 20
+          cache: npm
+          cache-dependency-path: electron/package-lock.json
+
+      - name: Install dependencies
+        run: npm ci
+
+      - name: Run lint
+        run: npm run lint
+
+      - name: Build Electron renderer
+        run: npm run build
\ No newline at end of file
diff --git a/.github/workflows/python-ci.yml b/.github/workflows/python-ci.yml
new file mode 100644
index 0000000..49ced8f
--- /dev/null
+++ b/.github/workflows/python-ci.yml
@@ -0,0 +1,58 @@
+name: Python CI
+
+on:
+  push:
+    paths:
+      - "python/**"
+      - ".github/workflows/python-ci.yml"
+
+  pull_request:
+    paths:
+      - "python/**"
+      - ".github/workflows/python-ci.yml"
+
+# Least privilege: every step below only needs to read the repository.
+permissions:
+  contents: read
+
+jobs:
+  python-checks:
+    name: Install and import-check Python CV worker
+    runs-on: ubuntu-latest
+
+    defaults:
+      run:
+        working-directory: python
+
+    steps:
+      - name: Checkout repository
+        uses: actions/checkout@v4
+
+      - name: Setup Python 3.11
+        uses: actions/setup-python@v5
+        with:
+          python-version: "3.11"
+          cache: pip
+          cache-dependency-path: python/requirements.txt
+
+      - name: Install dependencies
+        run: |
+          python -m pip install --upgrade pip
+          pip install -r requirements.txt
+
+      - name: Check Python imports
+        run: |
+          python - <<'PY'
+          import cv2
+          import numpy
+          import onnxruntime
+          import fastapi
+          import uvicorn
+          import websockets
+
+          print("Python CV dependencies imported successfully")
+          PY
+
+      - name: Check project modules compile
+        run: |
+          python -m compileall .
\ No newline at end of file
diff --git a/.gitignore b/.gitignore
index 98343bf..c77af2b 100644
--- a/.gitignore
+++ b/.gitignore
@@ -22,6 +22,7 @@ build/
*.pt
*.pth
*.onnx
+*.task
# Database
*.sqlite
diff --git a/README.md b/README.md
index 58d46f5..90f3bce 100644
--- a/README.md
+++ b/README.md
@@ -57,11 +57,15 @@ taskmaster/
│ └── preload/
│ └── index.ts
├── python/
+│ ├── README.md # CV worker docs
+│ ├── requirements.txt # Python deps (installed by setup.sh)
│ ├── main.py # FastAPI + WebSocket server
│ └── cv/
-│ ├── camera.py
-│ ├── gaze_detector.py
-│ └── phone_detector.py
+│ ├── camera.py # webcam capture (owns the camera handle)
+│ ├── detection_loop.py # camera -> detectors -> events loop
+│ ├── phone_detector.py # phone-in-frame detection
+│ └── gaze_detector.py # gaze/face detection (planned)
+├── setup.sh # one-shot install for Python + Electron
├── PLAN.md
└── README.md
```
@@ -69,30 +73,50 @@ taskmaster/
## Prerequisites
- Node.js >= 18
-- Python >= 3.10
+- **Python 3.11** (MediaPipe has no wheels for 3.13/3.14 yet)
- A webcam
## Setup
-### Python backend
+One command installs both the Python CV worker and the Electron app:
```bash
-cd python
-python -m venv .venv
-source .venv/bin/activate # Windows: .venv\Scripts\activate
-pip install -r requirements.txt
+./setup.sh
```
-### Electron app
+It creates the Python venv at `python/.venv` (Python 3.11), installs
+`requirements.txt`, and runs `npm install` in `electron/`.
+
+
+Manual setup (if you prefer)
```bash
+# Python CV worker
+cd python
+python3.11 -m venv .venv
+source .venv/bin/activate # Windows: .venv\Scripts\activate
+pip install -r requirements.txt
+cd ..
+
+# Electron app
cd electron
npm install
```
+
+
## Development
-Start the Python CV server:
+Run the CV detection loop directly (current entry point while the
+WebSocket server is being built):
+
+```bash
+cd python
+source .venv/bin/activate
+python cv/detection_loop.py # Ctrl+C to stop
+```
+
+Later, the FastAPI + WebSocket server will be the entry point instead:
```bash
cd python
diff --git a/electron/src/main/appDetection/detectCommonWindowsApps.ts b/electron/src/main/appDetection/detectCommonWindowsApps.ts
new file mode 100644
index 0000000..a3f8412
--- /dev/null
+++ b/electron/src/main/appDetection/detectCommonWindowsApps.ts
@@ -0,0 +1,148 @@
+// This file needs to be on main as we are using node APIs to detect if common apps are installed on the user's system. We will likely need to expand this in the future to support more apps and other platforms, but for now we are just focusing on a few common Windows apps.
+import fs from 'node:fs'
+import path from 'node:path'
+import { COMMON_APPS } from "../../shared/appDetection/commonApps.ts"
+
+export type DetectedWindowsApp = {
+ id: string
+ displayName: string
+ category: 'productivity' | 'distraction' | 'browser'
+ executablePath: string
+ defaultStatus: 'allowed' | 'blocked'
+}
+
+/**
+ * Expands Windows-style `%NAME%` placeholders using `process.env`.
+ *
+ * If any referenced variable is unset or empty, the whole result is `''`
+ * rather than a truncated path (e.g. `\Programs\App\app.exe`), so a caller can
+ * skip the candidate instead of probing a path that no longer means what the
+ * template intended.
+ *
+ * @param rawPath - Path template that may contain `%NAME%` placeholders.
+ * @returns The expanded path, or `''` when a placeholder cannot be resolved.
+ */
+function expandWindowsEnvironmentPath(rawPath: string): string {
+  let hasUnresolvedVariable = false
+
+  const expandedPath = rawPath.replace(
+    /%([^%]+)%/g,
+    (_match, variableName: string) => {
+      const value = process.env[variableName]
+
+      if (!value) {
+        hasUnresolvedVariable = true
+        return ''
+      }
+
+      return value
+    }
+  )
+
+  return hasUnresolvedVariable ? '' : expandedPath
+}
+
+/** Reports whether the given path contains at least one `*` character. */
+function pathHasWildcard(filePath: string) {
+  return filePath.indexOf('*') !== -1
+}
+
+/**
+ * Resolves a path pattern that contains a single `*` directory wildcard.
+ *
+ * The text between the last path separator and the `*` is used as a name
+ * prefix. Every sub-directory of the parent whose name starts with that prefix
+ * is tried in directory-listing order, and the first one under which the rest
+ * of the pattern exists is returned (e.g. `C:\Apps\app-*\app.exe`).
+ * Only the first `*` is treated as a wildcard. A pattern without `*` is
+ * checked as a plain path.
+ *
+ * @param filePath - Path to resolve, optionally containing one `*`.
+ * @returns The first existing match, or `null` when nothing matches or the
+ *   parent directory cannot be scanned.
+ */
+function findWildcardPath(filePath: string): string | null {
+  const normalizedPath = path.normalize(filePath)
+  const wildcardIndex = normalizedPath.indexOf('*')
+
+  if (wildcardIndex === -1) {
+    return fs.existsSync(normalizedPath) ? normalizedPath : null
+  }
+
+  const beforeWildcard = normalizedPath.slice(0, wildcardIndex)
+  const afterWildcard = normalizedPath.slice(wildcardIndex + 1)
+
+  // Split on the last separator by hand: path.dirname/path.basename ignore a
+  // trailing separator, so for `dir\*\app.exe` they would scan the parent of
+  // `dir` and use `dir` as the prefix.
+  const lastSeparatorIndex = beforeWildcard.lastIndexOf(path.sep)
+  const baseDirectory =
+    lastSeparatorIndex === -1
+      ? '.'
+      : beforeWildcard.slice(0, lastSeparatorIndex + 1)
+  const prefix = beforeWildcard.slice(lastSeparatorIndex + 1)
+
+  try {
+    // A missing parent directory is the normal "not installed" case, so return
+    // quietly instead of letting readdirSync throw into the warning below.
+    if (!fs.existsSync(baseDirectory)) {
+      return null
+    }
+
+    const entries = fs.readdirSync(baseDirectory, {
+      withFileTypes: true,
+    })
+
+    // Try every matching directory, not just the first: a stale or empty
+    // sibling must not hide a later directory that does contain the target.
+    for (const entry of entries) {
+      if (!entry.isDirectory() || !entry.name.startsWith(prefix)) {
+        continue
+      }
+
+      const possiblePath = path.join(baseDirectory, entry.name, afterWildcard)
+
+      if (fs.existsSync(possiblePath)) {
+        return possiblePath
+      }
+    }
+
+    return null
+  } catch (error) {
+    console.warn('[Taskmaster] Could not scan wildcard path:', {
+      filePath,
+      baseDirectory,
+      error,
+    })
+
+    return null
+  }
+}
+
+/**
+ * Returns the first candidate path that exists on disk.
+ *
+ * Each template is expanded with expandWindowsEnvironmentPath. Templates that
+ * expand to an empty string are skipped, wildcard templates are resolved with
+ * findWildcardPath, and all others are normalized and checked directly.
+ *
+ * @param commonWindowsPaths - Candidate path templates, tried in order.
+ * @returns The first path found, or `null` when none of the candidates exist.
+ */
+function findExistingAppPath(commonWindowsPaths: string[]) {
+  for (const template of commonWindowsPaths) {
+    const expanded = expandWindowsEnvironmentPath(template)
+
+    console.log('[Taskmaster] Checking path:', {
+      rawPath: template,
+      expandedPath: expanded,
+    })
+
+    if (expanded) {
+      if (pathHasWildcard(expanded)) {
+        const wildcardMatch = findWildcardPath(expanded)
+
+        // Debug output; intended to be removed later.
+        console.log('[Taskmaster] Wildcard path result:', {
+          expandedPath: expanded,
+          matchedPath: wildcardMatch,
+        })
+
+        if (wildcardMatch) {
+          return wildcardMatch
+        }
+      } else {
+        const normalizedPath = path.normalize(expanded)
+
+        try {
+          const exists = fs.existsSync(normalizedPath)
+
+          console.log('[Taskmaster] Path exists result:', {
+            normalizedPath,
+            exists,
+          })
+
+          if (exists) {
+            return normalizedPath
+          }
+        } catch (error) {
+          console.warn('[Taskmaster] Could not check path:', {
+            normalizedPath,
+            error,
+          })
+        }
+      }
+    }
+  }
+
+  return null
+}
+
+/**
+ * Detects which entries of COMMON_APPS are installed on this machine.
+ *
+ * Only runs on Windows (`process.platform === 'win32'`); on every other
+ * platform the result is an empty list. An app is included when one of its
+ * `commonWindowsPaths` resolves to an existing path.
+ *
+ * @returns One entry per detected app, including the resolved executable path.
+ */
+export function detectCommonWindowsApps(): DetectedWindowsApp[] {
+  const detectedApps: DetectedWindowsApp[] = []
+
+  if (process.platform === 'win32') {
+    for (const app of COMMON_APPS) {
+      const executablePath = findExistingAppPath(app.commonWindowsPaths)
+
+      if (executablePath) {
+        detectedApps.push({
+          id: app.id,
+          displayName: app.displayName,
+          category: app.category,
+          executablePath,
+          defaultStatus: app.defaultStatus,
+        })
+      }
+    }
+  }
+
+  return detectedApps
+}
\ No newline at end of file
diff --git a/electron/src/main/index.ts b/electron/src/main/index.ts
index ae80b61..cc48912 100644
--- a/electron/src/main/index.ts
+++ b/electron/src/main/index.ts
@@ -5,6 +5,9 @@
import { app, BrowserWindow, Tray, Menu, nativeImage } from 'electron'
import path from 'path'
import { fileURLToPath } from 'url'
+import { registerIpcHandlers } from './ipc-handlers.ts'
+
+
const __filename = fileURLToPath(import.meta.url)
const __dirname = path.dirname(__filename)
@@ -32,6 +35,7 @@ function createWindow() {
minWidth: 1000,
minHeight: 700,
webPreferences: {
+ preload: path.join(__dirname, '../preload/index.js'),
nodeIntegration: false,
contextIsolation: true,
},
@@ -40,7 +44,9 @@ function createWindow() {
win.loadURL('http://localhost:5173')
}
+
app.whenReady().then(() => {
+ registerIpcHandlers()
createWindow()
createTray()
})
diff --git a/electron/src/main/ipc-handlers.ts b/electron/src/main/ipc-handlers.ts
index 76789c4..6d4794e 100644
--- a/electron/src/main/ipc-handlers.ts
+++ b/electron/src/main/ipc-handlers.ts
@@ -1,2 +1,17 @@
// Registers all ipcMain.handle() and ipcMain.on() listeners.
-// This is the entry point for every message the renderer sends — start session, save settings, get history, etc.
\ No newline at end of file
+// This is the entry point for every message the renderer sends — start session, save settings, get history, etc.
+import { ipcMain } from 'electron'
+import { detectCommonWindowsApps } from './appDetection/detectCommonWindowsApps.ts'
+
+/**
+ * Registers the main-process IPC handlers.
+ *
+ * Currently installs a single handler on `taskmaster:detect-common-apps`,
+ * which runs detectCommonWindowsApps(), logs the result, and returns it to the
+ * caller. Any handler already registered on that channel is removed first.
+ */
+export function registerIpcHandlers() {
+  const channel = 'taskmaster:detect-common-apps'
+
+  const handleDetectCommonApps = () => {
+    const detectedApps = detectCommonWindowsApps()
+
+    console.log('[Taskmaster] Detected common apps:')
+    console.log(JSON.stringify(detectedApps, null, 2))
+
+    return detectedApps
+  }
+
+  // Drop any previous registration on the channel, then install the handler.
+  ipcMain.removeHandler(channel)
+  ipcMain.handle(channel, handleDetectCommonApps)
+}
\ No newline at end of file
diff --git a/electron/src/preload/index.js b/electron/src/preload/index.js
new file mode 100644
index 0000000..08acabf
--- /dev/null
+++ b/electron/src/preload/index.js
@@ -0,0 +1,11 @@
+// Uses contextBridge.exposeInMainWorld() to give the renderer a safe, limited API.
+// Example: window.taskmaster.startSession().
+// The renderer can never call Node directly - everything goes through here.
+
+const { contextBridge, ipcRenderer } = require('electron')
+
+console.log('Taskmaster preload loaded')
+
+contextBridge.exposeInMainWorld('taskmaster', {
+ detectCommonApps: () => ipcRenderer.invoke('taskmaster:detect-common-apps'),
+})
\ No newline at end of file
diff --git a/electron/src/preload/index.ts b/electron/src/preload/index.ts
deleted file mode 100644
index feaa357..0000000
--- a/electron/src/preload/index.ts
+++ /dev/null
@@ -1,3 +0,0 @@
-// Uses contextBridge.exposeInMainWorld() to give the renderer a safe, limited API.
-// Example: window.taskmaster.startSession().
-// The renderer can never call Node directly — everything goes through here.
\ No newline at end of file
diff --git a/electron/src/renderer/components/onboarding/BrowserActivitySelectionStep.tsx b/electron/src/renderer/components/onboarding/BrowserActivitySelectionStep.tsx
new file mode 100644
index 0000000..5741011
--- /dev/null
+++ b/electron/src/renderer/components/onboarding/BrowserActivitySelectionStep.tsx
@@ -0,0 +1,184 @@
+/**
+ * Browser activity onboarding step.
+ *
+ * This screen lets the user decide which common websites should be treated as
+ * allowed or blocked during focus sessions.
+ *
+ * It does not detect websites yet. Later, Taskmaster can compare the active
+ * browser window title against the matchText values stored in the settings.
+ */
+
+import { useFocusEnvironmentSettings } from '../../hooks/useFocusEnvironmentSettings'
+import type {
+ BrowserActivityRule,
+ BrowserActivityRuleStatus,
+} from '../../hooks/useFocusEnvironmentSettings'
+
+
+type BrowserActivitySelectionStepProps = {
+ onBack: () => void
+ onContinue: () => void
+}
+
+type BrowserActivityRuleSectionProps = {
+ title: string
+ description: string
+ rules: BrowserActivityRule[]
+ onUpdateRuleStatus: (
+ ruleId: string,
+ status: BrowserActivityRuleStatus
+ ) => void
+}
+
+
+
+
+
+/**
+ * Renders browser activity rules from useFocusEnvironmentSettings.
+ *
+ * The settings hook owns the data and update logic. This component only renders
+ * the onboarding UI and forwards toggle changes back to the hook.
+ */
+
+function BrowserActivityRuleSection({
+ title,
+ description,
+ rules,
+ onUpdateRuleStatus,
+}: BrowserActivityRuleSectionProps) {
+
+ return (
+
+
+
{title}
+
{description}
+
+
+
+ {rules.map((rule) => {
+ const isBlocked = rule.status === 'blocked'
+
+ return (
+
+
+ {rule.label}
+
+ {rule.description}
+
+
+
+
+
+ onUpdateRuleStatus(
+ rule.id,
+ event.target.checked ? 'blocked' : 'allowed'
+ )
+ }
+ aria-label={`${isBlocked ? 'Allow' : 'Block'} ${rule.label}`}
+ />
+
+
+
+ )
+ })}
+
+
+ )
+}
+
+/**
+ * Onboarding step for configuring common browser page rules.
+ *
+ * The rules and their update logic come from useFocusEnvironmentSettings.
+ * Pressing Back or Continue calls saveFocusEnvironmentSettings before
+ * navigating, so changes made on this screen are saved on the way out.
+ */
+export default function BrowserActivitySelectionStep({
+ onBack,
+ onContinue,
+}: BrowserActivitySelectionStepProps) {
+ const {
+ blockedBrowserActivityRules,
+ flexibleBrowserActivityRules,
+ updateBrowserActivityRuleStatus,
+ saveFocusEnvironmentSettings,
+ } = useFocusEnvironmentSettings()
+
+ function handleBack() {
+ saveFocusEnvironmentSettings()
+ onBack()
+ }
+
+ function handleContinue() {
+ saveFocusEnvironmentSettings()
+ onContinue()
+ }
+ return (
+
+ Step 4
+
+
+
+
+
+ Browser activity
+
+
+ Choose which websites should count as distractions during focus
+ sessions.
+
+
+
+
+ For the MVP, Taskmaster will estimate browser activity from the
+ active window title, such as “YouTube - Google Chrome”.
+
+
+
+
+
+
+ These websites do not need to be installed. They are common page
+ patterns that Taskmaster can later match while your browser is
+ open.
+
+
+
+
+
+
+
+
+
+
+
+
+ Back
+
+
+
+ Continue
+
+
+
+
+ )
+}
\ No newline at end of file
diff --git a/electron/src/renderer/components/onboarding/OnboardingAdditionalFunctions.tsx b/electron/src/renderer/components/onboarding/OnboardingAdditionalFunctions.tsx
index bebc18a..1a9595b 100644
--- a/electron/src/renderer/components/onboarding/OnboardingAdditionalFunctions.tsx
+++ b/electron/src/renderer/components/onboarding/OnboardingAdditionalFunctions.tsx
@@ -1,3 +1,9 @@
+/**
+ * Final onboarding options screen.
+ *
+ * This screen is currently a placeholder for future focus-session guardrails.
+ * The options shown here are not fully wired into session behavior yet.
+ */
type DistractionOptionsStepProps = {
onBack: () => void
onFinish: () => void
diff --git a/electron/src/renderer/components/onboarding/OnboardingCameraSetup.tsx b/electron/src/renderer/components/onboarding/OnboardingCameraSetup.tsx
index c0e9ce1..d24000a 100644
--- a/electron/src/renderer/components/onboarding/OnboardingCameraSetup.tsx
+++ b/electron/src/renderer/components/onboarding/OnboardingCameraSetup.tsx
@@ -1,5 +1,14 @@
+/**
+ * Camera setup onboarding screen.
+ *
+ * This screen lets the user preview and select the camera Taskmaster will use
+ * during focus sessions.
+ *
+ * The camera stream is owned by useCameraDevices. When this screen unmounts,
+ * the hook stops the stream so the camera turns off.
+ */
// === camera setup ===
-import { useEffect, useRef, useState, } from "react";
+import { useEffect, useRef } from "react";
import { useCameraDevices } from "../../hooks/useCameraDevices";
type CameraSetupStepProps = {
@@ -33,6 +42,12 @@ export default function CameraSetupStep({
const isCameraConnected = cameraStatus === "connected";
+ /**
+ * Attach the MediaStream from the hook to the video element.
+ *
+ * React cannot set srcObject directly through JSX, so this needs to be done
+ * imperatively through a ref.
+ */
useEffect(() => {
if (videoRef.current && stream) {
videoRef.current.srcObject = stream;
@@ -63,8 +78,10 @@ const isCameraConnected = cameraStatus === "connected";
Camera
selectCamera(e.target.value)}
+ onChange={(e) => selectCamera(e.target.value)}
+ disabled={cameras.length === 0}
>
{cameras.map((camera, index) => (
@@ -109,7 +126,7 @@ const isCameraConnected = cameraStatus === "connected";
Back
-
+
Continue
diff --git a/electron/src/renderer/components/onboarding/OnboardingWelcome.tsx b/electron/src/renderer/components/onboarding/OnboardingWelcome.tsx
index 56c24b8..c7a5feb 100644
--- a/electron/src/renderer/components/onboarding/OnboardingWelcome.tsx
+++ b/electron/src/renderer/components/onboarding/OnboardingWelcome.tsx
@@ -1,3 +1,9 @@
+/**
+ * First onboarding screen.
+ *
+ * This screen introduces Taskmaster and starts the setup flow. It does not own
+ * any onboarding settings, it only sends the user to the next step.
+ */
type WelcomeStepProps = {
onStartSetup: () => void
}
diff --git a/electron/src/renderer/components/onboarding/WhitelistSelectionStep.tsx b/electron/src/renderer/components/onboarding/WhitelistSelectionStep.tsx
index 1fa4964..85ae15f 100644
--- a/electron/src/renderer/components/onboarding/WhitelistSelectionStep.tsx
+++ b/electron/src/renderer/components/onboarding/WhitelistSelectionStep.tsx
@@ -1,26 +1,129 @@
+/**
+ * Desktop app whitelist step for onboarding.
+ *
+ * This screen lets the user choose which detected desktop apps should be
+ * treated as allowed or blocked during focus sessions.
+ *
+ * Important:
+ * - This file is only for installed desktop apps.
+ * - Browser tabs/websites such as YouTube, Gmail, Netflix, or ChatGPT
+ * are handled in the separate browser activity step.
+ * - The detection and localStorage logic lives in useFocusEnvironmentSettings.
+ */
+
+import { useFocusEnvironmentSettings } from '../../hooks/useFocusEnvironmentSettings'
+import type {
+ AppRuleStatus,
+ FocusApp,
+} from '../../hooks/useFocusEnvironmentSettings'
+
type FocusEnvironmentStepProps = {
onBack: () => void
onContinue: () => void
}
-const browserItems = [
- { label: 'Chrome: GitHub', allowed: true },
- { label: 'Chrome: YouTube', allowed: false },
- { label: 'Chrome: OnTrack', allowed: true },
-]
+type FocusAppRuleSectionProps = {
+ title: string
+ description: string
+ apps: FocusApp[]
+ onUpdateAppStatus: (appId: string, status: AppRuleStatus) => void
+}
-const appItems = [
- { label: 'VS Code', allowed: true },
- { label: 'Discord', allowed: false },
-]
+/**
+ * Renders one group of desktop app rules.
+ *
+ * Example groups:
+ * - Productivity apps
+ * - Potential distractions
+ *
+ * Each app stays in its original group, but the user can toggle whether it is
+ * currently allowed or blocked during focus sessions.
+ */
+function FocusAppRuleSection({
+ title,
+ description,
+ apps,
+ onUpdateAppStatus,
+}: FocusAppRuleSectionProps) {
+ return (
+
+
+
{title}
+
{description}
+
+
+
+ {apps.map((app) => (
+
+ {app.name}
+
+
+ onUpdateAppStatus(
+ app.id,
+ event.target.checked ? 'blocked' : 'allowed'
+ )
+ }
+ aria-label={`${app.status === 'blocked' ? 'Allow' : 'Block'} ${
+ app.name
+ }`}
+ />
+
+
+
+ ))}
+
+
+ )
+}
+
+/**
+ * Onboarding step for configuring desktop app focus rules.
+ *
+ * The user can:
+ * - Pick their main browser.
+ * - Decide whether the selected browser should be blocked entirely.
+ * - Mark detected desktop apps as allowed or blocked.
+ *
+ * Saving happens when the user presses Back or Continue.
+ */
export default function FocusEnvironmentStep({
onBack,
onContinue,
}: FocusEnvironmentStepProps) {
+ const {
+ settings,
+ browserOptions,
+ productivityApps,
+ distractionApps,
+ shouldSplitAppRules,
+ setSelectedBrowserId,
+ setBlockSelectedBrowser,
+ updateAppStatus,
+ saveFocusEnvironmentSettings,
+ } = useFocusEnvironmentSettings()
+
+ function handleBack() {
+ saveFocusEnvironmentSettings()
+ onBack()
+ }
+
+ function handleContinue() {
+ saveFocusEnvironmentSettings()
+ onContinue()
+ }
+
return (
Step 3
+
@@ -28,64 +131,83 @@ export default function FocusEnvironmentStep({
Focus environment
- Choose which apps or tabs are allowed during your deep work
- sessions.
+ Choose which apps are allowed during your deep work sessions.
+
- Taskmaster will use this list to understand when you are working and
- when you might be drifting.
+ Taskmaster checks the active app during focus sessions. Apps not
+ recognised yet will be marked as unknown and can be reviewed after
+ the session.
- Browser
-
- Chrome
- Edge
- Opera
+ Main browser
+ setSelectedBrowserId(event.target.value)}
+ >
+ {browserOptions.map((browser) => (
+
+ {browser.name}
+
+ ))}
-
- Recommended setup
-
+
+
+ setBlockSelectedBrowser(event.target.checked)
+ }
+ />
+ Block selected browser during focus sessions
+
+
+
+
+
+ Taskmaster will also learn from apps you open during sessions.
+ Unknown apps can be reviewed after each session.
+
-
-
-
Browser tabs
-
- {browserItems.map((item) => (
-
- {item.label}
-
-
- ))}
-
-
-
-
-
Apps
-
- {appItems.map((item) => (
-
- {item.label}
-
-
- ))}
-
-
+
+
+
+
-
+
Back
-
+
Continue
diff --git a/electron/src/renderer/hooks/useCameraDevices.ts b/electron/src/renderer/hooks/useCameraDevices.ts
index 35406f6..ab4cf74 100644
--- a/electron/src/renderer/hooks/useCameraDevices.ts
+++ b/electron/src/renderer/hooks/useCameraDevices.ts
@@ -1,5 +1,20 @@
-// detect camera devices and manage camera stream for onboarding camera setup step
-import { useEffect, useState } from "react";
+/**
+ * Camera device hook for the onboarding camera setup step.
+ *
+ * This hook is responsible for:
+ * - requesting camera permission
+ * - listing available video input devices
+ * - remembering the selected camera in localStorage
+ * - opening a preview stream for the selected camera
+ * - stopping the preview stream when the camera step unmounts
+ *
+ * Important:
+ * This hook should only be used by the camera setup screen. When that screen is
+ * no longer mounted, the cleanup effect stops the camera so the webcam light
+ * turns off.
+ */
+
+import { useCallback, useEffect, useRef, useState } from "react";
type CameraStatus =
| "checking"
@@ -8,98 +23,193 @@ type CameraStatus =
| "permission-denied"
| "error";
+const SELECTED_CAMERA_KEY = "taskmaster:selectedCameraId";
+
export function useCameraDevices() {
const [cameras, setCameras] = useState
([]);
const [selectedCameraId, setSelectedCameraId] = useState("");
const [stream, setStream] = useState(null);
const [cameraStatus, setCameraStatus] = useState("checking");
- const SELECTED_CAMERA_KEY = "taskmaster:selectedCameraId";
+ /**
+ * Keep the active stream in a ref so cleanup functions can stop the latest
+ * stream without depending on React state timing.
+ */
+ const streamRef = useRef(null);
+
+ /**
+ * Stops the currently active camera stream.
+ *
+ * This is used when:
+ * - switching from one camera to another
+ * - leaving the camera setup step
+ * - cancelling an async camera request after the component unmounts
+ */
+ const stopCurrentStream = useCallback(() => {
+ if (!streamRef.current) {
+ return;
+ }
- async function selectCamera(cameraId: string): Promise {
+ streamRef.current.getTracks().forEach((track) => track.stop());
+ streamRef.current = null;
+ setStream(null);
+ }, []);
+
+ /**
+ * Updates the selected camera and persists the choice.
+ *
+ * The actual camera stream is opened by the selectedCameraId effect below.
+ */
+ const selectCamera = useCallback((cameraId: string): void => {
setSelectedCameraId(cameraId);
localStorage.setItem(SELECTED_CAMERA_KEY, cameraId);
- }
+ }, []);
- async function loadCameras(): Promise {
- try {
- setCameraStatus("checking");
- const permissionStream = await navigator.mediaDevices.getUserMedia({
- video: true,
- audio: false,
- });
+/**
+ * On mount, request permission and load the list of available cameras.
+ *
+ * getUserMedia is called first because some browsers/Electron builds do not
+ * reveal camera labels until permission has been granted.
+ */
+useEffect(() => {
+ let isCancelled = false;
+
+ async function detectCameras() {
+ try {
+ const permissionStream =
+ await navigator.mediaDevices.getUserMedia({
+ video: true,
+ audio: false,
+ });
+
+ /**
+ * This stream is only used to unlock permission/device labels.
+ * Stop it immediately because the selected camera preview is opened in
+ * the next effect.
+ */
+ permissionStream.getTracks().forEach((track) => track.stop());
+
+ const devices = await navigator.mediaDevices.enumerateDevices();
+ const videoDevices = devices.filter(
+ (device) => device.kind === "videoinput",
+ );
+
+ if (isCancelled) {
+ return;
+ }
+
+ setCameras(videoDevices);
+
+ if (videoDevices.length === 0) {
+ setCameraStatus("no-camera");
+ return;
+ }
+
+ const savedCameraId = localStorage.getItem(SELECTED_CAMERA_KEY);
+
+ const savedCameraStillExists = videoDevices.some(
+ (camera) => camera.deviceId === savedCameraId,
+ );
+
+ const cameraIdToUse =
+ savedCameraId && savedCameraStillExists
+ ? savedCameraId
+ : videoDevices[0].deviceId;
+
+ setSelectedCameraId(cameraIdToUse);
+ localStorage.setItem(SELECTED_CAMERA_KEY, cameraIdToUse);
+ } catch (error) {
+ console.error("Error accessing cameras:", error);
+
+ if (isCancelled) {
+ return;
+ }
+
+ if (
+ error instanceof DOMException &&
+ error.name === "NotAllowedError"
+ ) {
+ setCameraStatus("permission-denied");
+ } else {
+ setCameraStatus("error");
+ }
+ }
+ }
- permissionStream.getTracks().forEach((track) => track.stop());
+ void detectCameras();
- const devices = await navigator.mediaDevices.enumerateDevices();
- const videoDevices = devices.filter(
- (device) => device.kind === "videoinput",
- );
+ return () => {
+ isCancelled = true;
+ };
+ }, []);
+
+ /**
+ * Open the preview stream whenever the selected camera changes.
+ *
+ * Leaving the camera step unmounts the component, which triggers the final
+ * cleanup effect below and turns the camera off.
+ */
+ useEffect(() => {
+ if (!selectedCameraId) {
+ return;
+ }
- setCameras(videoDevices);
+ let isCancelled = false;
- if (videoDevices.length === 0) {
- setCameraStatus("no-camera");
- return;
- }
+ async function openSelectedCamera() {
+ try {
+ stopCurrentStream();
- const savedCameraId = localStorage.getItem(SELECTED_CAMERA_KEY);
+ const newStream = await navigator.mediaDevices.getUserMedia({
+ video: { deviceId: { exact: selectedCameraId } },
+ audio: false,
+ });
- const savedCameraStillExists = videoDevices.some(
- (camera) => camera.deviceId === savedCameraId,
- );
+ if (isCancelled) {
+ newStream.getTracks().forEach((track) => track.stop());
+ return;
+ }
- if (savedCameraId && savedCameraStillExists) {
- await selectCamera(savedCameraId);
- } else {
- await selectCamera(videoDevices[0].deviceId);
- }
- } catch (error) {
- console.error("Error accessing cameras:", error);
+ streamRef.current = newStream;
+ setStream(newStream);
+ setCameraStatus("connected");
+ } catch (error) {
+ console.error("Error starting camera:", error);
- if (error instanceof DOMException && error.name === "NotAllowedError") {
- setCameraStatus("permission-denied");
- } else {
- setCameraStatus("error");
+ if (!isCancelled) {
+ setCameraStatus("error");
+ }
}
}
- }
-
- async function startCamera(deviceId: string): Promise {
- try {
- if (stream) {
- stream.getTracks().forEach((track) => track.stop());
- }
- const newStream = await navigator.mediaDevices.getUserMedia({
- video: { deviceId: { exact: deviceId } },
- audio: false,
- });
+ void openSelectedCamera();
- setStream(newStream);
- setCameraStatus("connected");
- } catch (error) {
- console.error(error);
- setCameraStatus("error");
- }
- }
+ return () => {
+ isCancelled = true;
+ };
+ }, [selectedCameraId, stopCurrentStream]);
+ /**
+ * Final unmount cleanup.
+ *
+ * This is an important part for the onboarding flow:
+ * when the user leaves the camera setup step, the preview stream stops and
+ * the webcam is released.
+ */
useEffect(() => {
- loadCameras();
+ return () => {
+ if (streamRef.current) {
+ streamRef.current.getTracks().forEach((track) => track.stop());
+ }
+ };
}, []);
- useEffect(() => {
- if (selectedCameraId) {
- startCamera(selectedCameraId);
- }
- }, [selectedCameraId]);
-
return {
cameras,
selectedCameraId,
selectCamera,
stream,
- cameraStatus,
+ cameraStatus,
};
-}
+}
\ No newline at end of file
diff --git a/electron/src/renderer/hooks/useFocusEnvironmentSettings.ts b/electron/src/renderer/hooks/useFocusEnvironmentSettings.ts
new file mode 100644
index 0000000..73c4bdb
--- /dev/null
+++ b/electron/src/renderer/hooks/useFocusEnvironmentSettings.ts
@@ -0,0 +1,288 @@
+/**
+ * Shared focus environment settings hook.
+ *
+ * This hook owns the onboarding settings for:
+ * - selected main browser
+ * - whether the selected browser is blocked during focus sessions
+ * - detected desktop app rules
+ * - common browser activity rules
+ *
+ * UI components should stay mostly presentational and call this hook instead
+ * of owning local copies of the settings logic.
+ */
+
+
+import { useEffect, useState } from 'react'
+import { getDefaultBrowserOptions, getDefaultFocusApps, } from '../../shared/appDetection/commonApps.ts'
+import {
+ getDefaultBrowserActivityRules,
+ type BrowserActivityRule,
+ type BrowserActivityRuleStatus,
+} from '../../shared/browserActivity/commonBrowserActivityRules.ts'
+
+export type AppCategory = 'productivity' | 'distraction'
+export type AppRuleStatus = 'allowed' | 'blocked'
+
+export type FocusApp = {
+ id: string
+ name: string
+ category: AppCategory
+ status: AppRuleStatus
+}
+
+export type BrowserOption = {
+ id: string
+ name: string
+}
+
+export type FocusEnvironmentSettings = {
+ selectedBrowserId: string
+ blockSelectedBrowser: boolean
+ appRules: FocusApp[]
+ browserActivityRules: BrowserActivityRule[]
+}
+
+export type {
+ BrowserActivityRule,
+ BrowserActivityRuleStatus,
+} from '../../shared/browserActivity/commonBrowserActivityRules.ts'
+
+type DetectedCommonApp = {
+ id: string
+ displayName: string
+ category: 'productivity' | 'distraction' | 'browser'
+ executablePath: string
+ defaultStatus: 'allowed' | 'blocked'
+}
+
+const FOCUS_ENVIRONMENT_SETTINGS_KEY = 'taskmaster:focusEnvironmentSettings'
+
+const defaultBrowserOptions: BrowserOption[] = getDefaultBrowserOptions()
+const defaultFocusApps: FocusApp[] = getDefaultFocusApps()
+const defaultBrowserActivityRules: BrowserActivityRule[] = getDefaultBrowserActivityRules()
+
+/**
+ * Builds the settings used until the real app detector has returned data.
+ *
+ * The desktop app rules start from the catalogue defaults and may later be
+ * replaced by the apps detected on this machine. Browser activity rules are
+ * static defaults because websites are not installed programs.
+ *
+ * @returns Fallback settings with the first default browser preselected, or
+ * an empty browser id when no default browser option exists.
+ */
+function createDefaultSettings(): FocusEnvironmentSettings {
+  const [firstBrowserOption] = defaultBrowserOptions
+
+  const fallbackSettings: FocusEnvironmentSettings = {
+    selectedBrowserId: firstBrowserOption?.id ?? '',
+    blockSelectedBrowser: false,
+    appRules: defaultFocusApps,
+    browserActivityRules: defaultBrowserActivityRules,
+  }
+
+  return fallbackSettings
+}
+
+
+/**
+ * Reads the saved focus environment settings from localStorage.
+ *
+ * The stored entry is removed when it is not valid JSON or does not have the
+ * fields the hook reads, so a corrupt or outdated value cannot crash the
+ * onboarding UI.
+ *
+ * @returns The saved settings, or `null` when nothing usable is stored.
+ */
+function loadFocusEnvironmentSettings(): FocusEnvironmentSettings | null {
+  const savedSettings = localStorage.getItem(FOCUS_ENVIRONMENT_SETTINGS_KEY)
+
+  if (!savedSettings) {
+    return null
+  }
+
+  try {
+    const parsed: unknown = JSON.parse(savedSettings)
+
+    // JSON.parse accepts any valid JSON ("null", "[]", an older schema, ...),
+    // so check the top-level fields before trusting the value.
+    if (typeof parsed === 'object' && parsed !== null) {
+      const candidate = parsed as Partial<
+        Record<keyof FocusEnvironmentSettings, unknown>
+      >
+
+      if (
+        typeof candidate.selectedBrowserId === 'string' &&
+        typeof candidate.blockSelectedBrowser === 'boolean' &&
+        Array.isArray(candidate.appRules) &&
+        Array.isArray(candidate.browserActivityRules)
+      ) {
+        return parsed as FocusEnvironmentSettings
+      }
+    }
+  } catch {
+    // Invalid JSON: fall through and discard the stored value below.
+  }
+
+  localStorage.removeItem(FOCUS_ENVIRONMENT_SETTINGS_KEY)
+  return null
+}
+
+
+/**
+ * Type guard that keeps only detected apps usable as desktop app rules.
+ *
+ * Browser apps are offered separately as browser options, so the guard removes
+ * "browser" from the category type once an app has passed it.
+ *
+ * @param app A detected app of any category.
+ * @returns `true` for productivity and distraction apps, `false` otherwise.
+ */
+function isDetectedFocusApp(
+  app: DetectedCommonApp
+): app is DetectedCommonApp & { category: AppCategory } {
+  switch (app.category) {
+    case 'productivity':
+    case 'distraction':
+      return true
+    default:
+      return false
+  }
+}
+// ====== \\
+
+
+/**
+ * Maps detected desktop apps to the rule shape used by the onboarding UI.
+ *
+ * Apps that fail `isDetectedFocusApp` (browsers) are left out; the input
+ * order is preserved.
+ *
+ * @param detectedApps Apps reported by the detector, in any category.
+ * @returns One rule per non-browser app, starting from its default status.
+ */
+function convertDetectedAppsToFocusApps(
+  detectedApps: DetectedCommonApp[]
+): FocusApp[] {
+  const focusApps: FocusApp[] = []
+
+  for (const detectedApp of detectedApps) {
+    if (!isDetectedFocusApp(detectedApp)) {
+      continue
+    }
+
+    focusApps.push({
+      id: detectedApp.id,
+      name: detectedApp.displayName,
+      category: detectedApp.category,
+      status: detectedApp.defaultStatus,
+    })
+  }
+
+  return focusApps
+}
+
+/**
+ * Picks the browsers out of the detected apps for the main browser dropdown.
+ *
+ * @param detectedApps Apps reported by the detector, in any category.
+ * @returns One `{ id, name }` option per browser app, in input order.
+ */
+function convertDetectedAppsToBrowserOptions(
+  detectedApps: DetectedCommonApp[]
+): BrowserOption[] {
+  const browserOptions: BrowserOption[] = []
+
+  for (const detectedApp of detectedApps) {
+    if (detectedApp.category === 'browser') {
+      browserOptions.push({
+        id: detectedApp.id,
+        name: detectedApp.displayName,
+      })
+    }
+  }
+
+  return browserOptions
+}
+
+/**
+ * Owns the focus environment settings edited during onboarding.
+ *
+ * State starts from the settings saved in localStorage, falling back to the
+ * catalogue defaults. Changes stay in React state until
+ * `saveFocusEnvironmentSettings` is called.
+ *
+ * @returns The current settings, the browser options, derived rule groups for
+ * the onboarding UI, and the update/save helpers.
+ */
+export function useFocusEnvironmentSettings() {
+  // Read localStorage once per mount instead of parsing it twice; both values
+  // below derive from this single snapshot.
+  const [savedSettings] = useState(() => loadFocusEnvironmentSettings())
+  const hasSavedSettings = savedSettings !== null
+
+  const [settings, setSettings] = useState(
+    () => savedSettings ?? createDefaultSettings()
+  )
+
+  const [browserOptions, setBrowserOptions] =
+    useState(defaultBrowserOptions)
+
+  /**
+   * On first load, ask Electron main process to detect installed desktop apps.
+   *
+   * This only runs when there are no saved settings, so the user's previous
+   * allowed/blocked choices are not overwritten.
+   */
+  useEffect(() => {
+    // Set by the cleanup so a late detection result is ignored after unmount.
+    let isCancelled = false
+
+    async function loadDetectedApps() {
+      if (hasSavedSettings) {
+        return
+      }
+
+      if (!window.taskmaster?.detectCommonApps) {
+        console.warn('Taskmaster preload API is not available')
+        return
+      }
+
+      try {
+        const detectedApps = await window.taskmaster.detectCommonApps()
+
+        if (isCancelled) {
+          return
+        }
+
+        const detectedFocusApps = convertDetectedAppsToFocusApps(detectedApps)
+        const detectedBrowserOptions =
+          convertDetectedAppsToBrowserOptions(detectedApps)
+
+        if (detectedBrowserOptions.length > 0) {
+          setBrowserOptions(detectedBrowserOptions)
+        }
+
+        // Keep the defaults for any part the detector returned nothing for.
+        setSettings((currentSettings) => ({
+          ...currentSettings,
+          selectedBrowserId:
+            detectedBrowserOptions[0]?.id ?? currentSettings.selectedBrowserId,
+          appRules:
+            detectedFocusApps.length > 0
+              ? detectedFocusApps
+              : currentSettings.appRules,
+        }))
+      } catch (error) {
+        // A failed detection is not fatal: the default rules stay in place.
+        console.error('Error detecting installed apps:', error)
+      }
+    }
+
+    void loadDetectedApps()
+
+    return () => {
+      isCancelled = true
+    }
+  }, [hasSavedSettings])
+
+  /**
+   * Derived desktop app groups for the desktop app whitelist UI.
+   */
+  const productivityApps = settings.appRules.filter(
+    (app) => app.category === 'productivity'
+  )
+
+  const distractionApps = settings.appRules.filter(
+    (app) => app.category === 'distraction'
+  )
+
+  /**
+   * Browser activity groups shown by BrowserActivitySelectionStep.
+   *
+   * These are website/page rules, not installed desktop apps.
+   * AI tools are separated because they can be productive or distracting
+   * depending on the user's work.
+   */
+  const blockedBrowserActivityRules = settings.browserActivityRules.filter(
+    (rule) => rule.id !== 'ai-tools'
+  )
+
+  const flexibleBrowserActivityRules = settings.browserActivityRules.filter(
+    (rule) => rule.id === 'ai-tools'
+  )
+
+  // True once there are more than six desktop app rules.
+  const shouldSplitAppRules = settings.appRules.length > 6
+
+  /**
+   * Setting update helpers used by onboarding UI components.
+   */
+  function setSelectedBrowserId(selectedBrowserId: string) {
+    setSettings((currentSettings) => ({
+      ...currentSettings,
+      selectedBrowserId,
+    }))
+  }
+
+  function setBlockSelectedBrowser(blockSelectedBrowser: boolean) {
+    setSettings((currentSettings) => ({
+      ...currentSettings,
+      blockSelectedBrowser,
+    }))
+  }
+
+  /** Sets the status of the desktop app rule with the given id. */
+  function updateAppStatus(appId: string, status: AppRuleStatus) {
+    setSettings((currentSettings) => ({
+      ...currentSettings,
+      appRules: currentSettings.appRules.map((app) =>
+        app.id === appId
+          ? {
+              ...app,
+              status,
+            }
+          : app
+      ),
+    }))
+  }
+
+  /** Sets the status of the browser activity rule with the given id. */
+  function updateBrowserActivityRuleStatus(
+    ruleId: string,
+    status: BrowserActivityRuleStatus
+  ) {
+    setSettings((currentSettings) => ({
+      ...currentSettings,
+      browserActivityRules: currentSettings.browserActivityRules.map((rule) =>
+        rule.id === ruleId
+          ? {
+              ...rule,
+              status,
+            }
+          : rule
+      ),
+    }))
+  }
+
+  /** Writes the current settings to localStorage as JSON. */
+  function saveFocusEnvironmentSettings() {
+    localStorage.setItem(
+      FOCUS_ENVIRONMENT_SETTINGS_KEY,
+      JSON.stringify(settings)
+    )
+  }
+
+  return {
+    settings,
+    browserOptions,
+    productivityApps,
+    distractionApps,
+    shouldSplitAppRules,
+    setSelectedBrowserId,
+    setBlockSelectedBrowser,
+    updateAppStatus,
+    saveFocusEnvironmentSettings,
+    blockedBrowserActivityRules,
+    flexibleBrowserActivityRules,
+    updateBrowserActivityRuleStatus,
+  }
+}
\ No newline at end of file
diff --git a/electron/src/renderer/index.css b/electron/src/renderer/index.css
index 4000e00..36855e3 100644
--- a/electron/src/renderer/index.css
+++ b/electron/src/renderer/index.css
@@ -751,7 +751,7 @@ a {
display: grid;
align-content: start;
gap: var(--space-md);
- padding: clamp(var(--space-lg), 2.4vw, var(--space-xl));
+ padding: clamp(var(--space-lg), 2vw, var(--space-xl));
}
.allowed-environment-panel {
@@ -1021,6 +1021,15 @@ a {
.onboarding-fixed-actions .secondary-button {
flex: 1 1 0;
}
+
+ .browser-block-toggle {
+ max-width: none;
+ }
+
+  /* The unconditional `.focus-app-rules--split` rule is declared later in this
+     stylesheet with the same specificity, so it would win the cascade and keep
+     two columns here. Repeating the class raises this selector's specificity
+     so the single-column override applies.
+     NOTE(review): assumes the enclosing at-rule is the narrow-screen media
+     query; confirm. */
+  .focus-app-rules--split.focus-app-rules--split {
+    grid-template-columns: minmax(0, 1fr);
+  }
+
}
@@ -1039,4 +1048,225 @@ a {
.camera-status-dot--error {
background: var(--color-distracted);
+}
+
+
+
+
+/* app sections */
+
+.focus-app-rules {
+ display: grid;
+ gap: var(--space-md);
+}
+
+.focus-app-rules--split {
+ grid-template-columns: repeat(2, minmax(0, 1fr));
+ align-items: start;
+}
+
+.focus-app-rule-section {
+ display: grid;
+ align-content: start;
+ gap: var(--space-sm);
+ padding: 0.70rem;
+ border: 1px solid var(--color-border);
+ border-radius: var(--radius-md);
+ background: color-mix(in srgb, var(--color-bg-elevated) 48%, transparent);
+}
+
+.focus-app-rule-section-header {
+ display: grid;
+ gap: 0.12rem;
+}
+
+.focus-app-rule-section-header h2 {
+ margin: 0;
+ color: var(--color-text-main);
+ font-size: clamp(0.92rem, 0.86vw, 1rem);
+ font-weight: 850;
+}
+
+.focus-app-rule-section-header p {
+ margin: 0;
+ color: var(--color-text-muted);
+ font-size: clamp(0.72rem, 0.66vw, 0.8rem);
+ line-height: 1.35;
+}
+
+.focus-app-rule-list {
+ display: grid;
+ gap: 0.4rem;
+}
+
+.focus-app-rule-row {
+ display: grid;
+ min-height: 2.25rem;
+ grid-template-columns: minmax(0, 1fr) auto;
+ align-items: center;
+ gap: var(--space-sm);
+ padding: 0.30rem 0.48rem;
+ border: 1px solid color-mix(in srgb, var(--color-border) 76%, transparent);
+ border-radius: var(--radius-sm);
+ background: color-mix(in srgb, var(--color-bg-card) 66%, transparent);
+}
+
+.focus-app-rule-name {
+ overflow: hidden;
+ color: var(--color-text-main);
+ font-size: clamp(0.82rem, 0.76vw, 0.9rem);
+ font-weight: 760;
+ text-overflow: ellipsis;
+ white-space: nowrap;
+}
+
+/* Simple allow/block toggle */
+
+.focus-app-toggle {
+ position: relative;
+ display: inline-block;
+ width: 2.5rem;
+ height: 1.25rem;
+ border-radius: 999px;
+ cursor: pointer;
+}
+
+.focus-app-toggle input {
+ position: absolute;
+ inset: 0;
+ z-index: 2;
+ width: 100%;
+ height: 100%;
+ margin: 0;
+ opacity: 0;
+ cursor: pointer;
+}
+
+.focus-app-toggle span {
+ position: absolute;
+ inset: 0;
+ border: 1px solid color-mix(in srgb, var(--color-focused) 46%, var(--color-border));
+ border-radius: 999px;
+ background:
+ linear-gradient(
+ 90deg,
+ color-mix(in srgb, var(--color-focused) 34%, var(--color-bg-elevated)),
+ color-mix(in srgb, var(--color-focused) 18%, var(--color-bg-card))
+ );
+ transition:
+ background 220ms ease,
+ border-color 220ms ease,
+ box-shadow 220ms ease;
+}
+
+.focus-app-toggle span::before {
+ position: absolute;
+ top: 0.13rem;
+ left: 0.14rem;
+ width: 0.94rem;
+ height: 0.94rem;
+ content: "";
+ border-radius: 999px;
+ background: linear-gradient(145deg, #f6f2e8, #bfb8aa);
+ box-shadow: 0 0.22rem 0.48rem rgba(0, 0, 0, 0.36);
+ transition:
+ left 220ms cubic-bezier(0.22, 1, 0.36, 1),
+ transform 220ms cubic-bezier(0.22, 1, 0.36, 1);
+}
+
+.focus-app-toggle input:checked + span {
+ border-color: color-mix(in srgb, var(--color-distracted) 52%, var(--color-border));
+ background:
+ linear-gradient(
+ 90deg,
+ color-mix(in srgb, var(--color-distracted) 20%, var(--color-bg-card)),
+ color-mix(in srgb, var(--color-distracted) 40%, var(--color-bg-elevated))
+ );
+ box-shadow: 0 0 0.7rem color-mix(in srgb, var(--color-distracted) 14%, transparent);
+}
+
+.focus-app-toggle input:checked + span::before {
+ left: 1.32rem;
+}
+
+.focus-app-toggle input:focus-visible + span {
+ outline: 2px solid var(--color-accent-bright);
+ outline-offset: 3px;
+}
+
+
+/* Browser activity onboarding step
+   This layout uses one column because website rules need more description text
+   than desktop app rules. */
+
+.browser-activity-panel {
+  max-width: 620px;
+}
+
+.browser-activity-rules {
+  display: flex;
+  flex-direction: column;
+  gap: 1rem;
+}
+
+.browser-activity-rule-section {
+  display: flex;
+  flex-direction: column;
+  gap: 0.75rem;
+}
+
+.browser-activity-rule-list {
+  display: flex;
+  flex-direction: column;
+  gap: 0.3rem;
+}
+
+.browser-activity-rule-row {
+  display: grid;
+  grid-template-columns: 1fr auto;
+  align-items: center;
+  min-height: 64px;
+  padding: 0.70rem 0.80rem;
+  border: 1px solid var(--color-border);
+  border-radius: 0.85rem;
+  background: rgba(255, 255, 255, 0.035);
+}
+
+/* Label + description column inside a rule row. Declared once: an earlier
+   duplicate of this rule set `gap: 0.25rem`, which this rule overrode. */
+.browser-activity-rule-copy {
+  display: flex;
+  flex-direction: column;
+  gap: 0;
+  min-width: 0;
+}
+
+/* Declared once; merges the two previous declarations of this selector. */
+.browser-activity-rule-description {
+  max-width: 440px;
+  color: var(--color-text-muted);
+  font-size: 0.82rem;
+  line-height: 1.35;
+  opacity: 0.80;
+}
+
+@media (max-width: 760px) {
+ .browser-activity-panel {
+ max-width: 100%;
+ }
+
+ .browser-activity-rule-row {
+ grid-template-columns: 1fr;
+ align-items: flex-start;
+ }
}
\ No newline at end of file
diff --git a/electron/src/renderer/pages/OnboardingPage.tsx b/electron/src/renderer/pages/OnboardingPage.tsx
index c2843c1..49f9709 100644
--- a/electron/src/renderer/pages/OnboardingPage.tsx
+++ b/electron/src/renderer/pages/OnboardingPage.tsx
@@ -1,9 +1,17 @@
+/**
+ * Main onboarding flow controller.
+ *
+ * This page owns the current onboarding step, transition direction, and screen
+ * animations. Individual onboarding screens should keep their own UI focused
+ * and receive only navigation callbacks from this page.
+ */
import { useEffect, useRef, useState } from 'react'
import CameraSetupStep from '../components/onboarding/OnboardingCameraSetup'
import DistractionOptionsStep from '../components/onboarding/OnboardingAdditionalFunctions'
import FocusEnvironmentStep from '../components/onboarding/WhitelistSelectionStep'
import MenuPage from './MenuPage'
import WelcomeStep from '../components/onboarding/OnboardingWelcome'
+import BrowserActivitySelectionStep from '../components/onboarding/BrowserActivitySelectionStep'
type Direction = 'forward' | 'backward'
@@ -12,6 +20,7 @@ const lightStateByStep = [
'top-right',
'top-left',
'ambient',
+ 'ambient',
'off',
] as const
@@ -48,6 +57,7 @@ export default function OnboardingPage() {
}, 700)
}
+
function renderStep(stepToRender: number) {
if (stepToRender === 0) {
return goToStep(1)} />
@@ -73,9 +83,18 @@ export default function OnboardingPage() {
if (stepToRender === 3) {
return (
- goToStep(2)}
- onFinish={() => goToStep(4)}
+ onContinue={() => goToStep(4)}
+ />
+ )
+ }
+
+ if (stepToRender === 4) {
+ return (
+ goToStep(3)}
+ onFinish={() => goToStep(5)}
/>
)
}
diff --git a/electron/src/renderer/vite-env.d.ts b/electron/src/renderer/vite-env.d.ts
new file mode 100644
index 0000000..c33d10f
--- /dev/null
+++ b/electron/src/renderer/vite-env.d.ts
@@ -0,0 +1,19 @@
+///
+
+type DetectedCommonApp = {
+ id: string
+ displayName: string
+ category: 'productivity' | 'distraction' | 'browser'
+ executablePath: string
+ defaultStatus: 'allowed' | 'blocked'
+}
+
+declare global {
+  interface Window {
+    /**
+     * API the renderer reads from `window.taskmaster`.
+     * NOTE(review): presumably exposed by the Electron preload script; confirm.
+     */
+    taskmaster: {
+      /**
+       * Resolves with the detected common apps. The type argument is required:
+       * a bare `Promise` does not compile.
+       */
+      detectCommonApps: () => Promise<DetectedCommonApp[]>
+    }
+  }
+}
+
+export {}
\ No newline at end of file
diff --git a/electron/src/shared/appDetection/commonApps.ts b/electron/src/shared/appDetection/commonApps.ts
new file mode 100644
index 0000000..5459bfa
--- /dev/null
+++ b/electron/src/shared/appDetection/commonApps.ts
@@ -0,0 +1,164 @@
+/**
+ * Common desktop app catalogue used by Taskmaster onboarding.
+ *
+ * These definitions describe known Windows apps that Taskmaster can try to
+ * detect on the user's computer.
+ *
+ * Browser websites/pages do not belong here. Browser activity rules live in:
+ * shared/browserActivity/commonBrowserActivityRules.ts
+ */
+
+export type CommonAppCategory = 'productivity' | 'distraction' | 'browser'
+
+export type CommonAppDefinition = {
+ id: string
+ displayName: string
+ category: CommonAppCategory
+ executableNames: string[]
+ commonWindowsPaths: string[]
+ defaultStatus: 'allowed' | 'blocked'
+}
+
+export const COMMON_APPS: CommonAppDefinition[] = [
+ {
+ id: 'vscode',
+ displayName: 'Visual Studio Code',
+ category: 'productivity',
+ executableNames: ['Code.exe'],
+ commonWindowsPaths: [
+ '%LOCALAPPDATA%\\Programs\\Microsoft VS Code\\Code.exe',
+ '%PROGRAMFILES%\\Microsoft VS Code\\Code.exe',
+ '%PROGRAMFILES(X86)%\\Microsoft VS Code\\Code.exe',
+ ],
+ defaultStatus: 'allowed',
+ },
+ {
+ id: 'windows-terminal',
+ displayName: 'Windows Terminal',
+ category: 'productivity',
+ executableNames: ['WindowsTerminal.exe', 'wt.exe'],
+ commonWindowsPaths: [
+ '%LOCALAPPDATA%\\Microsoft\\WindowsApps\\wt.exe',
+ ],
+ defaultStatus: 'allowed',
+ },
+ {
+ id: 'notion',
+ displayName: 'Notion',
+ category: 'productivity',
+ executableNames: ['Notion.exe'],
+ commonWindowsPaths: [
+ '%LOCALAPPDATA%\\Programs\\Notion\\Notion.exe',
+ ],
+ defaultStatus: 'allowed',
+ },
+ {
+ id: 'chrome',
+ displayName: 'Google Chrome',
+ category: 'browser',
+ executableNames: ['chrome.exe'],
+ commonWindowsPaths: [
+ '%PROGRAMFILES%\\Google\\Chrome\\Application\\chrome.exe',
+ '%PROGRAMFILES(X86)%\\Google\\Chrome\\Application\\chrome.exe',
+ '%LOCALAPPDATA%\\Google\\Chrome\\Application\\chrome.exe',
+ ],
+ defaultStatus: 'allowed',
+ },
+ {
+ id: 'edge',
+ displayName: 'Microsoft Edge',
+ category: 'browser',
+ executableNames: ['msedge.exe'],
+ commonWindowsPaths: [
+ '%PROGRAMFILES(X86)%\\Microsoft\\Edge\\Application\\msedge.exe',
+ '%PROGRAMFILES%\\Microsoft\\Edge\\Application\\msedge.exe',
+ ],
+ defaultStatus: 'allowed',
+ },
+ {
+ id: 'opera-gx',
+ displayName: 'Opera GX',
+ category: 'browser',
+ executableNames: ['opera.exe', 'launcher.exe'],
+ commonWindowsPaths: [
+ '%LOCALAPPDATA%\\Programs\\Opera GX\\launcher.exe',
+ '%LOCALAPPDATA%\\Programs\\Opera GX\\opera.exe',
+ ],
+ defaultStatus: 'allowed',
+ },
+ {
+ id: 'discord',
+ displayName: 'Discord',
+ category: 'distraction',
+ executableNames: ['Discord.exe'],
+ commonWindowsPaths: [
+ '%LOCALAPPDATA%\\Discord\\Update.exe',
+ '%LOCALAPPDATA%\\Discord\\app-*\\Discord.exe',
+ ],
+ defaultStatus: 'blocked',
+ },
+ {
+ id: 'spotify',
+ displayName: 'Spotify',
+ category: 'distraction',
+ executableNames: ['Spotify.exe'],
+ commonWindowsPaths: [
+ '%APPDATA%\\Spotify\\Spotify.exe',
+ '%LOCALAPPDATA%\\Microsoft\\WindowsApps\\Spotify.exe',
+ ],
+ defaultStatus: 'blocked',
+ },
+ {
+ id: 'steam',
+ displayName: 'Steam',
+ category: 'distraction',
+ executableNames: ['steam.exe'],
+ commonWindowsPaths: [
+ '%PROGRAMFILES(X86)%\\Steam\\steam.exe',
+ '%PROGRAMFILES%\\Steam\\steam.exe',
+ ],
+ defaultStatus: 'blocked',
+ },
+]
+
+
+/**
+ * Desktop app rule derived from the common app catalogue. These rules are
+ * shown during onboarding before real detection results are available.
+ */
+export type DefaultFocusApp = {
+ id: string
+ name: string
+ category: 'productivity' | 'distraction'
+ status: 'allowed' | 'blocked'
+}
+
+export type DefaultBrowserOption = {
+ id: string
+ name: string
+}
+
+/**
+ * Converts the common app catalogue into default desktop app rules.
+ *
+ * Browser entries are skipped because they are offered as browser options
+ * instead. Catalogue order is preserved.
+ *
+ * @returns One rule per non-browser catalogue app, using its default status.
+ */
+export function getDefaultFocusApps(): DefaultFocusApp[] {
+  const focusApps: DefaultFocusApp[] = []
+
+  for (const app of COMMON_APPS) {
+    // Comparing against the literal narrows `app.category`, so no type
+    // assertion is needed below.
+    if (app.category === 'browser') {
+      continue
+    }
+
+    focusApps.push({
+      id: app.id,
+      name: app.displayName,
+      category: app.category,
+      status: app.defaultStatus,
+    })
+  }
+
+  return focusApps
+}
+
+
+/**
+ * Lists the browsers from the common app catalogue as options for the main
+ * browser dropdown in onboarding.
+ *
+ * @returns One `{ id, name }` option per catalogue browser, in catalogue order.
+ */
+export function getDefaultBrowserOptions(): DefaultBrowserOption[] {
+  const browserOptions: DefaultBrowserOption[] = []
+
+  for (const app of COMMON_APPS) {
+    if (app.category === 'browser') {
+      browserOptions.push({
+        id: app.id,
+        name: app.displayName,
+      })
+    }
+  }
+
+  return browserOptions
+}
\ No newline at end of file
diff --git a/electron/src/shared/browserActivity/commonBrowserActivityRules.ts b/electron/src/shared/browserActivity/commonBrowserActivityRules.ts
new file mode 100644
index 0000000..cb6f8ce
--- /dev/null
+++ b/electron/src/shared/browserActivity/commonBrowserActivityRules.ts
@@ -0,0 +1,112 @@
+/**
+ * Common browser activity rules used during onboarding.
+ *
+ * These rules are not installed programs. They are common website/page patterns
+ * that Taskmaster can later match against the active browser window title.
+ *
+ * Example:
+ * - Active window title: "YouTube - Google Chrome"
+ * - Rule matchText: ["youtube"]
+ *
+ * Later, a browser extension can replace this weak title-matching approach
+ * with accurate tab URL detection.
+ */
+
+export type BrowserActivityRuleStatus = 'allowed' | 'blocked' | 'ignored'
+
+export type BrowserActivityCategory =
+ | 'entertainment'
+ | 'music'
+ | 'messaging'
+ | 'communication'
+ | 'ai'
+ | 'social'
+ | 'shopping'
+
+export type BrowserActivityRule = {
+ id: string
+ label: string
+ description: string
+ matchText: string[]
+ category: BrowserActivityCategory
+ status: BrowserActivityRuleStatus
+}
+
+export const COMMON_BROWSER_ACTIVITY_RULES: BrowserActivityRule[] = [
+ {
+ id: 'youtube',
+ label: 'YouTube',
+ description: 'Videos, recommendations, shorts, and general browsing.',
+ matchText: ['youtube'],
+ category: 'entertainment',
+ status: 'blocked',
+ },
+ {
+ id: 'youtube-music',
+ label: 'YouTube Music',
+ description: 'Music streaming through YouTube Music.',
+ matchText: ['music.youtube', 'youtube music'],
+ category: 'music',
+ status: 'blocked',
+ },
+ {
+ id: 'spotify-web',
+ label: 'Spotify Web',
+ description: 'Spotify in the browser.',
+ matchText: ['spotify'],
+ category: 'music',
+ status: 'blocked',
+ },
+ {
+ id: 'whatsapp-web',
+ label: 'WhatsApp Web',
+ description: 'Messaging through WhatsApp in the browser.',
+ matchText: ['whatsapp'],
+ category: 'messaging',
+ status: 'blocked',
+ },
+ {
+ id: 'email',
+ label: 'Email',
+ description: 'Gmail, Outlook, Yahoo Mail, Proton Mail, and similar inboxes.',
+ matchText: ['gmail', 'outlook', 'yahoo mail', 'proton mail'],
+ category: 'communication',
+ status: 'blocked',
+ },
+ {
+ id: 'streaming',
+ label: 'Streaming services',
+ description: 'Netflix, Disney+, Prime Video, Crunchyroll, Apple TV, etc.',
+ matchText: ['netflix', 'disney+', 'prime video', 'crunchyroll', 'apple tv'],
+ category: 'entertainment',
+ status: 'blocked',
+ },
+ {
+ id: 'ai-tools',
+ label: 'AI tools',
+ description: 'ChatGPT, Claude, Gemini, Perplexity, and similar tools.',
+ matchText: ['chatgpt', 'claude', 'gemini', 'perplexity'],
+ category: 'ai',
+ status: 'allowed',
+ },
+ {
+ id: 'social-media',
+ label: 'Social media',
+ description: 'Instagram, TikTok, Facebook, Reddit, X, and similar sites.',
+ matchText: ['instagram', 'tiktok', 'facebook', 'reddit', 'x.com'],
+ category: 'social',
+ status: 'blocked',
+ },
+ {
+ id: 'shopping',
+ label: 'Shopping',
+ description: 'Amazon, eBay, AliExpress, marketplace browsing, and similar.',
+ matchText: ['amazon', 'ebay', 'aliexpress', 'marketplace'],
+ category: 'shopping',
+ status: 'blocked',
+ },
+]
+
+/**
+ * Returns the default browser activity rules for onboarding.
+ *
+ * Each call returns fresh copies of the rule objects and their `matchText`
+ * arrays, so a caller that mutates the result cannot alter the shared
+ * `COMMON_BROWSER_ACTIVITY_RULES` catalogue.
+ *
+ * @returns A new array of copied rules, in catalogue order.
+ */
+export function getDefaultBrowserActivityRules(): BrowserActivityRule[] {
+  return COMMON_BROWSER_ACTIVITY_RULES.map((rule) => ({
+    ...rule,
+    matchText: [...rule.matchText],
+  }))
+}
\ No newline at end of file
diff --git a/electron/src/shared/protocol.ts b/electron/src/shared/protocol.ts
deleted file mode 100644
index 9f37611..0000000
--- a/electron/src/shared/protocol.ts
+++ /dev/null
@@ -1,2 +0,0 @@
-// TypeScript types for IPC messages (renderer ↔ main) and WebSocket events (main ↔ Python).
-// Single source of truth so neither side drifts out of sync.
\ No newline at end of file
diff --git a/python/README.md b/python/README.md
new file mode 100644
index 0000000..76cd2f2
--- /dev/null
+++ b/python/README.md
@@ -0,0 +1,93 @@
+# Taskmaster — Python CV Worker
+
+The computer-vision backend. It owns the webcam, runs the detectors
+(phone now, gaze next), and will stream detection events to the Electron
+app over a WebSocket.
+
+## Requirements
+
+- **Python 3.11** — MediaPipe has no wheels for 3.13/3.14, so the venv
+ must be built with `python3.11`.
+- Dependencies live in [`requirements.txt`](requirements.txt).
+
+## Setup
+
+From the repo root, the easiest path is `./setup.sh`. To do just the
+Python side manually:
+
+```bash
+cd python
+python3.11 -m venv .venv
+source .venv/bin/activate
+pip install -r requirements.txt
+```
+
+The venv lives at `python/.venv` and is gitignored.
+
+## Running
+
+```bash
+source .venv/bin/activate
+python cv/detection_loop.py # Ctrl+C to stop
+```
+
+This opens the webcam, samples ~10 frames/sec, runs the phone detector on
+each frame, and prints the result. The camera is always released cleanly
+on exit.
+
+## Module layout
+
+```
+python/
+├── main.py # FastAPI + WebSocket server (not implemented yet)
+├── models/ # detection model files (gitignored; fetched by setup.sh)
+│ └── yolox_s.onnx # YOLOX-S phone detector (Apache-2.0)
+└── cv/
+ ├── camera.py # owns the webcam handle: start / read / stop
+ ├── detection_loop.py # the loop: grab frame -> run detectors -> emit result
+ ├── phone_detector.py # detect_phone(frame) -> event dict (YOLOX via onnxruntime)
+ ├── phone_detect_test.py# manual visual test: draws boxes on the webcam feed
+ └── gaze_detector.py # gaze/face detection (planned)
+```
+
+### Design: why `camera.py` and `detection_loop.py` are separate
+
+Each module should have **one reason to change**:
+
+- `camera.py` is a **resource owner** — it only cares about the webcam
+ hardware. It changes when capture concerns change.
+- `detection_loop.py` is **orchestration/policy** — sampling rate, which
+ detectors run, what happens to results. It changes when the detection
+ pipeline changes.
+
+The dependency arrow points one way: `detection_loop` imports `camera` and
+the detectors; `camera` knows nothing about detection. This keeps the
+camera reusable (onboarding preview, calibration) and lets each piece be
+tested on its own.
+
+## Detection event shape
+
+Every detector returns a dict matching the WebSocket protocol in
+[`PLAN.md`](../PLAN.md):
+
+```python
+{ "type": "phone", "status": "none" | "detected",
+ "confidence": float, "timestamp": int } # timestamp = ms since epoch
+```
+
+### Phone detection
+
+`phone_detector.detect_phone()` runs **YOLOX-S** (general COCO detector,
+Apache-2.0) locally via **onnxruntime** (MIT), and reports the `cell phone`
+class. Both are permissively licensed and bundle into a shipped app — no
+PyTorch, no AGPL (unlike Ultralytics YOLO).
+
+- `find_phones(frame)` → list of `(x1, y1, x2, y2, score)` boxes (perception).
+- `detect_phone(frame)` → the protocol event above.
+
+The detector is **perception only** — it answers "is there a phone in this
+frame?". Turning that into a *distracted* state (phone visible for N seconds)
+is policy that belongs in the loop/state layer, not here.
+
+The model file (`models/yolox_s.onnx`, ~34 MB) is gitignored and downloaded
+by `setup.sh`.
diff --git a/python/cv/detection_loop.py b/python/cv/detection_loop.py
new file mode 100644
index 0000000..b5a9fb5
--- /dev/null
+++ b/python/cv/detection_loop.py
@@ -0,0 +1,90 @@
+"""Detection loop — the heartbeat that ties the camera to the detectors.
+
+This module owns the repeating cycle:
+
+ grab a frame -> run the phone detector on it -> hand off the result
+
+For now it just prints each result so you can watch the pipeline work. Later
+the same loop will push results over the WebSocket to the Electron app instead
+of printing them, and it will also call the gaze detector alongside the phone
+detector.
+
+Run it directly to try it out (camera light should turn on):
+
+ cd python
+ source .venv/bin/activate
+ python cv/detection_loop.py
+
+Press Ctrl+C to stop — the camera is always released cleanly on the way out.
+"""
+
+# time — used to pace the loop so we don't pin the CPU at 100%.
+import time
+
+# Sibling modules in this same cv/ folder. When you run this file as a script,
+# Python puts this folder on the import path, so these plain imports resolve.
+import camera
+import phone_detector
+
+
+# How many times per second we sample the webcam. 10 fps is plenty for
+# detecting something as slow as "is the user holding a phone", and it keeps
+# CPU usage low. Derived sleep below is 1 / this.
+FRAMES_PER_SECOND = 10
+_SECONDS_PER_FRAME = 1.0 / FRAMES_PER_SECOND
+
+
+def run_detection_loop(camera_device_index: int = 0) -> None:
+    """Open the camera and run the detect-on-every-frame loop until stopped.
+
+    Each iteration reads a frame, runs the phone detector on it, prints the
+    result, then sleeps for whatever is left of the frame budget, so the time
+    spent on capture and inference counts towards FRAMES_PER_SECOND.
+
+    camera_device_index = 0 means the default webcam (see camera.py).
+    The loop runs forever; stop it with Ctrl+C (KeyboardInterrupt).
+    """
+
+    # Turn the webcam on before entering the try block: if this raises there
+    # is nothing to release yet.
+    camera.start_camera(camera_device_index)
+    print(f"[detection_loop] camera started — sampling at {FRAMES_PER_SECOND} fps")
+    print("[detection_loop] press Ctrl+C to stop\n")
+
+    try:
+        # The main loop. Keep going until the user interrupts us.
+        while True:
+            # A monotonic clock is unaffected by system clock adjustments.
+            frame_started_at = time.monotonic()
+
+            # 1) Grab the most recent frame from the webcam. May be None if a
+            #    single read failed; detect_phone is expected to handle that
+            #    (see phone_detector.py).
+            current_frame = camera.read_current_frame()
+
+            # 2) Ask the phone detector what it sees in this frame.
+            phone_result = phone_detector.detect_phone(current_frame)
+
+            # 3) For now, just print it. Later: send over WebSocket instead.
+            print(_format_result_for_console(phone_result))
+
+            # 4) Sleep only for the rest of the frame budget. A fixed sleep on
+            #    top of the work above would drop the real rate below
+            #    FRAMES_PER_SECOND. If the work overran the budget, do not
+            #    sleep at all.
+            elapsed_seconds = time.monotonic() - frame_started_at
+            time.sleep(max(0.0, _SECONDS_PER_FRAME - elapsed_seconds))
+
+    except KeyboardInterrupt:
+        # Ctrl+C lands here. Not an error — it's the normal way to stop.
+        print("\n[detection_loop] stop requested")
+
+    finally:
+        # Whatever happens (normal stop OR a crash), always free the webcam so
+        # other apps — and the next run — can use it.
+        camera.stop_camera()
+        print("[detection_loop] camera released — bye")
+
+
+def _format_result_for_console(result: dict) -> str:
+    """Render a detection dict as one compact console line.
+
+    Reads the "status", "confidence" and "timestamp" keys. The status is
+    left-aligned in 8 columns and the confidence is shown with two decimals.
+    """
+    status_part = f"phone={result['status']:<8} "
+    confidence_part = f"confidence={result['confidence']:.2f} "
+    timestamp_part = f"t={result['timestamp']}"
+    return status_part + confidence_part + timestamp_part
+
+
+# Standard Python entry-point guard: this block only runs when the file is
+# executed directly (python cv/detection_loop.py), not when it is imported.
+if __name__ == "__main__":
+ run_detection_loop()
diff --git a/python/cv/phone_detect_test.py b/python/cv/phone_detect_test.py
new file mode 100644
index 0000000..804c39d
--- /dev/null
+++ b/python/cv/phone_detect_test.py
@@ -0,0 +1,58 @@
+"""Manual visual test for the phone detector.
+
+Opens the webcam, runs phone_detector.find_phones() on each frame, and draws
+a red box + confidence around any phone it sees. Hold your phone up — it
+should light up. This is just a visualiser; all the detection logic lives in
+phone_detector.py.
+
+Run it:
+ cd python
+ source .venv/bin/activate
+ python cv/phone_detect_test.py
+
+Press 'q' (video window focused) or Ctrl+C to quit.
+"""
+
+import cv2 # type: ignore
+
+import phone_detector
+
+
+def _draw_phone_boxes(frame, phones) -> None:
+    """Outline each detected phone in red and label it with its score."""
+    for x1, y1, x2, y2, score in phones:
+        cv2.rectangle(frame, (x1, y1), (x2, y2), (0, 0, 255), 2)
+        cv2.putText(
+            frame, f"PHONE {score:.2f}", (x1, max(y1 - 8, 12)),
+            cv2.FONT_HERSHEY_SIMPLEX, 0.6, (0, 0, 255), 2,
+        )
+
+
+def main() -> None:
+    """Show the webcam feed with phone detections drawn on it until quit.
+
+    Raises RuntimeError when webcam index 0 cannot be opened. The capture and
+    all OpenCV windows are released on the way out, including after Ctrl+C.
+    """
+    capture = cv2.VideoCapture(0)
+    if not capture.isOpened():
+        raise RuntimeError("Could not open webcam (index 0).")
+
+    print("Running. Hold a phone up. Press 'q' or Ctrl+C to quit.\n")
+
+    try:
+        while True:
+            ok, frame = capture.read()
+            if not ok:
+                # Failed read: skip this iteration and try again.
+                continue
+
+            phones = phone_detector.find_phones(frame)
+            _draw_phone_boxes(frame, phones)
+
+            # "\r" keeps the status on a single, constantly rewritten line.
+            status_line = "PHONE DETECTED" if phones else "..."
+            print(status_line, end="\r", flush=True)
+
+            cv2.imshow("phone detector test (press q to quit)", frame)
+            pressed_key = cv2.waitKey(1) & 0xFF
+            if pressed_key == ord("q"):
+                break
+
+    except KeyboardInterrupt:
+        pass
+    finally:
+        capture.release()
+        cv2.destroyAllWindows()
+        print("\nstopped.")
+
+
+if __name__ == "__main__":
+ main()
diff --git a/python/cv/phone_detector.py b/python/cv/phone_detector.py
new file mode 100644
index 0000000..7e549aa
--- /dev/null
+++ b/python/cv/phone_detector.py
@@ -0,0 +1,306 @@
+"""Phone-in-frame detection.
+
+Perception only: given one webcam frame, decide whether a phone is visible.
+Whether being on the phone *counts as distracted* (for how long, etc.) is
+policy that lives elsewhere — this module just answers "phone in this frame?".
+
+Backed by YOLOX-S (Apache-2.0) running locally via onnxruntime (MIT). Both
+are permissively licensed and bundle cleanly into a shipped app — no PyTorch,
+no AGPL.
+
+Public API:
+ find_phones(frame) -> list[(x1, y1, x2, y2, score)] # raw boxes
+ detect_phone(frame) -> dict # protocol event
+
+The event dict matches the WebSocket protocol in PLAN.md:
+ { "type": "phone", "status": "none"|"detected",
+ "confidence": float, "timestamp": int }
+"""
+
+import os
+import time
+
+import cv2
+import numpy as np
+import onnxruntime as ort
+
+
+# ---------------------------------------------------------------------------
+# Config
+# ---------------------------------------------------------------------------
+
+# Model path resolved relative to this file (python/cv/), so it works no
+# matter which directory the program is launched from.
+_THIS_DIR = os.path.dirname(os.path.abspath(__file__))
+MODEL_PATH = os.path.join(_THIS_DIR, "..", "models", "yolox_s.onnx")
+
+# YOLOX-S input resolution.
+INPUT_SIZE = (640, 640)
+
+# COCO class index for "cell phone" (class 67 of the model's 80).
+CELL_PHONE_CLASS_ID = 67
+
+# Minimum combined score (objectness * class prob) to believe a detection.
+SCORE_THRESHOLD = 0.30
+
+# IoU threshold for non-max suppression (drops duplicate overlapping boxes).
+NMS_IOU_THRESHOLD = 0.45
+
+
+# ---------------------------------------------------------------------------
+# Module-level state: the model is loaded once and reused for every frame.
+# Loading on every call would be unbearably slow.
+# ---------------------------------------------------------------------------
+
+_session: ort.InferenceSession | None = None
+
+
+def _get_session() -> ort.InferenceSession:
+    """Return the shared onnxruntime session, creating it on first use.
+
+    Raises FileNotFoundError if nothing exists at MODEL_PATH.
+    """
+    global _session
+    if _session is not None:
+        return _session
+    if not os.path.exists(MODEL_PATH):
+        hint = "Run ./setup.sh (or download yolox_s.onnx into python/models/)."
+        raise FileNotFoundError(f"YOLOX model not found at {MODEL_PATH}. " + hint)
+    _session = ort.InferenceSession(MODEL_PATH, providers=["CPUExecutionProvider"])
+    return _session
+
+
+# ---------------------------------------------------------------------------
+# Pre/post-processing
+# ---------------------------------------------------------------------------
+
+
+def _preprocess(frame_bgr: np.ndarray):
+    """Letterbox a BGR frame into the tensor layout the YOLOX model takes.
+
+    The frame is scaled by one factor so it fits inside INPUT_SIZE with its
+    aspect ratio intact, pasted into the top-left corner of a grey canvas,
+    and rearranged into a batched, channels-first float32 array. Pixel
+    values are left in the 0-255 range (no division by 255).
+
+    Args:
+        frame_bgr: image array shaped (height, width, channels). It is
+            pasted into a 3-channel canvas, so 3 channels are assumed.
+
+    Returns:
+        A tuple (input_tensor, scale_ratio).
+        input_tensor: C-contiguous float32 array shaped
+            (1, 3, INPUT_SIZE[0], INPUT_SIZE[1]).
+        scale_ratio: the factor the frame was scaled by. Dividing a
+            model-space coordinate by it maps that coordinate back onto
+            the original frame.
+    """
+
+    # --- Sizes -------------------------------------------------------------
+    # INPUT_SIZE is read as (height, width), the same order as frame.shape.
+    canvas_height, canvas_width = INPUT_SIZE[0], INPUT_SIZE[1]
+    frame_height, frame_width = frame_bgr.shape[0], frame_bgr.shape[1]
+
+    # --- Scale factor ------------------------------------------------------
+    # A single factor is used for both axes so the picture is not stretched.
+    # The smaller per-axis ratio is the largest scale at which both sides
+    # still fit on the canvas.
+    scale_ratio = min(
+        canvas_height / frame_height,
+        canvas_width / frame_width,
+    )
+
+    # int() truncates, so a scaled side never comes out larger than the
+    # canvas side it has to fit into.
+    scaled_width = int(frame_width * scale_ratio)
+    scaled_height = int(frame_height * scale_ratio)
+
+    # --- Resize ------------------------------------------------------------
+    # cv2.resize takes the target size as (width, height) - width first.
+    scaled_frame = cv2.resize(
+        frame_bgr,
+        (scaled_width, scaled_height),
+        interpolation=cv2.INTER_LINEAR,
+    )
+
+    # --- Letterbox ---------------------------------------------------------
+    # Start from a canvas filled with grey (114) and copy the scaled frame
+    # into its top-left corner. Whatever the frame does not cover stays grey
+    # and forms the padding.
+    canvas = np.full(
+        (canvas_height, canvas_width, 3), 114, dtype=np.uint8
+    )
+    canvas[:scaled_height, :scaled_width] = scaled_frame
+
+    # --- Tensor layout -----------------------------------------------------
+    # (height, width, channels) -> (channels, height, width), then a leading
+    # batch axis of size 1 -> (1, channels, height, width).
+    batched = canvas.transpose(2, 0, 1)[np.newaxis, :, :, :]
+
+    # Cast to float32 only; the values stay in 0-255.
+    as_float = batched.astype(np.float32)
+
+    # transpose() changes strides, not the underlying buffer, so ask numpy
+    # for a C-contiguous array (copied if needed) before handing it to the
+    # inference session.
+    input_tensor = np.ascontiguousarray(as_float)
+
+    return input_tensor, scale_ratio
+
+
+def _decode(raw: np.ndarray) -> np.ndarray:
+    """Convert grid-relative YOLOX predictions into input-image pixels.
+
+    Each row of `raw` is paired with one cell of one of three grids laid
+    over the INPUT_SIZE image, with strides of 8, 16 and 32 pixels per cell.
+    The first four columns are relative to that cell:
+
+        center_xy = (raw_xy + cell_xy) * stride
+        size_wh   = exp(raw_wh) * stride
+
+    Args:
+        raw: 2-D array with one row per grid cell, paired in this order:
+            stride 8 first, then 16, then 32, each grid in row-major order.
+            Columns 0-1 are the x/y offsets and columns 2-3 the raw
+            width/height; further columns are left untouched. With
+            INPUT_SIZE 640x640 that is 6400 + 1600 + 400 = 8400 rows.
+
+    Returns:
+        The same array object as `raw`: columns 0-3 are overwritten in place
+        with center x, center y, width and height in pixels.
+    """
+
+    grid_cells = []    # (column, row) of every cell, one array per grid
+    grid_strides = []  # matching stride for every cell, as a column vector
+
+    for stride in (8, 16, 32):
+        columns = INPUT_SIZE[1] // stride
+        rows = INPUT_SIZE[0] // stride
+
+        # Row-major walk over the grid: the column index cycles fastest,
+        # the row index advances once per completed row.
+        column_of_cell = np.tile(np.arange(columns), rows)
+        row_of_cell = np.repeat(np.arange(rows), columns)
+        grid_cells.append(np.column_stack((column_of_cell, row_of_cell)))
+
+        # All cells of one grid share its stride.
+        grid_strides.append(np.full((columns * rows, 1), stride))
+
+    # Stack the three grids so row i here describes row i of `raw`.
+    cell_xy = np.concatenate(grid_cells, axis=0)
+    cell_stride = np.concatenate(grid_strides, axis=0)
+
+    # Centers: add the cell position, then scale cells up to pixels.
+    raw[:, 0:2] = (raw[:, 0:2] + cell_xy) * cell_stride
+
+    # Sizes: exp() makes them positive before scaling to pixels.
+    raw[:, 2:4] = np.exp(raw[:, 2:4]) * cell_stride
+    return raw
+
+
+# ---------------------------------------------------------------------------
+# Public API
+# ---------------------------------------------------------------------------
+
+
+def find_phones(frame) -> list:
+    """Locate cell phones in a single frame.
+
+    Args:
+        frame: image array as accepted by _preprocess(), or None.
+
+    Returns:
+        A list of (x1, y1, x2, y2, score) tuples, one per phone that is left
+        after non-max suppression. The corners are ints in the coordinates
+        of the original frame; score is a float above SCORE_THRESHOLD. The
+        list is empty when frame is None or no candidate clears the bar.
+
+    Raises:
+        FileNotFoundError: propagated from _get_session() when the model
+            file is missing.
+    """
+    # A missing frame just means there is nothing to inspect.
+    if frame is None:
+        return []
+
+    # --- Inference ---------------------------------------------------------
+    session = _get_session()
+    input_tensor, scale_ratio = _preprocess(frame)
+
+    # run() hands back one entry per model output. The first entry carries
+    # the predictions, and its first element belongs to our single image.
+    # np.asarray keeps the value typed as a plain ndarray, which is what the
+    # type-checker needs in order to allow indexing.
+    input_name = session.get_inputs()[0].name
+    outputs = session.run(None, {input_name: input_tensor})
+    predictions = _decode(np.asarray(outputs[0])[0])
+
+    # --- Scoring -----------------------------------------------------------
+    # Phone confidence = objectness (column 4) multiplied by the cell-phone
+    # class score (column 5 + CELL_PHONE_CLASS_ID).
+    phone_confidence = (
+        predictions[:, 4] * predictions[:, 5 + CELL_PHONE_CLASS_ID]
+    )
+
+    # Drop everything at or below the threshold; stop early if nothing is left.
+    confident = phone_confidence > SCORE_THRESHOLD
+    if not confident.any():
+        return []
+    scores = phone_confidence[confident]
+
+    # --- Boxes in original-frame pixels ------------------------------------
+    # _decode() yields (center_x, center_y, width, height) in model space.
+    # Corners are center -/+ half the size; dividing by scale_ratio undoes
+    # the scaling applied in _preprocess(). No offset is needed because the
+    # frame was pasted at the canvas origin.
+    centers = predictions[confident, 0:2]
+    half_sizes = predictions[confident, 2:4] / 2
+    top_left = (centers - half_sizes) / scale_ratio
+    bottom_right = (centers + half_sizes) / scale_ratio
+    corner_boxes = np.hstack((top_left, bottom_right))  # x1, y1, x2, y2
+
+    # --- Non-max suppression -----------------------------------------------
+    # Several overlapping candidates can fire for one phone. NMSBoxes keeps
+    # the best-scoring one of each overlapping group; it expects every box
+    # as integer [x, y, width, height].
+    boxes_for_nms = [
+        [int(x1), int(y1), int(x2 - x1), int(y2 - y1)]
+        for x1, y1, x2, y2 in corner_boxes
+    ]
+    kept_indices = cv2.dnn.NMSBoxes(
+        boxes_for_nms, scores.tolist(), SCORE_THRESHOLD, NMS_IOU_THRESHOLD
+    )
+
+    # --- Result ------------------------------------------------------------
+    survivors = np.array(kept_indices).flatten()
+    return [
+        (*map(int, corner_boxes[index].astype(int)), float(scores[index]))
+        for index in survivors
+    ]
+
+
+def detect_phone(frame) -> dict:
+    """Build the protocol event describing whether a phone is in `frame`.
+
+    The event reports "detected" with the highest phone score found, or
+    "none" with confidence 0.0. The timestamp (milliseconds since the epoch)
+    is taken before detection runs.
+    """
+    timestamp_ms = int(time.time() * 1000)
+
+    # find_phones() puts the score last in each (x1, y1, x2, y2, score) tuple.
+    scores = [score for *_corners, score in find_phones(frame)]
+
+    if scores:
+        return _build_result("detected", max(scores), timestamp_ms)
+    return _build_result("none", 0.0, timestamp_ms)
+
+
+def _build_result(status: str, confidence: float, timestamp_ms: int) -> dict:
+    """Return the "phone" protocol event for the given status and score."""
+    return dict(
+        type="phone",
+        status=status,
+        confidence=confidence,
+        timestamp=timestamp_ms,
+    )
diff --git a/python/docker_README.md b/python/docker_README.md
new file mode 100644
index 0000000..c1fcacd
--- /dev/null
+++ b/python/docker_README.md
@@ -0,0 +1,121 @@
+# Taskmaster CV Worker
+
+Computer vision worker responsible for phone detection and future focus monitoring features.
+
+## Option 1: Docker (recommended)
+
+No local Python setup required.
+
+Build the image:
+
+```bash
+docker build -t taskmaster-cv-worker ./python
+```
+
+Run image-based detection:
+
+```bash
+docker run --rm taskmaster-cv-worker python cv/phone_image_test.py test_assets/phone_sample.jpg
+```
+
+Expected output:
+
+```text
+{'type': 'phone', 'status': 'detected', ...}
+```
+
+This uses a sample image and does not require a webcam.
+
+---
+
+## Option 2: Local development
+
+Recommended for webcam testing.
+
+Create a virtual environment:
+
+```bash
+cd python
+python3.11 -m venv .venv
+```
+
+Activate it:
+
+Linux/macOS:
+
+```bash
+source .venv/bin/activate
+```
+
+Windows:
+
+```powershell
+.\.venv\Scripts\Activate.ps1
+```
+
+Install dependencies:
+
+```bash
+pip install -r requirements.txt
+```
+
+Download the model (`setup.sh` lives in the repository root, one level up):
+
+```bash
+../setup.sh
+```
+
+Run webcam test:
+
+```bash
+python cv/phone_detect_test.py
+```
+
+Run detection loop:
+
+```bash
+python cv/detection_loop.py
+```
+
+---
+
+## Install Docker
+
+### Windows / macOS
+
+Download Docker Desktop:
+
+https://www.docker.com/products/docker-desktop/
+
+Verify installation:
+
+```bash
+docker --version
+docker run hello-world
+```
+
+### Ubuntu
+
+```bash
+sudo apt update
+sudo apt install -y docker.io
+sudo systemctl enable docker
+sudo systemctl start docker
+```
+
+Verify installation (`sudo` is needed unless your user is in the `docker` group):
+
+```bash
+docker --version
+sudo docker run hello-world
+```
+
+---
+
+## Notes
+
+- Python 3.11 is required.
+- The YOLOX-S model is not committed to Git.
+- Docker downloads the model during image build.
+- Docker is intended for environment consistency and automated testing.
+- Webcam testing is currently easier to perform locally.
\ No newline at end of file
diff --git a/python/requirements.txt b/python/requirements.txt
new file mode 100644
index 0000000..a5293d1
--- /dev/null
+++ b/python/requirements.txt
@@ -0,0 +1,19 @@
+# Taskmaster — Python CV worker dependencies
+# Built/tested on Python 3.11 (MediaPipe has no wheels for 3.13/3.14 yet).
+#
+# Installed for you by ./setup.sh. To do it manually:
+# cd python
+# python3.11 -m venv .venv
+# source .venv/bin/activate
+# pip install -r requirements.txt
+
+# --- Computer vision (phone detection now; gaze detection planned) ---
+opencv-python>=4.9 # webcam capture + image ops
+mediapipe>=0.10.9 # face mesh for gaze detection (planned)
+onnxruntime>=1.17 # runs the YOLOX phone-detection model (local, MIT)
+numpy>=1.26,<2.0 # array math; pinned <2 for MediaPipe compatibility
+
+# --- WebSocket server (needed later: Electron <-> Python bridge) ---
+fastapi>=0.110 # app + routing for the detection-event server
+uvicorn[standard]>=0.27 # ASGI server that runs FastAPI
+websockets>=12.0 # WebSocket transport to the Electron main process
diff --git a/setup.sh b/setup.sh
new file mode 100755
index 0000000..9005fe0
--- /dev/null
+++ b/setup.sh
@@ -0,0 +1,75 @@
+#!/usr/bin/env bash
+#
+# Taskmaster one-shot setup.
+# Installs BOTH halves of the app:
+# 1. Python CV worker -> venv at python/.venv + pip deps from python/requirements.txt
+# 2. Electron app -> npm deps in electron/
+#
+# Usage:
+# ./setup.sh
+#
+# Re-runnable: safe to run again; it reuses an existing venv and npm cache.
+
+# Stop immediately if any command fails, and treat unset vars as errors.
+set -euo pipefail
+
+# Always operate relative to this script's own location, no matter where it
+# is called from.
+PROJECT_ROOT="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)"
+cd "$PROJECT_ROOT"
+
+# ---------------------------------------------------------------------------
+# 1. Python CV worker
+# ---------------------------------------------------------------------------
+echo "==> [1/2] Python CV worker"
+
+# MediaPipe has no wheels for Python 3.13/3.14, so we pin to 3.11 explicitly.
+if ! command -v python3.11 >/dev/null 2>&1; then
+ echo "ERROR: python3.11 not found. Install it (e.g. 'brew install python@3.11') and re-run." >&2
+ exit 1
+fi
+
+# Create the venv only if it does not already exist.
+if [ ! -d "python/.venv" ]; then
+ echo " creating venv at python/.venv (Python 3.11)"
+ python3.11 -m venv python/.venv
+else
+ echo " reusing existing venv at python/.venv"
+fi
+
+# Install dependencies into the venv using its own pip (no need to 'activate').
+echo " installing Python dependencies"
+python/.venv/bin/pip install --upgrade pip
+python/.venv/bin/pip install -r python/requirements.txt
+
+# Download the phone-detection model (YOLOX-S, Apache-2.0; ~34 MB, gitignored).
+# curl -f + rename-after-success: a failed fetch never leaves a bogus model.
+PHONE_MODEL="python/models/yolox_s.onnx"
+if [ ! -f "$PHONE_MODEL" ]; then
+  echo " downloading phone-detection model (YOLOX-S)"
+  mkdir -p python/models
+  curl -fsSL -o "$PHONE_MODEL.part" "https://github.com/Megvii-BaseDetection/YOLOX/releases/download/0.1.1rc0/yolox_s.onnx"
+  mv "$PHONE_MODEL.part" "$PHONE_MODEL"
+else
+  echo " phone-detection model already present"
+fi
+
+# ---------------------------------------------------------------------------
+# 2. Electron app
+# ---------------------------------------------------------------------------
+echo "==> [2/2] Electron app"
+
+if ! command -v npm >/dev/null 2>&1; then
+ echo "ERROR: npm not found. Install Node.js >= 18 and re-run." >&2
+ exit 1
+fi
+
+echo " installing npm dependencies in electron/"
+( cd electron && npm install )
+
+# ---------------------------------------------------------------------------
+echo ""
+echo "Done. To run the CV worker:"
+echo " cd python && source .venv/bin/activate && python cv/detection_loop.py"
+echo "To run the Electron app:"
+echo " cd electron && npm run dev"