diff --git a/README.md b/README.md index 7221e90..eed2cdf 100644 --- a/README.md +++ b/README.md @@ -1,410 +1,124 @@ -# boss-cli +# boss-cli (recruiter fork) [![PyPI version](https://img.shields.io/pypi/v/kabi-boss-cli.svg)](https://pypi.org/project/kabi-boss-cli/) -[![CI](https://github.com/jackwener/boss-cli/actions/workflows/ci.yml/badge.svg)](https://github.com/jackwener/boss-cli/actions/workflows/ci.yml) [![Python](https://img.shields.io/badge/python-%3E%3D3.10-blue.svg)](https://pypi.org/project/kabi-boss-cli/) -A CLI for BOSS 直聘 — search jobs, view recommendations, manage applications, chat with recruiters, **and manage candidates as a recruiter** via reverse-engineered API 🤝 +CLI for BOSS 直聘 **招聘方 (recruiter)** workflows — manage candidates, sync resumes to a local cache, and chat with applicants from the terminal. -[English](#features) | [中文](#功能特性) +> This fork is scoped to recruiter mode. Job-seeker commands still exist in the binary but are not documented here. -## More Tools - -- [xiaohongshu-cli](https://github.com/jackwener/xiaohongshu-cli) — Xiaohongshu CLI for notes, search, and interactions -- [bilibili-cli](https://github.com/jackwener/bilibili-cli) — Bilibili CLI for videos, users, and search -- [twitter-cli](https://github.com/jackwener/twitter-cli) — Twitter/X CLI for timelines, bookmarks, and posting -- [discord-cli](https://github.com/jackwener/discord-cli) — Discord CLI for local-first sync, search, and export -- [tg-cli](https://github.com/jackwener/tg-cli) — Telegram CLI for local-first sync, search, and export -- [rdt-cli](https://github.com/jackwener/rdt-cli) — Reddit CLI for feed, search, posts, and interactions - -## Features - -- 🔐 **Auth** — auto-extract browser cookies (10+ browsers), QR code login, `--cookie-source` explicit browser selection, live validation against real search APIs -- 🔍 **Search** — jobs by keyword with city/salary/experience/degree/industry/scale/stage/job-type filters -- ⭐ **Recommendations** — personalized 
job recommendations based on profile -- 📋 **Detail & Export** — view full job details, short-index navigation (`boss show 3`), CSV/JSON export -- 📜 **History** — browse job viewing history -- 👤 **Profile** — view personal info, resume status -- 📮 **Applications** — view applied jobs list -- 📋 **Interviews** — view interview invitations -- 💬 **Chat** — view communicated boss list -- 🤝 **Greet** — send greetings to recruiters, single or batch (with 1.5s rate-limit delay) -- 🏙️ **Cities** — 40+ supported cities -- 🤖 **Agent-friendly** — structured output envelope (`{ok, schema_version, data}`), Rich output on stderr -- 👔 **Recruiter Mode** — view posted jobs, manage candidates, chat history, export candidate data (CSV/JSON) - -## Installation +## Install ```bash -# Recommended: uv tool (fast, isolated) uv tool install kabi-boss-cli - -# Or: pipx -pipx install kabi-boss-cli - -# Optional: YAML output support -pip install kabi-boss-cli[yaml] -``` - -Upgrade to the latest version: - -```bash -uv tool upgrade kabi-boss-cli -# Or: pipx upgrade kabi-boss-cli +# or: pipx install kabi-boss-cli ``` From source: ```bash -git clone git@github.com:jackwener/boss-cli.git -cd boss-cli -uv sync +git clone git@github.com:qianjunye/boss-cli.git +cd boss-cli && uv sync ``` -## Usage +## Auth ```bash -# ─── Auth ───────────────────────────────────────── -boss login # Auto-detect browser cookies, fallback to QR -boss login --cookie-source chrome # Extract from specific browser -boss login --qrcode # QR code login only -boss status # Check login status (validates real search session, shows cookie names) -boss logout # Clear saved cookies - -# ─── Search ─────────────────────────────────────── -boss search "golang" # Search jobs -boss search "Python" --city 杭州 # Filter by city -boss search "Java" --salary 20-30K # Filter by salary -boss search "前端" --exp 3-5年 # Filter by experience -boss search "AI" --degree 硕士 # Filter by degree -boss search "后端" --industry 互联网 # Filter by industry -boss 
search "产品" --scale 1000-9999人 # Filter by company size -boss search "数据" --stage 已上市 # Filter by funding stage -boss search "运维" --job-type 全职 # Filter by job type -boss search "后端" --city 深圳 -p 2 # Pagination - -# ─── Detail & Export ────────────────────────────── -boss show 3 # View job #3 from last search -boss detail # View full job details -boss detail --json # JSON output (with schema envelope) -boss export "Python" -n 50 -o jobs.csv # Export search results to CSV -boss export "golang" --format json -o jobs.json # Export as JSON - -# ─── Recommendations ────────────────────────────── -boss recommend # View recommended jobs -boss recommend -p 2 --json # Next page, JSON output - -# ─── Personal Center ───────────────────────────── -boss me # View profile -boss me --json # JSON output -boss applied # View applied jobs -boss interviews # View interview invitations -boss history # View browsing history -boss chat # View communicated bosses - -# ─── Greet ──────────────────────────────────────── -boss greet # Send greeting to a boss -boss greet --json # JSON result -boss batch-greet "golang" --city 杭州 -n 5 # Batch greet top 5 -boss batch-greet "Python" --salary 20-30K --dry-run # Preview only - -# ─── Utilities ──────────────────────────────────── -boss cities # List supported cities -boss --version # Show version -boss -v search "Python" # Verbose logging (request timing) +boss login # auto-extract cookies from local browser +boss login --cookie-source chrome +boss login --cdp # cookies from running Chrome (no QR needed) — recommended +boss login --qrcode # QR scan fallback +boss status +boss logout ``` -## Recruiter Mode (雇主端) +### `boss login --cdp` (recommended) -If you are an employer on BOSS直聘, these commands let you manage candidates from the terminal: - -```bash -# ─── Search & Discover (搜索 & 发现) ───────────── -boss recruiter search "golang" --city 深圳 --exp 3-5年 # Search candidates -boss recruiter recommend # Recommended candidates -boss recruiter 
recommend --job # Switch to different 岗位 -boss recruiter recommend -p 2 # Next page - -# ─── Greet & Communicate (沟通) ────────────────── -boss recruiter greet # Initiate chat with candidate -boss recruiter batch-view "Python" --city 杭州 -n 10 # Batch view top 10 (triggers "viewed" notice) -boss recruiter inbox # View candidate messages -boss recruiter inbox --job -p 2 # Filter by job, page 2 -boss recruiter reply "感谢您的关注..." # Reply to candidate -boss recruiter chat # View chat history - -# ─── Chat Actions (沟通页操作) ─────────────────── -boss recruiter request-resume --yes # 求简历 -boss recruiter exchange-phone --yes # 换电话 -boss recruiter exchange-wechat --yes # 换微信 -boss recruiter invite-interview --job # 约面试 -boss recruiter mark-unsuitable --job # 不合适 - -# ─── Resume (简历) ─────────────────────────────── -boss recruiter resume # View full resume in terminal -boss recruiter resume-download --job # Download resume as Markdown -boss recruiter geek --job-id 526908510 # Quick candidate info - -# ─── Job Management (职位管理) ─────────────────── -boss recruiter jobs # List your posted jobs -boss recruiter job-close --yes # Take job offline -boss recruiter job-reopen --yes # Bring job back online - -# ─── Export & Tags ─────────────────────────────── -boss recruiter labels # View candidate tags -boss recruiter export -o candidates.csv # Export to CSV -boss recruiter export --format json -o out.json # Export to JSON -``` +If you've already logged into zhipin.com in a Chrome started with `--remote-debugging-port=9222`, this command reads cookies directly from that browser — **no QR scan**. -### Recruiter Workflow Example +1. Launch Chrome with remote debugging: -```bash -# 1. Check your posted jobs -boss recruiter jobs + ```bash + # macOS + /Applications/Google\ Chrome.app/Contents/MacOS/Google\ Chrome \ + --remote-debugging-port=9222 --user-data-dir=/tmp/boss-chrome + # Linux + google-chrome --remote-debugging-port=9222 --user-data-dir=/tmp/boss-chrome + ``` -# 2. 
Browse recommended candidates for a specific job -boss recruiter recommend --job f806096ea327cd610nZ80t21FVNQ +2. Log into `https://www.zhipin.com` in that Chrome. +3. `pip install websocket-client` (one-time). +4. `boss login --cdp` (or `--cdp --cdp-port 9333` for a custom port). -# 3. Search for specific skills -boss recruiter search "golang" --city 深圳 +Required cookies: `wt2` / `wbg` / `zp_at`. `__zp_stoken__` is treated as optional — it's JS-generated and often unobtainable; recruiter APIs (recommend / inbox / chat / resume-sync) work without it. A few endpoints (search, some chat actions) may return `环境异常` when stoken is missing. -# 4. View a candidate's full resume -boss recruiter resume --job +## Recruiter commands -# 5. Download resume for offline review -boss recruiter resume-download --job +```bash +# Jobs +boss recruiter jobs # list posted jobs +boss recruiter job-close --yes +boss recruiter job-reopen --yes -# 6. Start a conversation +# Candidate discovery +boss recruiter recommend --job +boss recruiter search "golang" --city 深圳 --exp 3-5年 +boss recruiter geek --job-id +boss recruiter resume +boss recruiter resume-download --job + +# Local resume cache (incremental sync) ⭐ +export BOSS_CACHE_DIR=./candidates +boss recruiter resume-sync # sync all online jobs +boss recruiter resume-sync # sync one job +boss recruiter resume-sync --force # full re-fetch +boss recruiter resume-sync --dry-run + +# Communication (requires __zp_stoken__) +boss recruiter inbox --job +boss recruiter chat +boss recruiter reply "感谢您的关注..." boss recruiter greet - -# 7. Check inbox and reply -boss recruiter inbox -p 1 -boss recruiter reply "感谢您的关注,方便电话聊聊吗?" - -# 8. Export all candidates -boss recruiter export --format json -o candidates.json -``` - -## Structured Output - -All commands with `--json` / `--yaml` use a unified output envelope (see [SCHEMA.md](./SCHEMA.md)): - -```json -{ - "ok": true, - "schema_version": "1", - "data": { ... 
} -} -``` - -- **Non-TTY stdout** → auto YAML (agent-friendly) -- **`--json`** → explicit JSON -- **Rich output** → stderr (won't pollute pipes: `boss search X --json | jq .data`) - -## Authentication - -boss-cli supports multiple authentication methods: - -1. **Saved cookies** — loads from `~/.config/boss-cli/credential.json` -2. **Browser cookies** — auto-detects installed browsers (Chrome, Firefox, Edge, Brave, Arc, Chromium, Opera, Vivaldi, Safari, LibreWolf) -3. **QR code login** — terminal QR output using Unicode half-blocks, scan with Boss 直聘 APP - -`boss login` auto-extracts browser cookies first, falls back to QR login. Use `--cookie-source chrome` to specify a browser, or `--qrcode` to skip browser detection. The command now verifies the saved credential against a real authenticated API before reporting success. - -`boss recommend` follows the live web app's current recommendation data source and request context, which improves compatibility when the legacy recommendation endpoint is rejected. - -`boss status --json` now reports per-flow health such as `search_authenticated` and `recommend_authenticated`, which helps diagnose partial-session issues. To avoid turning repeated checks into their own anti-bot problem, health snapshots are cached briefly in-memory. - -### Cookie TTL & Auto-Refresh - -Saved cookies auto-refresh from browser after **7 days**. If browser refresh fails, falls back to stale cookies and logs a warning. 
- -## Rate Limiting & Anti-Detection - -- **Gaussian jitter**: request delays with `random.gauss(0.3, 0.15)` -- **Random long pauses**: 5% chance of 2-5s pause to mimic reading -- **Rate-limit auto-cooldown**: code=9 triggers exponential backoff (10s→20s→40s→60s) + request delay doubling -- **Exponential backoff**: auto-retry on HTTP 429/5xx (max 3 retries) -- **Response cookie merge**: `Set-Cookie` headers merged back into session -- **HTML redirect detection**: catches auth redirects to login page -- **Browser fingerprint**: macOS Chrome 145 UA, `sec-ch-ua`, `DNT`, `Priority` headers -- **Request logging**: `boss -v` shows request URLs, status codes, and timing - -## Use as AI Agent Skill - -boss-cli ships with a [`SKILL.md`](./SKILL.md) that teaches AI agents how to use it. - -### [Skills CLI](https://github.com/vercel-labs/skills) (Recommended) - -```bash -npx skills add jackwener/boss-cli +boss recruiter request-resume --yes +boss recruiter exchange-phone --yes +boss recruiter exchange-wechat --yes +boss recruiter invite-interview --job +boss recruiter mark-unsuitable --job + +# Export +boss recruiter export -o candidates.csv +boss recruiter export --format json -o out.json ``` -| Flag | Description | -| --- | --- | -| `-g` | Install globally (user-level, shared across projects) | -| `-a claude-code` | Target a specific agent | -| `-y` | Non-interactive mode | - -### Manual Install +### Resume cache layout -```bash -mkdir -p .agents/skills -git clone git@github.com:jackwener/boss-cli.git .agents/skills/boss-cli -``` - -### ~~OpenClaw / ClawHub~~ (Deprecated) - -> ⚠️ ClawHub install method is deprecated and no longer supported. Use [Skills CLI](#skills-cli-recommended) or Manual Install above. 
- -## Project Structure - -```text -boss_cli/ -├── __init__.py # Package version -├── cli.py # Click entry point (lightweight, add_command only) -├── client.py # API client (rate-limit, cooldown, retry, anti-detection) -├── auth.py # Authentication (10+ browsers, QR login, TTL refresh) -├── constants.py # URLs, headers (Chrome 145), city codes, filter enums -├── exceptions.py # Structured exceptions (BossApiError hierarchy) -├── index_cache.py # Short-index cache for `boss show` -└── commands/ - ├── _common.py # SCHEMA envelope, handle_command, stderr console - ├── auth.py # login (--cookie-source/--qrcode), logout, status, me - ├── search.py # search, recommend, detail, show, export, history, cities - ├── personal.py # applied, interviews - ├── social.py # chat, greet (--json), batch-greet (1.5s delay) - └── recruiter.py # recruiter-jobs, inbox, geek, chat, labels, export ``` - -## Development - -```bash -# Install dependencies -uv sync - -# Run tests -uv run pytest tests/ -v - -# Smoke tests (need cookies) -uv run pytest tests/ -v -m smoke - -# Lint -uv run ruff check . +$BOSS_CACHE_DIR/{encrypt_job_id}/ + _meta.json # job info, last_sync_at, candidates list + {encrypt_uid}.md # one Markdown resume per candidate ``` -## Troubleshooting - -**Q: `boss status` says not authenticated but local cookies still exist** - -`boss status` now verifies the session against a real search API. If `authenticated=false`, your local credential file exists but the underlying web session is no longer usable. - -**Q: `环境异常 (__zp_stoken__ 已过期)`** - -Your session cookies have expired. Run `boss logout && boss login` to refresh. If QR login only returns a partial cookie set, log in from a browser first and then run `boss login`. - -**Q: `暂无投递记录` but I have applied** - -Some features require fresh `__zp_stoken__`. Try re-logging in from a browser, then `boss login`. +`resume-sync` is incremental: only new uids are fetched; vanished candidates are marked `archived` (files preserved). 
The platform caps the recommend list at 300 people — periodic sync is how you accumulate a longer history. -**Q: Search returns no results** +## Output -Check your city filter. Some keywords are city-specific. Use `boss cities` to see available cities. +Structured envelope `{ ok, schema_version, data }` for `--json` / `--yaml`. Rich output goes to stderr so pipes stay clean: `boss recruiter jobs --json | jq .data`. ---- - -## 功能特性 - -- 🔐 **认证** — 自动提取浏览器 Cookie(10+ 浏览器),二维码扫码登录,`--cookie-source` 指定浏览器 -- 🔍 **搜索** — 按关键词搜索职位,支持城市/薪资/经验/学历/行业/规模/融资阶段/职位类型筛选 -- ⭐ **推荐** — 基于求职期望的个性化推荐 -- 📋 **详情 & 导出** — 职位详情,编号导航 (`boss show 3`),CSV/JSON 导出 -- 📜 **历史** — 查看浏览历史 -- 👤 **个人** — 查看个人资料 -- 📮 **投递** — 查看已投递职位列表 -- 📋 **面试** — 查看面试邀请 -- 💬 **沟通** — 查看沟通过的 Boss 列表 -- 🤝 **打招呼** — 向 Boss 打招呼/投递,支持批量操作(内置 1.5s 防风控延迟) -- 🏙️ **城市** — 40+ 城市支持 -- 🤖 **Agent 友好** — 结构化输出 envelope,Rich 输出走 stderr -- 👔 **招聘方模式** — 查看职位、候选人管理、聊天记录、导出候选人数据 (CSV/JSON) - -## 使用示例 +## Use as AI agent skill ```bash -# 认证 -boss login # 自动提取浏览器 Cookie,失败则二维码 -boss login --cookie-source chrome # 指定浏览器 -boss status # 检查登录状态 -boss logout # 清除 Cookie - -# 搜索 & 详情 -boss search "golang" --city 杭州 # 按城市搜索 -boss search "AI" --industry 互联网 --scale 1000-9999人 # 行业+规模 -boss search "数据" --stage 已上市 --salary 30-50K # 融资+薪资 -boss show 3 # 按编号查看详情 -boss detail --json # 指定 ID 查看(JSON envelope) -boss export "Python" -n 50 -o jobs.csv # 导出 CSV - -# 推荐 & 历史 -boss recommend # 个性化推荐 -boss history # 浏览历史 - -# 个人中心 -boss me --json # 个人资料(JSON) -boss applied # 已投递 -boss interviews # 面试邀请 -boss chat # 沟通列表 - -# 打招呼 -boss greet --json # 单个打招呼 -boss batch-greet "golang" -n 10 # 批量打招呼 -boss batch-greet "golang" --dry-run # 预览 - -# 工具 -boss cities # 城市列表 -boss -v search "Python" # 详细日志 +npx skills add qianjunye/boss-cli +# or manually: +git clone git@github.com:qianjunye/boss-cli.git .agents/skills/boss-cli ``` -## 招聘方模式 - -```bash -# 搜索 & 推荐 -boss recruiter search "golang" --city 深圳 --exp 3-5年 -boss recruiter recommend --job # 按岗位查看推荐牛人 -boss 
recruiter recommend -p 2 # 翻页 - -# 沟通 -boss recruiter greet # 向候选人打招呼 -boss recruiter batch-view "Python" -n 10 # 批量查看 (触发被查看通知) -boss recruiter inbox -p 1 # 查看候选人消息 -boss recruiter reply "您好..." # 回复候选人 - -# 沟通页操作 -boss recruiter request-resume # 求简历 -boss recruiter exchange-phone # 换电话 -boss recruiter exchange-wechat # 换微信 -boss recruiter invite-interview --job # 约面试 -boss recruiter mark-unsuitable --job # 不合适 - -# 简历 -boss recruiter resume # 终端查看简历 -boss recruiter resume-download --job # 下载简历为 Markdown - -# 职位管理 -boss recruiter jobs # 查看招聘职位 -boss recruiter job-close # 关闭职位 -boss recruiter job-reopen # 重新开启 - -# 导出 -boss recruiter labels # 查看标签 -boss recruiter export -o candidates.csv # 导出候选人 -``` +See [`SKILL.md`](./SKILL.md) for the agent contract. -## 常见问题 +## Troubleshooting -- `环境异常` — Cookie 过期,执行 `boss logout && boss login` 刷新 -- 搜索无结果 — 检查城市筛选或关键词,使用 `boss cities` 查看支持的城市 +- `环境异常 (__zp_stoken__ 已过期)` → `boss logout && boss login` (use the CDP flow above) +- `code=9` rate-limit → built-in cooldown auto-retries; just wait +- CDP path skipped silently → install `websocket-client`, ensure Chrome is on port 9222 and logged in ## License diff --git a/SKILL.md b/SKILL.md index ceff297..75791a4 100644 --- a/SKILL.md +++ b/SKILL.md @@ -1,20 +1,21 @@ --- name: boss-cli -description: Use boss-cli for ALL BOSS 直聘 operations — searching jobs, viewing recommendations, managing applications, chatting with recruiters, and batch greeting. Invoke whenever the user requests any job search or recruitment platform interaction on BOSS 直聘. +description: Use boss-cli for BOSS 直聘 recruiter/employer operations — managing posted jobs, discovering candidates, syncing resumes to a local cache for AI analysis, and communicating with candidates. Invoke whenever the user requests any recruitment or candidate management on BOSS 直聘. 
author: jackwener -version: "0.3.0" +version: "0.3.6" tags: - boss - zhipin - boss直聘 - - job-search - recruitment + - recruiter - cli --- -# boss-cli — BOSS 直聘 CLI Tool +# boss-cli — BOSS 直聘 招聘者 CLI **Binary:** `boss` +**Scope of this skill:** recruiter (雇主端) commands only. Job-seeker commands exist but are not covered here. **Credentials:** browser cookies (auto-extracted from 10+ browsers) or QR code login (`--qrcode`) ## Setup @@ -24,14 +25,13 @@ tags: uv tool install kabi-boss-cli # Or: pipx install kabi-boss-cli -# Upgrade to latest (recommended) +# Upgrade to latest uv tool upgrade kabi-boss-cli -# Or: pipx upgrade kabi-boss-cli ``` ## Authentication -**IMPORTANT FOR AGENTS**: Before executing ANY boss command, check if credentials exist first. Do NOT assume cookies are configured. +**IMPORTANT FOR AGENTS**: Before executing ANY boss command, check if credentials exist first. ### Step 0: Check if already authenticated @@ -39,12 +39,12 @@ uv tool upgrade kabi-boss-cli boss status --json 2>/dev/null | jq -r '.authenticated' | grep -q true && echo "AUTH_OK" || echo "AUTH_NEEDED" ``` -If `AUTH_OK`, skip to [Command Reference](#command-reference). +If `AUTH_OK`, skip to [Recruiter Commands](#recruiter-commands). If `AUTH_NEEDED`, proceed to Step 1. ### Step 1: Guide user to authenticate -Ensure user is logged into zhipin.com in any supported browser (Chrome, Firefox, Edge, Brave, Arc, Chromium, Opera, Vivaldi, Safari, LibreWolf). Then: +Ensure user is logged into zhipin.com (recruiter account) in any supported browser. Then: ```bash boss login # auto-detect browser with valid cookies @@ -56,9 +56,38 @@ Verify with: ```bash boss status -boss me --json | jq '.data.name' ``` +### `boss login --cdp` (recommended for recruiter mode) + +If the user is already logged into zhipin.com in a Chrome they started with `--remote-debugging-port=9222`, **no QR scan is needed** — `--cdp` reads cookies directly from that browser session. + +1. 
Launch Chrome with remote debugging: + + ```bash + # macOS + /Applications/Google\ Chrome.app/Contents/MacOS/Google\ Chrome \ + --remote-debugging-port=9222 \ + --user-data-dir=/tmp/boss-chrome + # Linux + google-chrome --remote-debugging-port=9222 --user-data-dir=/tmp/boss-chrome + ``` + +2. In that Chrome, log into `https://www.zhipin.com` (recruiter account). +3. `pip install websocket-client` (one-time, optional dependency). +4. Run: + + ```bash + boss login --cdp # default port 9222 + boss login --cdp --cdp-port 9333 # custom port + ``` + +The required cookies are `wt2`, `wbg`, `zp_at`. **`__zp_stoken__` is no longer required** — it's JS-generated and frequently unobtainable; recruiter APIs like recommend / inbox / chat work without it. Search and some communication endpoints may return `环境异常` when stoken is missing. + +### `boss login --qrcode` (fallback) + +QR scan via the Boss app. Pure QR login cannot obtain `__zp_stoken__`, so the result is equivalent to a `--cdp` login minus the stoken. Use when CDP is not available. + ### Step 2: Handle common auth issues | Symptom | Agent action | @@ -70,110 +99,108 @@ boss me --json | jq '.data.name' ## Agent Defaults -All machine-readable output uses the envelope documented in [SCHEMA.md](./SCHEMA.md). -Payloads live under `.data`. +All machine-readable output uses the envelope documented in [SCHEMA.md](./SCHEMA.md). Payloads live under `.data`. - Non-TTY stdout → auto YAML - `--json` / `--yaml` → explicit format -- Rich output → **stderr** (safe for pipes: `boss search X --json | jq .data`) +- Rich output → **stderr** (safe for pipes: `boss recruiter jobs --json | jq .data`) -## Command Reference +## Recruiter Commands -### Search & Browse +All recruiter commands live under `boss recruiter `. 
-| Command | Description | Example | -|---------|-------------|---------| -| `boss search ` | Search jobs with filters | `boss search "golang" --city 杭州 --salary 20-30K` | -| `boss show ` | View job #N from last search | `boss show 3` | -| `boss detail ` | View full job details | `boss detail abc123 --json` | -| `boss export ` | Export search results to CSV/JSON | `boss export "Python" -n 50 -o jobs.csv` | -| `boss recommend` | Personalized recommendations | `boss recommend -p 2 --json` | -| `boss history` | View browsing history | `boss history --json` | -| `boss cities` | List supported cities | `boss cities` | +### Candidate Cache Sync (本地缓存同步) ⭐ -### Personal Center +The most important recruiter workflow for AI analysis. Syncs candidate resumes to local Markdown files so they can be read and analyzed without real-time API calls. -| Command | Description | Example | -|---------|-------------|---------| -| `boss me` | View profile (name, age, degree) | `boss me --json` | -| `boss applied` | View applied jobs | `boss applied -p 1 --json` | -| `boss interviews` | View interview invitations | `boss interviews --json` | -| `boss chat` | View communicated bosses | `boss chat --json` | +```bash +# Sync all online jobs (incremental — skips already-cached candidates) +boss recruiter resume-sync -### Actions +# Sync a specific job only +boss recruiter resume-sync -| Command | Description | Example | -|---------|-------------|---------| -| `boss greet ` | Greet a boss / apply | `boss greet abc123 --json` | -| `boss batch-greet ` | Batch greet from search | `boss batch-greet "Python" --city 杭州 -n 5` | -| `boss batch-greet --dry-run` | Preview without sending | `boss batch-greet "golang" --dry-run` | +# Specify output directory +boss recruiter resume-sync --output-dir /path/to/workspace/candidates -### Account +# Force full re-fetch +boss recruiter resume-sync --force -| Command | Description | -|---------|-------------| -| `boss login` | Extract cookies from browser 
(auto-detect, fallback QR) | -| `boss login --cookie-source ` | Extract from specific browser | -| `boss login --qrcode` | QR code login only (terminal QR output) | -| `boss status` | Check authentication status (shows cookie names) | -| `boss logout` | Clear saved credentials | +# Preview without writing files +boss recruiter resume-sync --dry-run -## Search Filter Options +# Set default cache dir via env var +export BOSS_CACHE_DIR=/path/to/workspace/candidates +boss recruiter resume-sync +``` -| Filter | Flag | Values | -|--------|------|--------| -| City | `--city` | 北京, 上海, 杭州, 深圳, etc. (use `boss cities` for full list) | -| Salary | `--salary` | 3K以下, 3-5K, 5-10K, 10-15K, 15-20K, 20-30K, 30-50K, 50K以上 | -| Experience | `--exp` | 不限, 在校/应届, 1年以内, 1-3年, 3-5年, 5-10年, 10年以上 | -| Degree | `--degree` | 不限, 大专, 本科, 硕士, 博士 | -| Industry | `--industry` | 互联网, 电子商务, 游戏, 人工智能, 金融, 教育培训, 医疗健康, etc. | -| Company Scale | `--scale` | 0-20人, 20-99人, 100-499人, 500-999人, 1000-9999人, 10000人以上 | -| Funding Stage | `--stage` | 未融资, 天使轮, A轮, B轮, C轮, D轮及以上, 已上市, 不需要融资 | -| Job Type | `--job-type` | 全职, 兼职, 实习 | +**Cache directory structure:** +``` +$BOSS_CACHE_DIR/ + /{encrypt_job_id}/ + _meta.json # Job info + last sync time + candidate uid list + /{encrypt_uid}.md # Candidate resume in Markdown format +``` + +**_meta.json fields:** `job_name`, `encrypt_job_id`, `salary_desc`, `last_sync_at`, `total_candidates`, `new_this_sync`, `archived_candidates`, `candidates` -## Agent Workflow Examples +**Incremental logic:** Only fetches candidates whose `encrypt_uid` is not already present in `_meta.json`. Candidates who disappear from the recommend list are marked `archived` (files kept). -### Search → Batch Greet pipeline +**Performance:** ~1s per candidate due to built-in rate-limit delay. Initial full sync of 200 candidates ≈ 4 minutes; incremental updates (few new candidates) ≈ 10-30 seconds. 
+ +**To analyze cached candidates:** Read `.md` files directly from `$BOSS_CACHE_DIR/{encrypt_job_id}/`. Use `_meta.json` to know which candidates exist and when data was last updated. + +### Job Management ```bash -# Preview first -boss batch-greet "golang" --city 杭州 --salary 20-30K --dry-run -# Then execute -boss batch-greet "golang" --city 杭州 --salary 20-30K -n 10 -y +boss recruiter jobs # List posted jobs (encryptJobId needed for sync) +boss recruiter jobs --json # JSON output ``` -### Search → Detail pipeline (structured) +### Candidate Discovery ```bash -# Search and extract securityId -SEC_ID=$(boss search "golang" --city 杭州 --json | jq -r '.data.jobList[0].securityId') -# Get full detail -boss detail "$SEC_ID" --json | jq '.data.jobInfo | {jobName, salaryDesc, skills}' +boss recruiter recommend --job # Candidates who greeted this job (platform-sorted) +boss recruiter search "政府事务" --city 上海 # Active search for candidates +boss recruiter geek --job-id # View one candidate's detail +boss recruiter resume # View full resume in terminal +boss recruiter resume-download --job # Download resume as Markdown ``` -### Daily job check workflow +### Communication (requires __zp_stoken__) ```bash -boss recommend --json | jq '.data.jobList | length' # Check recommendations count -boss search "Python" --city 杭州 --json # Search specific jobs -boss show 1 # View top result details -boss applied --json # Check application status -boss interviews --json # Check interview invitations -boss chat --json # Check messages -boss history --json # Review browsing history +boss recruiter inbox --job # Candidates who messaged you +boss recruiter reply "消息内容" # Reply to candidate +boss recruiter chat # View chat history +boss recruiter greet # Initiate chat with candidate +boss recruiter request-resume --yes # Request resume from candidate +boss recruiter exchange-phone --yes # Exchange phone number +boss recruiter invite-interview --job # Invite for interview +boss recruiter 
mark-unsuitable --job # Mark as unsuitable ``` -### Export pipeline +### Export ```bash -boss export "golang" --city 杭州 --salary 20-30K -n 50 -o jobs.csv -boss export "Python" -n 100 --format json -o jobs.json +boss recruiter export -o candidates.csv # Export candidate list to CSV +boss recruiter export --format json -o out.json # Export as JSON ``` -### Profile check +## Recruiter Agent Workflow ```bash -boss me --json | jq '.data | {name, age, degreeCategory}' +# Step 1: Get job list and encryptJobIds +boss recruiter jobs --json | jq '.data[] | select(.jobOnlineStatus==1) | {jobName, encryptJobId}' + +# Step 2: Sync candidates to local cache +export BOSS_CACHE_DIR=./candidates +boss recruiter resume-sync + +# Step 3: Analyze from local files (no API needed) +ls ./candidates/{encrypt_job_id}/ # List candidate files +cat ./candidates/{encrypt_job_id}/_meta.json # Check sync status +cat ./candidates/{encrypt_job_id}/{uid}.md # Read one resume ``` ## Error Codes @@ -188,18 +215,16 @@ Structured error codes returned in the `error.code` field (see [SCHEMA.md](./SCH ## Limitations -- **No message sending** — cannot send chat messages (MQTT/Protobuf required) -- **No resume editing** — cannot edit resume from CLI -- **No company search** — company pages return HTML (need __zp_stoken__) +- **No message sending via MQTT** — only HTTP-based reply/greet - **Single account** — one set of cookies at a time -- **Rate limited** — batch-greet has built-in 1.5s delay between greetings +- **Rate limited** — built-in delays between requests +- **Communication commands need __zp_stoken__** — obtained via browser cookie extraction or CDP hydration, not pure QR login ## Anti-Detection Notes for Agents - **Do NOT parallelize requests** — built-in Gaussian jitter delays exist for account safety -- **Rate-limit auto-recovery**: if code=9 occurs, client auto-cools-down with increasing delays (10s→20s→40s→60s) and retries once -- **Use `-v` flag for debugging**: `boss -v search "Python"` 
shows request timing -- **Batch greet limit**: recommend ≤ 10 greetings per session to avoid detection +- **Rate-limit auto-recovery**: if code=9 occurs, client auto-cools-down (10s→20s→40s→60s) and retries once +- **Use `-v` flag for debugging**: `boss -v recruiter jobs` shows request timing - **Cookies auto-refresh**: if ≥ 7 days old, boss-cli auto-tries browser extraction - **Re-login if `__zp_stoken__` expires**: run `boss logout && boss login` @@ -207,6 +232,22 @@ Structured error codes returned in the `error.code` field (see [SCHEMA.md](./SCH - Do not ask users to share raw cookie values in chat logs. - Prefer local browser cookie extraction over manual secret copy/paste. -- If auth fails, ask the user to re-login via `boss login`. -- Agent should treat cookie values as secrets (do not echo to stdout). +- Treat cookie values as secrets (do not echo to stdout). - Built-in rate-limit delay protects accounts; do not bypass it. + +## 候选人缓存策略说明(Agent 必读) + +### 300人上限问题 +BOSS直聘推荐列表每次最多返回 **300 人**,翻页返回相同数据(无效翻页)。 +这是平台硬限制,无法突破。 + +### 正确的增量同步策略 +- 推荐列表会**动态变化**:新候选人投递后会出现,旧的会消失 +- `resume-sync` 的增量逻辑:将新出现的 uid 与本地 `_meta.json` 中的 `candidates` 列表对比 +- 消失的候选人标记为 `archived`,简历文件**保留不删除** +- 定期同步可以积累超过300人的历史候选人库 + +### 建议同步频率 +- 热门岗位(候选人多):每天同步 1 次 +- 一般岗位:每 2-3 天同步 1 次 +- 使用 `--force` 强制覆盖时,会重新拉取当前推荐列表中的所有人 diff --git a/boss_cli/browser_login.py b/boss_cli/browser_login.py index 0fb2e37..f46829e 100644 --- a/boss_cli/browser_login.py +++ b/boss_cli/browser_login.py @@ -1,25 +1,43 @@ -"""Browser-assisted login enhancement via Camoufox. +"""Browser-assisted login enhancement via Camoufox or CDP. Hybrid approach: 1. Complete the QR login flow via HTTP (httpx) to obtain session cookies (wt2, wbg, zp_at). -2. Inject those cookies into a Camoufox browser and navigate to the site - so that client-side JavaScript generates ``__zp_stoken__``. -3. Export all cookies from the browser context. +2. 
# Upper bound on unrelated CDP messages (events, replies to other commands)
# skipped while waiting for one reply, so the read loop always terminates.
_CDP_MAX_SKIPPED_MESSAGES = 50


def _cdp_page_ws_url(targets: Any) -> str | None:
    """Return the WebSocket debugger URL of the first attachable page target.

    ``targets`` is the decoded body of Chrome's ``/json`` endpoint.  The
    listing can contain non-page targets and targets without a
    ``webSocketDebuggerUrl``, so the first entry is not necessarily usable.
    """
    if not isinstance(targets, list):
        return None
    for target in targets:
        if not isinstance(target, dict):
            continue
        ws_url = target.get("webSocketDebuggerUrl")
        if target.get("type") == "page" and isinstance(ws_url, str) and ws_url:
            return ws_url
    return None


def _cdp_call(
    ws: Any,
    call_id: int,
    method: str,
    params: dict[str, Any] | None = None,
) -> dict[str, Any]:
    """Send one CDP command over *ws* and return the reply with the same id.

    Raises ``RuntimeError`` when no matching reply shows up within
    ``_CDP_MAX_SKIPPED_MESSAGES`` messages.
    """
    payload: dict[str, Any] = {"id": call_id, "method": method}
    if params is not None:
        payload["params"] = params
    ws.send(json.dumps(payload))

    # Replies are matched by id instead of assuming the next frame is ours.
    for _ in range(_CDP_MAX_SKIPPED_MESSAGES):
        message = json.loads(ws.recv())
        if isinstance(message, dict) and message.get("id") == call_id:
            return message
    raise RuntimeError(f"CDP: no reply received for {method}")


def _cdp_fetch_site_cookies(
    debug_port: int,
    wait_seconds: float,
) -> dict[str, str] | None:
    """Navigate a Chrome page to the site via CDP and collect its cookies.

    Shared implementation behind ``_hydrate_stoken_via_cdp`` and
    ``_harvest_cookies_via_cdp``.

    Returns the cookies whose domain ends with one of
    ``BROWSER_EXPORT_DOMAINS`` (possibly an empty dict), or ``None`` when
    ``websocket-client`` is missing, Chrome is unreachable, no page target is
    available, or the WebSocket exchange fails.
    """
    try:
        import websocket  # type: ignore[import]
    except ImportError:
        logger.debug("CDP hydration skipped: websocket-client not installed")
        return None

    try:
        import urllib.request
        with urllib.request.urlopen(
            f"http://127.0.0.1:{debug_port}/json", timeout=3
        ) as resp:
            targets = json.loads(resp.read())
    except Exception as exc:
        logger.debug("Chrome CDP not available on port %d: %s", debug_port, exc)
        return None

    ws_url = _cdp_page_ws_url(targets)
    if not ws_url:
        logger.debug("CDP: no attachable page target found")
        return None

    try:
        # NOTE(review): recent Chrome versions appear to reject DevTools
        # handshakes that carry an Origin header unless started with
        # --remote-allow-origins; suppressing the header avoids that.
        ws = websocket.create_connection(ws_url, timeout=10, suppress_origin=True)
        try:
            # NOTE: this navigates the selected tab away from its current page.
            _cdp_call(ws, 1, "Page.navigate", {"url": f"{BASE_URL}/"})
            time.sleep(wait_seconds)  # let site JS generate __zp_stoken__
            reply = _cdp_call(ws, 2, "Network.getAllCookies")
        finally:
            # Always release the socket, also when send/recv raised.
            ws.close()
    except Exception as exc:
        logger.warning("CDP WebSocket error: %s", exc)
        return None

    result = reply.get("result")
    raw_cookies = result.get("cookies") if isinstance(result, dict) else None
    cookies: dict[str, str] = {}
    for item in raw_cookies if isinstance(raw_cookies, list) else []:
        if not isinstance(item, dict):
            continue
        domain = item.get("domain", "")
        name = item.get("name")
        value = item.get("value")
        if not isinstance(name, str) or not isinstance(value, str):
            continue
        if not isinstance(domain, str):
            continue
        if any(domain.endswith(d) for d in BROWSER_EXPORT_DOMAINS):
            cookies[name] = value
    return cookies


def _hydrate_stoken_via_cdp(
    debug_port: int = 9222,
    wait_seconds: float = 4.0,
) -> dict[str, str] | None:
    """Try to obtain ``__zp_stoken__`` from a running Chrome instance via CDP.

    Connects to a Chrome started with ``--remote-debugging-port``, loads the
    site in a real browser page so its JavaScript can set the cookie, then
    reads the cookies back.  Requires the ``websocket-client`` package.

    Parameters
    ----------
    debug_port:
        CDP port Chrome was started with (default: 9222).
    wait_seconds:
        How long to wait after navigation for JS to set the cookie.

    Returns
    -------
    dict[str, str] | None
        Flat dict of site cookies (including ``__zp_stoken__``) on success,
        or ``None`` when CDP is unavailable or the token was not generated.
    """
    cookies = _cdp_fetch_site_cookies(debug_port, wait_seconds)
    if cookies is None:
        return None
    if "__zp_stoken__" not in cookies:
        logger.debug("CDP: connected but __zp_stoken__ not generated")
        return None
    return cookies


def cdp_login(debug_port: int = 9222, wait_seconds: float = 4.0) -> Credential:
    """Harvest site cookies directly from a running Chrome via CDP.

    Assumes the user is already logged in inside a Chrome started with
    ``--remote-debugging-port=<debug_port>``; no QR scan is needed.  The
    credential is saved and returned.

    Raises
    ------
    BrowserLoginUnavailable
        If ``websocket-client`` is not installed, Chrome/CDP is unreachable,
        or the required login cookies are missing.
    """
    try:
        import websocket  # noqa: F401
    except ImportError as exc:
        raise BrowserLoginUnavailable(
            "websocket-client 未安装。安装: pip install websocket-client"
        ) from exc

    # __zp_stoken__ is optional here, so one harvest pass is enough.  Trying
    # the stoken-requiring variant first only repeated the navigation and the
    # wait whenever the token was absent.
    cookies = _harvest_cookies_via_cdp(debug_port=debug_port, wait_seconds=wait_seconds)
    if not cookies:
        raise BrowserLoginUnavailable(
            f"未能通过 CDP 获取 cookies。请确认 Chrome 已以 "
            f"--remote-debugging-port={debug_port} 启动,并已登录 zhipin.com"
        )

    cred = Credential(cookies=cookies)
    if not cred.has_required_cookies:
        missing = ", ".join(cred.missing_required_cookies)
        raise BrowserLoginUnavailable(
            f"CDP 已连接但缺少必要 cookies ({missing})。请在该 Chrome 中登录 zhipin.com 后重试"
        )

    save_credential(cred)
    return cred


def _harvest_cookies_via_cdp(
    debug_port: int = 9222,
    wait_seconds: float = 4.0,
) -> dict[str, str] | None:
    """Return whatever site cookies Chrome holds, without requiring the stoken.

    Same procedure as ``_hydrate_stoken_via_cdp`` minus the token check.
    Returns ``None`` when CDP is unavailable or no matching cookie exists.
    """
    return _cdp_fetch_site_cookies(debug_port, wait_seconds) or None
browser_qr_login( *, on_status: callable | None = None, @@ -140,10 +344,24 @@ def _emit(msg: str) -> None: # Step 1: Complete QR login via HTTP (reuse existing flow) cred = asyncio.run(qr_login()) - # Step 2: If __zp_stoken__ is missing, try to hydrate via browser + # Step 2: If __zp_stoken__ is missing, try CDP first, then Camoufox if "__zp_stoken__" not in cred.cookies: - _emit("\n🔧 正在通过浏览器补全 __zp_stoken__...") + _emit("\n🔧 正在补全 __zp_stoken__...") + + # --- Attempt 1: CDP (real Chrome, best anti-bot bypass) --- + cdp_result = _hydrate_stoken_via_cdp() + if cdp_result is not None: + merged = {**cred.cookies, **cdp_result} + cred = Credential(cookies=merged) + save_credential(cred) + _emit("✅ __zp_stoken__ 补全成功(CDP)!所有接口可正常使用") + return cred + + # CDP unavailable or Chrome not running — fall back to Camoufox + _emit(" (未检测到运行中的 Chrome,尝试 Camoufox 补全...)") + _emit(" 提示:以 --remote-debugging-port=9222 启动 Chrome 可提高成功率") + # --- Attempt 2: Camoufox headless browser --- try: enriched = _hydrate_stoken_via_browser(cred.cookies) except Exception as exc: @@ -155,9 +373,10 @@ def _emit(msg: str) -> None: merged = {**cred.cookies, **enriched} cred = Credential(cookies=merged) save_credential(cred) - _emit("✅ __zp_stoken__ 补全成功!所有接口可正常使用") + _emit("✅ __zp_stoken__ 补全成功(Camoufox)!所有接口可正常使用") else: _emit("⚠️ 浏览器未能生成 __zp_stoken__(Boss 直聘反爬检测)") _emit(" recommend/chat/applied 等接口仍可使用,search 可能受限") + _emit(" 如需完整功能,请以 --remote-debugging-port=9222 启动 Chrome 后重试") return cred diff --git a/boss_cli/commands/auth.py b/boss_cli/commands/auth.py index c5a4ef2..3fc3a88 100644 --- a/boss_cli/commands/auth.py +++ b/boss_cli/commands/auth.py @@ -20,28 +20,36 @@ @click.command() @click.option("--qrcode", is_flag=True, help="使用二维码扫码登录") +@click.option("--cdp", is_flag=True, help="从已登录的 Chrome(--remote-debugging-port=9222)直接抓取 cookie,无需扫码") +@click.option("--cdp-port", default=9222, type=int, show_default=True, help="CDP 端口") @click.option("--cookie-source", default=None, help="指定浏览器 
(chrome/firefox/edge/brave/arc/safari等)") -def login(qrcode: bool, cookie_source: str | None) -> None: - """扫码登录 Boss 直聘 APP""" +def login(qrcode: bool, cdp: bool, cdp_port: int, cookie_source: str | None) -> None: + """登录 Boss 直聘""" from ..auth import clear_credential, verify_credential def _finalize_login(cred, *, from_qr: bool = False) -> None: - # QR login cannot obtain __zp_stoken__ (generated by JS). - # If only that cookie is missing, accept the credential with a warning - # instead of failing the full API verification. - if from_qr and not cred.has_required_cookies: - missing = cred.missing_required_cookies - if missing == ["__zp_stoken__"]: - console.print(f"[green]✅ 登录成功![/green] ({len(cred.cookies)} cookies)") - console.print( - "[yellow]⚠️ __zp_stoken__ 缺失(该 cookie 由浏览器 JS 生成,QR 登录无法获取)。\n" - " 部分接口可能返回「环境异常」,建议用浏览器登录后再执行 boss login 补全。[/yellow]" - ) - return + # __zp_stoken__ is JS-generated and treated as optional. If it's + # missing, warn but don't reject the login — wt2/wbg/zp_at unlock + # roughly half of the recruiter API surface. + stoken_missing = "__zp_stoken__" not in cred.cookies authenticated, message = verify_credential(cred, force_refresh=True) if authenticated: console.print(f"[green]✅ 登录成功![/green] ({len(cred.cookies)} cookies)") + if stoken_missing: + console.print( + "[yellow]⚠️ __zp_stoken__ 缺失(由站点 JS 生成,CDP/QR 均可能无法获取)。\n" + " recommend / chat / inbox 等接口可用;search / 通信类接口可能返回「环境异常」。[/yellow]" + ) + return + if stoken_missing: + console.print( + f"[green]✅ 已保存 {len(cred.cookies)} 个 cookies[/green]" + ) + console.print( + "[yellow]⚠️ 接口校验未通过,且 __zp_stoken__ 缺失。\n" + " 部分接口可用,建议在浏览器登录后再次执行 boss login 补全。[/yellow]" + ) return clear_credential() console.print("[red]❌ 登录失败:凭证未通过实际接口校验[/red]") @@ -51,10 +59,31 @@ def _finalize_login(cred, *, from_qr: bool = False) -> None: console.print( "\n[yellow]💡 提示:浏览器运行时 Cookie 可能未写入磁盘,建议:\n" " 1. 关闭浏览器后重试 boss login\n" - " 2. 或使用 boss login --qrcode 扫码登录[/yellow]" + " 2. 
或使用 boss login --cdp(已登录 Chrome 直接抓取,无需扫码)\n" + " 3. 或使用 boss login --qrcode 扫码登录[/yellow]" ) raise SystemExit(1) + if cdp: + try: + from ..browser_login import cdp_login, BrowserLoginUnavailable + except ImportError as e: + console.print(f"[red]❌ {e}[/red]") + raise SystemExit(1) from None + try: + cred = cdp_login(debug_port=cdp_port) + except BrowserLoginUnavailable as e: + console.print(f"[red]❌ CDP 登录失败: {e}[/red]") + console.print( + "[yellow]💡 启动方式(macOS):\n" + " /Applications/Google\\ Chrome.app/Contents/MacOS/Google\\ Chrome \\\n" + " --remote-debugging-port=9222 --user-data-dir=/tmp/boss-chrome\n" + " 然后在该 Chrome 中登录 zhipin.com,再运行 boss login --cdp[/yellow]" + ) + raise SystemExit(1) from None + _finalize_login(cred, from_qr=True) + return + if qrcode: # Prefer browser-assisted login (captures __zp_stoken__ via JS) # Fallback to HTTP-only QR flow when camoufox is unavailable diff --git a/boss_cli/commands/recruiter.py b/boss_cli/commands/recruiter.py index 09ca7dc..f771600 100644 --- a/boss_cli/commands/recruiter.py +++ b/boss_cli/commands/recruiter.py @@ -6,7 +6,10 @@ import io import json import logging +import os import time +from datetime import datetime, timezone +from pathlib import Path import click from rich.panel import Panel @@ -852,109 +855,12 @@ def _fetch(c: BossClient) -> dict: console.print(f"[red]{data['error']}[/red]") return - # Build markdown - geek_detail = data.get("geekDetailInfo", data) - base_info = geek_detail.get("geekBaseInfo", geek_detail) - - name = base_info.get("name", base_info.get("geekName", "candidate")) - gender_val = base_info.get("gender", 0) - gender = "男" if gender_val == 1 else "女" if gender_val == 2 else "" - degree = base_info.get("degreeCategory", base_info.get("degree", "")) - work_year = base_info.get("workYearDesc", base_info.get("workYear", "")) - age = base_info.get("ageDesc", base_info.get("age", "")) - apply_status = base_info.get("applyStatusContent", base_info.get("applyStatus", "")) - expect_position = 
base_info.get("expectPosition", "") - expect_city = base_info.get("expectCity", "") - expect_salary = base_info.get("expectSalary", base_info.get("salaryDesc", "")) - - lines: list[str] = [] - lines.append(f"# {name}") - lines.append("") - - info_parts = [p for p in [gender, age, degree, work_year] if p] - if info_parts: - lines.append(" | ".join(info_parts)) - lines.append("") - - if apply_status: - lines.append(f"**求职状态:** {apply_status}") - lines.append("") - - expect_parts = [p for p in [expect_position, expect_city, expect_salary] if p] - if expect_parts: - lines.append("## 求职期望") - lines.append("") - lines.append(" | ".join(expect_parts)) - lines.append("") - - # Work experience - work_exp = geek_detail.get("geekWorkExpList", base_info.get("workExpList", [])) - if work_exp: - lines.append("## 工作经历") - lines.append("") - for w in work_exp: - company = w.get("company", w.get("companyName", "")) - position = w.get("positionName", w.get("position", "")) - time_desc = w.get("timeDesc", w.get("workTime", "")) - industry = w.get("industry", "") - desc = w.get("description", w.get("workDesc", "")) - header = f"### {company}" - if industry: - header += f" ({industry})" - lines.append(header) - lines.append("") - if time_desc: - lines.append(f"**{time_desc}** - {position}") - elif position: - lines.append(f"**{position}**") - lines.append("") - if desc: - lines.append(desc) - lines.append("") - - # Education - edu_exp = geek_detail.get("geekEduExpList", base_info.get("eduExpList", [])) - if edu_exp: - lines.append("## 教育经历") - lines.append("") - for e in edu_exp: - school = e.get("school", e.get("schoolName", "")) - major_name = e.get("major", e.get("majorName", "")) - degree_name = e.get("degree", e.get("degreeName", "")) - time_desc = e.get("timeDesc", e.get("eduTime", "")) - header = f"### {school}" - if degree_name: - header += f" - {degree_name}" - lines.append(header) - lines.append("") - parts = [p for p in [time_desc, major_name] if p] - if parts: - 
def _get_cache_dir(output_dir: str | None) -> Path:
    """Resolve the cache root directory.

    Precedence: ``output_dir`` (the ``--output-dir`` option), then the
    ``BOSS_CACHE_DIR`` environment variable, then ``~/.boss-cli/cache``.
    A leading ``~`` in an explicit value is expanded to the home directory.
    """
    chosen = output_dir or os.environ.get("BOSS_CACHE_DIR")
    if chosen:
        return Path(chosen).expanduser()
    return Path.home() / ".boss-cli" / "cache"


def _md_text(value: object) -> str:
    """Return *value* as display text; ``None`` becomes the empty string."""
    return "" if value is None else str(value)


def _md_entries(value: object) -> list[dict]:
    """Return the dict items of *value* when it is a list, else an empty list."""
    if not isinstance(value, list):
        return []
    return [item for item in value if isinstance(item, dict)]


def _md_degree(record: dict, label_key: str) -> str:
    """Return the degree label of *record*, ignoring purely numeric codes.

    ``label_key`` names the text field; ``degree`` is used as a fallback only
    when it is not an int.
    """
    label = _md_text(record.get(label_key))
    if label:
        return label
    raw = record.get("degree", "")
    return str(raw) if raw and not isinstance(raw, int) else ""


def _md_period(start: object, end: object) -> str:
    """Return ``start-end`` when both are set, otherwise whichever one is."""
    begin, finish = _md_text(start), _md_text(end)
    if begin and finish:
        return f"{begin}-{finish}"
    return begin or finish


def _build_candidate_md(data: dict) -> str:
    """Build Markdown resume content from a geek detail response.

    Fields read from ``data["geekDetailInfo"]`` (or from ``data`` itself when
    that key is absent):

    * ``geekBaseInfo``    - name, gender, age, degree, status, self description
    * ``geekExpPosList``  - expected positions / city / salary / industry
    * ``geekWorkExpList`` - work history
    * ``geekEduExpList``  - education
    * ``geekProjExpList`` - project experience

    Missing or ``None`` fields are skipped, non-string values are converted
    with ``str``, and list items that are not dicts are ignored.

    Returns the Markdown text ending with exactly one newline; an empty or
    non-dict ``data`` yields a one-line placeholder document.
    """
    if not isinstance(data, dict) or not data:
        return "# (简历数据为空)\n"

    geek_detail = data.get("geekDetailInfo") or data
    if not isinstance(geek_detail, dict):
        geek_detail = data
    base_info = geek_detail.get("geekBaseInfo") or geek_detail
    if not isinstance(base_info, dict):
        base_info = {}

    name = (
        _md_text(base_info.get("name"))
        or _md_text(base_info.get("geekName"))
        or "candidate"
    )
    gender_val = base_info.get("gender", 0)
    gender = "男" if gender_val == 1 else "女" if gender_val == 2 else ""
    degree = _md_degree(base_info, "degreeCategory")
    work_year = _md_text(base_info.get("workYearsDesc")) or _md_text(
        base_info.get("workYearDesc")
    )
    age = _md_text(base_info.get("ageDesc"))
    apply_status = _md_text(base_info.get("applyStatusContent"))
    active_time = _md_text(base_info.get("activeTimeDesc"))
    user_desc = _md_text(base_info.get("userDescription"))

    lines: list[str] = [f"# {name}", ""]

    headline = [part for part in (gender, age, degree, work_year) if part]
    if headline:
        lines += [" | ".join(headline), ""]

    if apply_status:
        lines.append(f"**求职状态:** {apply_status}")
    if active_time:
        lines.append(f"**活跃时间:** {active_time}")
    if apply_status or active_time:
        lines.append("")

    # Expected positions (a candidate may list several).
    expectations = _md_entries(geek_detail.get("geekExpPosList"))
    if expectations:
        lines += ["## 求职期望", ""]
        for item in expectations:
            fields = (
                _md_text(item.get(key))
                for key in ("positionName", "locationName", "salaryDesc", "industryDesc")
            )
            parts = [text for text in fields if text]
            if parts:
                lines.append(" | ".join(parts))
        lines.append("")

    if user_desc:
        lines += ["## 个人简介", "", user_desc, ""]

    jobs = _md_entries(geek_detail.get("geekWorkExpList"))
    if jobs:
        lines += ["## 工作经历", ""]
        for job in jobs:
            position = _md_text(job.get("positionName"))
            start = _md_text(job.get("startYearMonStr"))
            end = _md_text(job.get("endYearMonStr"))
            if start and end:
                period = f"{start}-{end}"
            else:
                period = start or _md_text(job.get("workYearDesc"))

            header = f"### {_md_text(job.get('company'))}"
            department = _md_text(job.get("department"))
            if department:
                header += f" · {department}"
            lines += [header, ""]

            # Keep the period even when the position title is missing.
            if period and position:
                lines += [f"**{period}** | {position}", ""]
            elif period or position:
                lines += [f"**{period or position}**", ""]

            duty = _md_text(job.get("responsibility"))
            if duty:
                lines += [duty, ""]

    schools = _md_entries(geek_detail.get("geekEduExpList"))
    if schools:
        lines += ["## 教育经历", ""]
        for edu in schools:
            header = f"### {_md_text(edu.get('school'))}"
            degree_name = _md_degree(edu, "degreeName")
            if degree_name:
                header += f" — {degree_name}"
            lines += [header, ""]
            period = _md_period(edu.get("startDateDesc"), edu.get("endDateDesc"))
            parts = [part for part in (period, _md_text(edu.get("major"))) if part]
            if parts:
                lines += [" | ".join(parts), ""]

    projects = _md_entries(geek_detail.get("geekProjExpList"))
    if projects:
        lines += ["## 项目经历", ""]
        for proj in projects:
            proj_name = _md_text(proj.get("projectName")) or _md_text(proj.get("name"))
            role = _md_text(proj.get("roleName")) or _md_text(proj.get("role"))
            header = f"### {proj_name}"
            if role:
                header += f" ({role})"
            lines += [header, ""]
            period = _md_period(proj.get("startYearMonStr"), proj.get("endYearMonStr"))
            if period:
                lines += [f"**{period}**", ""]
            detail = _md_text(proj.get("responsibility")) or _md_text(
                proj.get("description")
            )
            if detail:
                lines += [detail, ""]

    return "\n".join(lines).rstrip() + "\n"
def _is_safe_path_component(value: object) -> bool:
    """Return True when *value* is a non-empty string usable as one file name.

    The ids come from API responses and are joined onto the cache directory,
    so separators and dot-names are rejected to keep writes inside it.
    """
    return (
        isinstance(value, str)
        and value not in ("", ".", "..")
        and "/" not in value
        and "\\" not in value
        and "\x00" not in value
    )


def _meta_uid_set(value: object) -> set[str]:
    """Return the string items of a list read from ``_meta.json`` as a set."""
    if not isinstance(value, list):
        return set()
    return {item for item in value if isinstance(item, str)}


def _sync_job(
    client: BossClient,
    job: dict,
    cache_dir: Path,
    force: bool,
    dry_run: bool,
) -> dict:
    """Sync the recommended candidates of one job into ``cache_dir``.

    Writes one ``{uid}.md`` resume per fetched candidate plus ``_meta.json``
    under ``cache_dir/<encryptJobId>``.  Without ``force`` only uids not yet
    listed in ``_meta.json`` are fetched; with ``force`` every uid currently
    in the recommend list is fetched again.  Candidates that left the list
    are moved to ``archived_candidates`` (their files are kept); candidates
    that come back are moved out of it again.  With ``dry_run`` nothing is
    fetched or written.

    Returns a summary dict with the keys ``job_name``, ``enc_job_id``,
    ``skipped``, ``new``, ``archived``, ``total`` and ``errors`` (plus
    ``reason`` when ``skipped`` is true).
    """
    enc_job_id = job.get("encryptJobId")
    job_name = job.get("jobName") or enc_job_id or ""
    if not _is_safe_path_component(enc_job_id):
        return {
            "job_name": job_name,
            "enc_job_id": enc_job_id,
            "skipped": True,
            "reason": "encryptJobId 缺失或不能用作目录名",
            "new": 0,
            "archived": 0,
            "total": 0,
            "errors": [],
        }

    job_dir = cache_dir / enc_job_id
    meta_path = job_dir / "_meta.json"

    # Load existing meta; an unreadable or malformed file counts as "no meta".
    existing_meta: dict = {}
    if meta_path.exists():
        try:
            loaded = json.loads(meta_path.read_text(encoding="utf-8"))
        except (OSError, ValueError):
            loaded = None
        if isinstance(loaded, dict):
            existing_meta = loaded

    existing_uids = _meta_uid_set(existing_meta.get("candidates"))
    archived_uids = _meta_uid_set(existing_meta.get("archived_candidates"))

    # Fetch recommend list (tolerate a None payload / None friendList).
    rec_data = client.get_boss_recommend_geeks(enc_job_id=enc_job_id) or {}
    friend_list = rec_data.get("friendList") or []
    uid_to_friend = {
        f["encryptUid"]: f
        for f in friend_list
        if isinstance(f, dict) and f.get("encryptUid")
    }
    current_uids = set(uid_to_friend)

    # Candidates that came back are active again; their resume file was kept
    # when they were archived, so they are not re-fetched unless forced.
    reappeared = archived_uids & current_uids
    archived_uids -= reappeared

    # Candidates that disappeared from the recommend list.
    newly_archived = existing_uids - current_uids
    archived_uids |= newly_archived

    if force:
        wanted = current_uids
    else:
        wanted = current_uids - existing_uids - reappeared
    # Iterate in recommend-list order so the output is deterministic.
    to_fetch = [uid for uid in uid_to_friend if uid in wanted]

    new_count = 0
    errors: list[str] = []
    # uids that still have no resume file after this run; they are kept out of
    # ``candidates`` so the next incremental sync treats them as new again.
    unsynced: set[str] = set()

    if not dry_run:
        job_dir.mkdir(parents=True, exist_ok=True)

    for uid in to_fetch:
        friend = uid_to_friend.get(uid, {})
        security_id = friend.get("securityId", "")
        name = friend.get("name", uid)

        if not _is_safe_path_component(uid):
            errors.append(f"{uid}: unsafe encryptUid, skipped")
            unsynced.add(uid)
            continue

        if dry_run:
            # "\[" keeps Rich from parsing "[dry-run]" as a markup tag.
            console.print(f"    [dim]\\[dry-run] 将写入: {enc_job_id}/{uid}.md ({name})[/dim]")
            new_count += 1
            continue

        md_path = job_dir / f"{uid}.md"
        try:
            detail = client.get_boss_view_geek(
                encrypt_geek_id=uid,
                encrypt_job_id=enc_job_id,
                security_id=security_id,
            )
            md_path.write_text(_build_candidate_md(detail or {}), encoding="utf-8")
            new_count += 1
            console.print(f"    [green]✓[/green] {name} ({uid[:12]}...)")
        except BossApiError as exc:
            errors.append(f"{uid}: {exc}")
            if not md_path.exists():
                unsynced.add(uid)
            console.print(f"    [yellow]✗[/yellow] {name} 拉取失败: {exc}")

    # Update _meta.json
    all_candidates = sorted((existing_uids | current_uids) - archived_uids - unsynced)
    meta = {
        "job_name": job_name,
        "job_id": job.get("jobId"),
        "encrypt_job_id": enc_job_id,
        "salary_desc": job.get("salaryDesc", ""),
        "address": job.get("address", ""),
        "job_online_status": job.get("jobOnlineStatus", 1),
        "last_sync_at": datetime.now(timezone.utc).isoformat(),
        "total_candidates": len(all_candidates),
        "new_this_sync": new_count,
        "archived_candidates": sorted(archived_uids),
        "candidates": all_candidates,
    }

    if not dry_run:
        meta_path.write_text(json.dumps(meta, ensure_ascii=False, indent=2), encoding="utf-8")

    return {
        "job_name": job_name,
        "enc_job_id": enc_job_id,
        "skipped": False,
        "new": new_count,
        "archived": len(newly_archived),
        "total": len(all_candidates),
        "errors": errors,
    }


# CLI entry point for the resume cache.  The docstring below is the help text
# click shows to users, so it stays in the CLI's own language.
@recruiter.command("resume-sync")
@click.argument("encrypt_job_id", default="", required=False)
@click.option(
    "--output-dir", "output_dir", default=None,
    help="缓存根目录(默认: $BOSS_CACHE_DIR 或 ~/.boss-cli/cache/)",
)
@click.option("--force", is_flag=True, help="强制全量重拉(重新拉取当前推荐列表中的所有人,覆盖已有文件)")
@click.option("--dry-run", is_flag=True, help="只打印将执行的操作,不实际写文件")
@structured_output_options
def recruiter_resume_sync(
    encrypt_job_id: str,
    output_dir: str | None,
    force: bool,
    dry_run: bool,
    as_json: bool,
    as_yaml: bool,
) -> None:
    """将候选人简历缓存到本地 Markdown 文件(增量更新)

    不指定 ENCRYPT_JOB_ID 时同步所有在线岗位。

    \b
    目录结构:
      {output_dir}/{encrypt_job_id}/_meta.json
      {output_dir}/{encrypt_job_id}/{encrypt_uid}.md

    \b
    环境变量:
      BOSS_CACHE_DIR  默认缓存目录(--output-dir 优先级更高)
    """
    cred = require_auth()
    cache_dir = _get_cache_dir(output_dir)

    if dry_run:
        # "\[" keeps Rich from swallowing the bracketed label as a markup tag.
        console.print(f"[cyan]\\[dry-run 模式] 缓存目录: {cache_dir}[/cyan]")
    else:
        console.print(f"[dim]缓存目录: {cache_dir}[/dim]")

    try:
        with BossClient(cred) as client:
            # Get job list (tolerate a None payload).
            all_jobs = [j for j in (client.get_boss_chatted_jobs() or []) if isinstance(j, dict)]

            if encrypt_job_id:
                jobs = [j for j in all_jobs if j.get("encryptJobId") == encrypt_job_id]
                if not jobs:
                    console.print(f"[red]未找到岗位: {encrypt_job_id}[/red]")
                    raise SystemExit(1)
            else:
                jobs = [j for j in all_jobs if j.get("jobOnlineStatus") == 1]

            console.print(f"共 [bold]{len(jobs)}[/bold] 个岗位待同步\n")

            results = []
            for job in jobs:
                job_name = job.get("jobName", job.get("encryptJobId", ""))
                console.print(f"[bold cyan]▶ {job_name}[/bold cyan]")
                result = _sync_job(client, job, cache_dir, force=force, dry_run=dry_run)
                results.append(result)

                if result.get("skipped"):
                    console.print(f"  [dim]跳过: {result.get('reason', '')}[/dim]")
                else:
                    console.print(
                        f"  新增 [green]{result['new']}[/green] 人 | "
                        f"归档 [yellow]{result['archived']}[/yellow] 人 | "
                        f"总计 {result['total']} 人"
                    )
                console.print("")

            # Summary
            total_new = sum(r["new"] for r in results)
            total_archived = sum(r["archived"] for r in results)
            total_skipped = sum(1 for r in results if r.get("skipped"))
            total_failed = sum(len(r.get("errors", [])) for r in results)

            console.print(
                f"[bold green]同步完成[/bold green] — "
                f"新增 {total_new} 人 | 归档 {total_archived} 人 | "
                f"跳过 {total_skipped} 个岗位"
            )
            if total_failed:
                console.print(f"[yellow]拉取失败 {total_failed} 人,下次同步会自动重试[/yellow]")

            # NOTE(review): --yaml (as_yaml) is accepted via
            # structured_output_options but not honored here; only the JSON
            # envelope is emitted.  Confirm how sibling commands emit YAML.
            if as_json:
                import sys
                print(json.dumps(
                    {"ok": True, "schema_version": "1", "data": results},
                    ensure_ascii=False, indent=2,
                ), file=sys.stdout)

    except BossApiError as exc:
        console.print(f"[red]同步失败: {exc}[/red]")
        raise SystemExit(1) from None