Skip to content
Open
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
139 changes: 132 additions & 7 deletions boss_cli/browser_login.py
Original file line number Diff line number Diff line change
@@ -1,25 +1,43 @@
"""Browser-assisted login enhancement via Camoufox.
"""Browser-assisted login enhancement via Camoufox or CDP.

Hybrid approach:
1. Complete the QR login flow via HTTP (httpx) to obtain session cookies
(wt2, wbg, zp_at).
2. Inject those cookies into a Camoufox browser and navigate to the site
so that client-side JavaScript generates ``__zp_stoken__``.
3. Export all cookies from the browser context.
2. If ``__zp_stoken__`` is missing, first try to obtain it via Chrome
DevTools Protocol (CDP) from a running real Chrome instance. A real
browser session bypasses Boss Zhipin's anti-bot fingerprinting more
reliably than a headless browser.
3. If CDP is unavailable, fall back to injecting cookies into a Camoufox
browser and navigating to the site so that client-side JavaScript
generates ``__zp_stoken__``.
4. Export all cookies from whichever method succeeded.

This gives us the complete cookie set that pure HTTP cannot achieve.

NOTE: Boss Zhipin uses aggressive anti-bot detection that may prevent
``__zp_stoken__`` generation even in Camoufox. The QR login still
works without it for most APIs (recommend, chat, applied, etc.).

CDP usage
---------
Launch Chrome with the remote-debugging port enabled before running
``boss login --qrcode``::

chrome --remote-debugging-port=9222 --user-data-dir=/tmp/boss-chrome

The CDP path requires the ``websocket-client`` package
(``pip install websocket-client``). It is tried first and silently
skipped when the package is absent or Chrome is not running.
"""

from __future__ import annotations

import asyncio
import json
import logging
import subprocess
import sys
import time
from typing import Any

from .auth import Credential, qr_login, save_credential
Expand Down Expand Up @@ -77,6 +95,98 @@ def _normalize_browser_cookies(raw_cookies: list[dict[str, Any]]) -> dict[str, s
return cookies


def _cdp_call(
    ws: Any,
    msg_id: int,
    method: str,
    params: dict[str, Any] | None = None,
) -> dict[str, Any]:
    """Send one CDP command over *ws* and return the reply with a matching ``id``.

    Frames that do not carry ``msg_id`` (protocol events, stale replies) are
    discarded, so the caller never mistakes an unrelated frame for its answer.
    A ``RuntimeError`` is raised if no matching reply shows up within a
    bounded number of frames; socket-level timeouts propagate from ``ws.recv``.
    """
    payload: dict[str, Any] = {"id": msg_id, "method": method}
    if params is not None:
        payload["params"] = params
    ws.send(json.dumps(payload))

    # Bounded so that a stream of unrelated frames cannot hang the login flow.
    for _ in range(256):
        message = json.loads(ws.recv())
        if isinstance(message, dict) and message.get("id") == msg_id:
            return message
    raise RuntimeError(f"no CDP reply received for {method} (id={msg_id})")


def _hydrate_stoken_via_cdp(
    debug_port: int = 9222,
    wait_seconds: float = 4.0,
) -> dict[str, str] | None:
    """Try to obtain ``__zp_stoken__`` from a running Chrome instance via CDP.

    The function lists the debuggable targets exposed on
    ``http://127.0.0.1:<debug_port>/json``, attaches to the first page target
    that offers a ``webSocketDebuggerUrl``, navigates it to ``BASE_URL`` and,
    after ``wait_seconds``, reads the browser's cookies with
    ``Network.getAllCookies``.

    NOTE: the selected tab is navigated away from whatever it was showing.

    Prerequisites
    -------------
    * Chrome must be running with ``--remote-debugging-port=<debug_port>``.
    * ``websocket-client`` must be installed (``pip install websocket-client``).

    Parameters
    ----------
    debug_port:
        CDP port Chrome was started with (default: 9222).
    wait_seconds:
        How long to wait after navigation for JS to set the cookie.
        Negative values are treated as zero.

    Returns
    -------
    dict[str, str] | None
        Flat ``name -> value`` dict of the cookies whose domain ends with one
        of ``BROWSER_EXPORT_DOMAINS`` (including ``__zp_stoken__``) on
        success, or ``None`` when CDP is unavailable, the WebSocket exchange
        fails, or the token was not generated.
    """
    try:
        import websocket  # type: ignore[import]
    except ImportError:
        logger.debug("CDP hydration skipped: websocket-client not installed")
        return None

    import urllib.request

    try:
        with urllib.request.urlopen(
            f"http://127.0.0.1:{debug_port}/json", timeout=3
        ) as resp:
            targets = json.loads(resp.read())
    except Exception as exc:
        logger.debug("Chrome CDP not available on port %d: %s", debug_port, exc)
        return None

    if not isinstance(targets, list) or not targets:
        logger.debug("CDP: no open tabs found")
        return None

    # The target list also contains extensions, workers, etc.; only a page
    # target can be navigated.  A page that already has a debugger attached
    # may omit webSocketDebuggerUrl, so keep looking instead of giving up.
    ws_url = next(
        (
            target["webSocketDebuggerUrl"]
            for target in targets
            if isinstance(target, dict)
            and target.get("type", "page") == "page"
            and target.get("webSocketDebuggerUrl")
        ),
        None,
    )
    if not ws_url:
        logger.debug("CDP: no page target with a webSocketDebuggerUrl")
        return None

    try:
        # suppress_origin: recent Chrome versions reject DevTools WebSocket
        # handshakes that carry an Origin header unless the browser was
        # started with a matching --remote-allow-origins flag.
        ws = websocket.create_connection(
            ws_url, timeout=10, suppress_origin=True
        )
        try:
            _cdp_call(ws, 1, "Page.navigate", {"url": f"{BASE_URL}/"})
            time.sleep(max(wait_seconds, 0.0))  # let JS generate __zp_stoken__
            reply = _cdp_call(ws, 2, "Network.getAllCookies")
        finally:
            # Always release the socket, including when send/recv raised.
            ws.close()
    except Exception as exc:
        logger.warning("CDP WebSocket error: %s", exc)
        return None

    result = reply.get("result")
    raw_cookies = result.get("cookies", []) if isinstance(result, dict) else []
    if not isinstance(raw_cookies, list):
        raw_cookies = []

    export_domains = tuple(BROWSER_EXPORT_DOMAINS)
    cookies: dict[str, str] = {}
    for entry in raw_cookies:
        if not isinstance(entry, dict):
            continue
        domain = entry.get("domain", "")
        name = entry.get("name")
        value = entry.get("value")
        if not (
            isinstance(domain, str)
            and isinstance(name, str)
            and isinstance(value, str)
        ):
            continue
        if domain.endswith(export_domains):
            cookies[name] = value

    if "__zp_stoken__" not in cookies:
        logger.debug("CDP: connected but __zp_stoken__ not generated")
        return None

    return cookies


def _hydrate_stoken_via_browser(cookies: dict[str, str]) -> dict[str, str]:
"""Inject session cookies into a Camoufox browser and harvest __zp_stoken__.

Expand Down Expand Up @@ -140,10 +250,24 @@ def _emit(msg: str) -> None:
# Step 1: Complete QR login via HTTP (reuse existing flow)
cred = asyncio.run(qr_login())

# Step 2: If __zp_stoken__ is missing, try to hydrate via browser
# Step 2: If __zp_stoken__ is missing, try CDP first, then Camoufox
if "__zp_stoken__" not in cred.cookies:
_emit("\n🔧 正在通过浏览器补全 __zp_stoken__...")
_emit("\n🔧 正在补全 __zp_stoken__...")

# --- Attempt 1: CDP (real Chrome, best anti-bot bypass) ---
cdp_result = _hydrate_stoken_via_cdp()
if cdp_result is not None:
merged = {**cred.cookies, **cdp_result}
cred = Credential(cookies=merged)
save_credential(cred)
_emit("✅ __zp_stoken__ 补全成功(CDP)!所有接口可正常使用")
return cred

# CDP unavailable or Chrome not running — fall back to Camoufox
_emit(" (未检测到运行中的 Chrome,尝试 Camoufox 补全...)")
_emit(" 提示:以 --remote-debugging-port=9222 启动 Chrome 可提高成功率")

# --- Attempt 2: Camoufox headless browser ---
try:
enriched = _hydrate_stoken_via_browser(cred.cookies)
except Exception as exc:
Expand All @@ -155,9 +279,10 @@ def _emit(msg: str) -> None:
merged = {**cred.cookies, **enriched}
cred = Credential(cookies=merged)
save_credential(cred)
_emit("✅ __zp_stoken__ 补全成功!所有接口可正常使用")
_emit("✅ __zp_stoken__ 补全成功(Camoufox)!所有接口可正常使用")
else:
_emit("⚠️ 浏览器未能生成 __zp_stoken__(Boss 直聘反爬检测)")
_emit(" recommend/chat/applied 等接口仍可使用,search 可能受限")
_emit(" 如需完整功能,请以 --remote-debugging-port=9222 启动 Chrome 后重试")

return cred