Skip to content
Draft
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
65 commits
Select commit Hold shift + click to select a range
92ad9dc
refactor(meeting-bots): point modal submit at Flow A (CEF webview)
oxoxDev May 22, 2026
8e9b7b2
feat(human): add join-meeting pill that opens Flow A modal
oxoxDev May 22, 2026
54333fc
fix(meet-call): force off-screen position post-build + minimize
oxoxDev May 22, 2026
0260c16
fix(meet-call): drop minimize, close stale windows, log post-build pos
oxoxDev May 22, 2026
c322dc7
fix(meet-call): hide window post-build instead of positioning off-screen
oxoxDev May 22, 2026
e98dbd1
fix(meet-call): hide window post-join (not post-build) to keep scanne…
oxoxDev May 22, 2026
34bdd11
fix(meet-scanner): flip camera + mic toggles ON before Ask to join
oxoxDev May 22, 2026
7a72279
fix(meet-scanner): widen aria-label matchers + dump on toggle miss
oxoxDev May 22, 2026
ea08f76
fix(meet-scanner): only click 'Turn ON' variants; never click 'Turn off'
oxoxDev May 22, 2026
be8f61b
diag(meet-scanner): drop post-join hide to isolate pipeline regression
oxoxDev May 22, 2026
6b11c96
fix(meet-agent): bare-wake (no tail) replies with greeting ack, not s…
oxoxDev May 22, 2026
2152db9
feat(meet-scanner): scanner-side admission watch + captions force-enable
oxoxDev May 22, 2026
309301a
diag(meet-agent): log every push_caption text + wake_fired outcome
oxoxDev May 22, 2026
62014ab
feat(meet-scanner): cycle mic off→on after Ask-to-join to force fresh…
oxoxDev May 22, 2026
298094c
Merge remote-tracking branch 'upstream/main' into feat/mascot-meet-flowA
oxoxDev May 22, 2026
356f27c
fix(meet-agent): widen wake matcher — accept 'hi/hello openhuman' + b…
oxoxDev May 22, 2026
ea61575
fix(meet-scanner): cycle mic off→on BEFORE Ask-to-join, not after
oxoxDev May 22, 2026
b4897bd
fix(meet-scanner): clear browser cookies pre-join to drop leaked Goog…
oxoxDev May 22, 2026
576173c
fix(meet-agent): filter 'You'-speaker noise, strip <think> blocks, in…
oxoxDev May 22, 2026
9055241
fix(meet-agent): break echo loop — speaking gate, caption dedup, TTS cap
oxoxDev May 22, 2026
96346a8
fix(meet-agent): tighten reply length — 80-token cap + strict spoken-…
oxoxDev May 22, 2026
8f7e371
fix(meet-agent): switch model agentic-v1 → chat-v1 + strip leaked rea…
oxoxDev May 22, 2026
44d1bbe
feat(meet-agent): route bot through orchestrator agent — second brain…
oxoxDev May 22, 2026
71d5036
fix(meet-agent): switch to with_profile builder to pick up connected …
oxoxDev May 22, 2026
ef7e648
fix(meet-agent): per-turn unique definition_name to bypass corrupt tr…
oxoxDev May 22, 2026
5d97de2
fix(meet-agent): rewrite voice directive to encourage tool use
oxoxDev May 22, 2026
d8de1f3
fix(meet-agent): per-speaker caption dedup + turn_in_progress gate + …
oxoxDev May 22, 2026
cb4117b
fix(meet-agent): 60s→90s timeout + drop basic-LLM hallucination fallback
oxoxDev May 22, 2026
3c864a4
feat(meet-agent): inject current date/time into orchestrator system p…
oxoxDev May 22, 2026
b20e466
fix(meet-scanner): broaden CC auto-enable matchers + double attempt b…
oxoxDev May 22, 2026
ded0bfc
feat(meet-agent): pre-roll "On it." ack to bridge 30-60s tool-call si…
oxoxDev May 22, 2026
89b3372
feat(meet-agent): cache orchestrator Agent per meet for cross-turn me…
oxoxDev May 22, 2026
8560d22
feat(meet-agent): barge-in — JS audio flush + relaxed wake gate
oxoxDev May 22, 2026
12bc5e7
fix(meet-agent): force English-only TTS + log reply preview
oxoxDev May 22, 2026
273e390
fix(meet-agent): triple-up dedup — 30s cooldown + 15s min-turn-gap + …
oxoxDev May 22, 2026
cab2e75
chore: apply auto-fixes from pre-push hook
oxoxDev May 22, 2026
26c65fc
fix(meet-agent): skip pre-roll ack for short prompts (greetings / tim…
oxoxDev May 22, 2026
257646c
fix(meet-agent): cooldown + min-turn-gap → 60s each to kill residual-…
oxoxDev May 22, 2026
140f371
test(skills): update MeetingBotsCard tests for Flow A (joinMeetCall)
oxoxDev May 22, 2026
d2fcda7
test(meet-agent): poll for audio up to 30s instead of single 50ms sleep
oxoxDev May 22, 2026
42834af
feat(meet-agent): owner-only wake gate (privacy lock)
oxoxDev May 25, 2026
6b1c7a8
feat(meet-agent): plumb owner+bot display names into start_session RPC
oxoxDev May 25, 2026
994e666
feat(tauri/meet): forward owner display name from shell to core wake …
oxoxDev May 25, 2026
6e44f16
feat(skills): owner-name input + privacy hint on MeetingBotsCard modal
oxoxDev May 25, 2026
683f3e2
feat(tauri/meet-audio): emit speaking-state edges from speak_pump
oxoxDev May 25, 2026
93912b1
feat(meet): animate the in-call mascot's mouth while the bot speaks
oxoxDev May 25, 2026
f26e25b
feat(meet-agent): JSONL store for completed call records
oxoxDev May 25, 2026
a39bf38
feat(meet-agent): capture meet_url + started_at_ms on session start
oxoxDev May 25, 2026
0eebe5b
feat(meet-agent): persist on stop_session + new list_calls RPC
oxoxDev May 25, 2026
0f46ed0
feat(tauri/meet-audio): forward meet_url into start_session
oxoxDev May 25, 2026
b4e1747
feat(meet): listMeetCalls() service + tests
oxoxDev May 25, 2026
2a5014a
feat(skills): inline recent-calls list in MeetingBotsCard modal
oxoxDev May 25, 2026
0e5e2ec
chore: apply auto-fixes
oxoxDev May 25, 2026
18e70f3
feat(meet-agent): CaptionOutcome enum + soft-deny + owner-grant state
oxoxDev May 25, 2026
1c29cff
feat(meet-agent): run_soft_deny_turn + run_grant_turn
oxoxDev May 25, 2026
4e52b0a
feat(meet-agent): dispatch CaptionOutcome to soft-deny / wake / ignore
oxoxDev May 25, 2026
7969b4d
chore: apply auto-fixes
oxoxDev May 25, 2026
62a70b4
fix(skills): drop {label} placeholder from sendTo/comingSoon i18n str…
oxoxDev May 25, 2026
60916bb
fix(meet-agent): declare owner_display_name / bot_display_name / meet…
oxoxDev May 25, 2026
6d1e365
chore: apply auto-fixes
oxoxDev May 25, 2026
62732c7
fix(meet-agent): dedup unauthorized wakes before soft-deny dispatch
oxoxDev May 25, 2026
52a22d3
fix(meet-agent): session-wide soft-deny cooldown + greeting/refusal s…
oxoxDev May 25, 2026
17dd6c9
fix(meet-audio): route bot PCM to local speakers in addition to Meet …
oxoxDev May 25, 2026
9a3c2db
feat(meet-agent): toolless LLM for non-owner non-greeting wakes
oxoxDev May 25, 2026
15df969
chore: apply auto-fixes
oxoxDev May 25, 2026
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
4 changes: 4 additions & 0 deletions app/src-tauri/src/lib.rs
Original file line number Diff line number Diff line change
Expand Up @@ -2989,6 +2989,10 @@ pub fn run() {
request_id: request_id.clone(),
meet_url: meet_url.clone(),
display_name: "OpenHuman Dev".to_string(),
// Dev-auto launch has no real user identity — the
// wake gate will fail-closed (no wakes fire) which
// is the safe posture for an automated harness.
owner_display_name: String::new(),
};
match meet_call::meet_call_open_window(app_handle.clone(), state, args)
.await
Expand Down
38 changes: 38 additions & 0 deletions app/src-tauri/src/meet_audio/audio_bridge.js
Original file line number Diff line number Diff line change
Expand Up @@ -97,6 +97,29 @@
return out;
}

// Track every scheduled AudioBufferSource so __openhumanFlushAudio
// can stop them on barge-in (user re-asks during a long bot reply).
// Without this list, only the queue tail past `nextStartTime` would
// be cancellable; anything already start()-ed plays to completion.
var activeSources = [];

// Barge-in hook: halt every tracked AudioBufferSource and rewind the
// schedule cursor to "now". Invoked from the Rust shell (via
// Runtime.evaluate) when outbound speech is cancelled. The return
// value is how many sources accepted stop(), so the caller can log
// how much queued speech was dropped.
window.__openhumanFlushAudio = function () {
  var haltedCount = 0;
  // Drain from the tail so the list is empty once the loop finishes.
  while (activeSources.length > 0) {
    var node = activeSources.pop();
    try {
      node.stop();
      haltedCount += 1;
    } catch (_) {
      // Best-effort: a source that refuses stop() is simply not counted.
    }
    try {
      node.disconnect();
    } catch (_) {
      // Best-effort: ignore a failed disconnect.
    }
  }
  // With nothing left in flight, the next fed chunk should start
  // immediately rather than after the cancelled tail.
  if (ctx) {
    nextStartTime = ctx.currentTime;
  }
  return haltedCount;
};

// Public push API. Returns the duration in seconds the chunk added
// to the queue, mostly for diagnostics; the shell ignores it.
window.__openhumanFeedPcm = function (b64) {
Expand All @@ -110,6 +133,16 @@
var src = ctx.createBufferSource();
src.buffer = buffer;
src.connect(dest);
// Also pipe to the page's default audio output so the bot is
// audible on the host machine (the openhuman app's speakers).
// Without this, bot audio only flows up Meet's gUM intercept
// and the user has to be receiving the meeting on a separate
// endpoint (other browser tab / phone) to hear it. Playing
// locally too costs nothing audio-quality-wise and removes the
// "captions appear but no sound" foot-gun. Follow-up #20
// (mute bot CEF at OS level) will re-introduce a clean off
// switch once we have a config toggle.
src.connect(ctx.destination);
// Schedule strictly after the previous chunk so successive
// 100 ms feeds line up gaplessly. If the queue has emptied
// (caller fell behind), restart at currentTime so we don't try
Expand All @@ -118,6 +151,11 @@
nextStartTime = ctx.currentTime;
}
src.start(nextStartTime);
activeSources.push(src);
src.onended = function () {
var idx = activeSources.indexOf(src);
if (idx !== -1) activeSources.splice(idx, 1);
};
nextStartTime += buffer.duration;
// High-frequency log gated by a counter so we don't drown the
// console at 10 Hz; emit ~1 in 50 frames (~5 s cadence at the
Expand Down
27 changes: 22 additions & 5 deletions app/src-tauri/src/meet_audio/captions_bridge.js
Original file line number Diff line number Diff line change
Expand Up @@ -139,18 +139,35 @@
}

// Auto-enable captions: walk every button on the page and click any
// that has an aria-label starting with "Turn on captions". Caps the
// attempts so we don't fight a user who deliberately disables CC.
var ENABLE_ATTEMPT_BUDGET = 30; // ~30 * 2s = 60s
// that has an aria-label matching the "turn on captions" intent.
// Substring match (not prefix) — Meet rolls out variant labels
// ("Turn on captions (c)", "Turn on live captions", "Subtitles",
// "Captions") that the strict prefix-only matcher missed, forcing
// the user to click the toggle by hand. Caps attempts so a user who
// deliberately disables CC isn't fought over forever.
var ENABLE_ATTEMPT_BUDGET = 60; // ~60 * 2s = 120s — covers slow admit
var enableAttempts = 0;
function tryEnableCaptions() {
if (enableAttempts >= ENABLE_ATTEMPT_BUDGET) return;
enableAttempts++;
var buttons = document.querySelectorAll("button[aria-label]");
var ON_PATTERNS = [
"turn on captions",
"turn on live captions",
"turn on subtitles",
"turn on closed captions",
"captions on",
"captions (c)",
"show captions",
"enable captions",
];
// Negative guard: never click anything that is already-on (Meet
// shows "Turn off captions" when CC is active).
var OFF_PATTERNS = ["turn off captions", "captions off", "disable captions"];
for (var i = 0; i < buttons.length; i++) {
var lbl = (buttons[i].getAttribute("aria-label") || "").toLowerCase();
// Match "Turn on captions" but NOT "Turn off captions".
if (lbl.indexOf("turn on captions") === 0 || /^turn on captions/.test(lbl)) {
if (OFF_PATTERNS.some(function (p) { return lbl.indexOf(p) >= 0; })) continue;
if (ON_PATTERNS.some(function (p) { return lbl.indexOf(p) >= 0; })) {
try {
buttons[i].click();
enableAttempts = ENABLE_ATTEMPT_BUDGET; // success — stop trying.
Expand Down
28 changes: 28 additions & 0 deletions app/src-tauri/src/meet_audio/inject.rs
Original file line number Diff line number Diff line change
Expand Up @@ -274,3 +274,31 @@ pub async fn feed_pcm_chunk(cdp: &mut CdpConn, session: &str, pcm_b64: &str) ->
}
Ok(())
}

/// Flush the page-side audio bridge: stop whatever it is currently
/// playing and reset its schedule cursor.
///
/// Used when outbound speech is cancelled (the user re-asks while a
/// long reply is still playing) so the tail of the old reply does not
/// keep sounding while the next turn is dispatched.
///
/// The page is asked, via CDP `Runtime.evaluate`, to call
/// `window.__openhumanFlushAudio()`.
///
/// # Returns
///
/// * `Ok(n)` — the integer the page expression evaluated to, i.e. the
///   number of sources the bridge reports it stopped.
/// * `Ok(-1)` — the page does not define `window.__openhumanFlushAudio`
///   (the expression itself yields `-1` in that case).
/// * `Ok(0)` — also returned when the reply carries no integer
///   `result.value`.
///
/// # Errors
///
/// Returns `Err` with a descriptive message when the CDP call fails or
/// when the reply contains `exceptionDetails`.
pub async fn flush_audio_bridge(cdp: &mut CdpConn, session: &str) -> Result<i64, String> {
    // Guarded call: evaluates to -1 instead of throwing when the bridge
    // script has not been installed in this page.
    const FLUSH_EXPR: &str = "(typeof window.__openhumanFlushAudio === 'function') ? window.__openhumanFlushAudio() : -1";

    let params = json!({
        "expression": FLUSH_EXPR,
        "returnByValue": true,
    });

    let reply = match cdp.call("Runtime.evaluate", params, Some(session)).await {
        Ok(reply) => reply,
        Err(e) => return Err(format!("Runtime.evaluate flush: {e}")),
    };

    // A thrown page-side exception is reported in-band by CDP.
    if let Some(exception) = reply.get("exceptionDetails") {
        return Err(format!("page exception: {exception}"));
    }

    let stopped_count = reply
        .get("result")
        .and_then(|result| result.get("value"))
        .and_then(|value| value.as_i64());

    Ok(stopped_count.unwrap_or(0))
}
23 changes: 19 additions & 4 deletions app/src-tauri/src/meet_audio/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -87,10 +87,15 @@ pub async fn start<R: Runtime>(
app: AppHandle<R>,
request_id: String,
meet_url: String,
owner_display_name: String,
bot_display_name: String,
) -> Result<(), String> {
log::info!(
"[meet-audio] start request_id={request_id} url_prefix={}",
truncate_for_log(&meet_url, 64)
"[meet-audio] start request_id={request_id} url_prefix={} \
owner_chars={} bot_chars={}",
truncate_for_log(&meet_url, 64),
owner_display_name.chars().count(),
bot_display_name.chars().count()
);

if let Some(state) = app.try_state::<MeetAudioState>() {
Expand All @@ -104,12 +109,22 @@ pub async fn start<R: Runtime>(
}

// Tell core to open its session first so the very first PCM push
// doesn't race the start RPC.
// doesn't race the start RPC. Hand the call owner + bot display
// names through with the request so the core wake-word gate
// (privacy lock: only the owner can trigger tool calls) is
// active before the first caption can arrive.
rpc_call(
"openhuman.meet_agent_start_session",
serde_json::json!({
"request_id": request_id,
"sample_rate_hz": 16_000,
"owner_display_name": owner_display_name,
"bot_display_name": bot_display_name,
// Persisted into the recent-calls JSONL by stop_session
// so the Skills "Meeting Bots" card can show "joined
// <code>" in the history list. The URL the shell built
// the CEF window with is the canonical value.
"meet_url": meet_url,
}),
)
.await?;
Expand Down Expand Up @@ -170,7 +185,7 @@ pub async fn start<R: Runtime>(
caption_listener_disabled(request_id.clone())
}
};
let speak = speak_pump::start(request_id.clone(), cdp, session);
let speak = speak_pump::start(app.clone(), request_id.clone(), cdp, session);
(speak, captions)
}
Err(err) => {
Expand Down
Loading