From 6c7ea70f999a06dde6ee7e4abe67cb495fdefe58 Mon Sep 17 00:00:00 2001
From: Riddhika Arora <riddhika05@users.noreply.github.com>
Date: Sun, 1 Feb 2026 21:55:26 +0530
Subject: [PATCH 1/8] Updated session settings for interrupt handling

---
 examples/voice_agents/basic_agent.py |  97 +++++++--------
 examples/voice_agents/test.py        | 172 +++++++++++++++++++++++++++
 2 files changed, 212 insertions(+), 57 deletions(-)
 create mode 100644 examples/voice_agents/test.py

diff --git a/examples/voice_agents/basic_agent.py b/examples/voice_agents/basic_agent.py
index f064dab5d7..6371ab0243 100644
--- a/examples/voice_agents/basic_agent.py
+++ b/examples/voice_agents/basic_agent.py
@@ -3,16 +3,9 @@
 from dotenv import load_dotenv
 
 from livekit.agents import (
-    Agent,
-    AgentServer,
-    AgentSession,
-    JobContext,
-    JobProcess,
-    MetricsCollectedEvent,
-    RunContext,
-    cli,
-    metrics,
-    room_io,
+   Agent, AgentServer, AgentSession, JobContext, JobProcess,
+    cli, metrics, UserInputTranscribedEvent, AgentStateChangedEvent,
+    UserStateChangedEvent, SpeechCreatedEvent, AgentFalseInterruptionEvent
 )
 from livekit.agents.llm import function_tool
 from livekit.plugins import silero
@@ -21,7 +14,7 @@
 # uncomment to enable Krisp background voice/noise cancellation
 # from livekit.plugins import noise_cancellation
 
-logger = logging.getLogger("basic-agent")
+logger = logging.getLogger("intelligent-kelly")
 
 load_dotenv()
 
@@ -35,33 +28,13 @@ def __init__(self) -> None:
             "You are curious and friendly, and have a sense of humor."
             "you will speak english to the user",
         )
-
+        self.is_speaking = False
+        self.current_speech_handle = None
     async def on_enter(self):
-        # when the agent is added to the session, it'll generate a reply
-        # according to its instructions
-        self.session.generate_reply()
-
-    # all functions annotated with @function_tool will be passed to the LLM when this
-    # agent is active
-    @function_tool
-    async def lookup_weather(
-        self, context: RunContext, location: str, latitude: str, longitude: str
-    ):
-        """Called when the user asks for weather related information.
-        Ensure the user's location (city or region) is provided.
-        When given a location, please estimate the latitude and longitude of the location and
-        do not ask the user for them.
-
-        Args:
-            location: The location they are asking for
-            latitude: The latitude of the location, do not ask user for it
-            longitude: The longitude of the location, do not ask user for it
-        """
-
-        logger.info(f"Looking up weather for {location}")
-
-        return "sunny with a temperature of 70 degrees."
-
+      
+        await self.session.generate_reply()
+    
+   
 
 server = AgentServer()
 
@@ -75,31 +48,41 @@ def prewarm(proc: JobProcess):
 
 @server.rtc_session()
 async def entrypoint(ctx: JobContext):
-    # each log entry will include these fields
-    ctx.log_context_fields = {
-        "room": ctx.room.name,
-    }
+    # === CRITICAL: Configure false interruption handling ===
     session = AgentSession(
-        # Speech-to-text (STT) is your agent's ears, turning the user's speech into text that the LLM can understand
-        # See all available models at https://docs.livekit.io/agents/models/stt/
         stt="deepgram/nova-3",
-        # A Large Language Model (LLM) is your agent's brain, processing user input and generating a response
-        # See all available models at https://docs.livekit.io/agents/models/llm/
-        llm="openai/gpt-4.1-mini",
-        # Text-to-speech (TTS) is your agent's voice, turning the LLM's text into speech that the user can hear
-        # See all available models as well as voice selections at https://docs.livekit.io/agents/models/tts/
+        llm="openai/gpt-4o-mini",
         tts="cartesia/sonic-2:9626c31c-bec5-4cca-baa8-f8ba9e84c8bc",
-        # VAD and turn detection are used to determine when the user is speaking and when the agent should respond
-        # See more at https://docs.livekit.io/agents/build/turns
-        turn_detection=MultilingualModel(),
         vad=ctx.proc.userdata["vad"],
-        # allow the LLM to generate a response while waiting for the end of turn
-        # See more at https://docs.livekit.io/agents/build/audio/#preemptive-generation
-        preemptive_generation=True,
-        # sometimes background noise could interrupt the agent session, these are considered false positive interruptions
-        # when it's detected, you may resume the agent's speech
-        resume_false_interruption=True,
+        turn_detection=MultilingualModel(),
+        
+        # === PRIMARY DEFENSE: Built-in False Interruption Handling ===
+        # These parameters tell LiveKit to wait for real speech before committing to an interruption
+        allow_interruptions=True,
+        
+        # Require at least 0.8 seconds of detected speech before interrupting
+        # This filters out most short fillers like "yeah", "ok", "hmm"
+        min_interruption_duration=0.8,  
+        
+        # Require at least 2 words to be transcribed before confirming interruption
+        # Single filler words won't trigger interruption
+        min_interruption_words=2,
+        
+        # Wait up to 1 second for transcription after VAD detects speech
+        # If no real words are transcribed, treat it as a false interruption
         false_interruption_timeout=1.0,
+        
+        # Automatically resume speaking if interruption was false positive
+        # This is THE KEY FEATURE for seamless continuation
+        resume_false_interruption=True,
+        
+        # Don't buffer audio while agent is speaking
+        discard_audio_if_uninterruptible=True,
+        
+        # Other important settings
+        preemptive_generation=False,
+        min_endpointing_delay=0.6,
+        max_endpointing_delay=3.0,
     )
 
     # log metrics as they are emitted, and total usage after session is over
diff --git a/examples/voice_agents/test.py b/examples/voice_agents/test.py
new file mode 100644
index 0000000000..38e2906df7
--- /dev/null
+++ b/examples/voice_agents/test.py
@@ -0,0 +1,172 @@
+import logging
+import re
+from dotenv import load_dotenv
+from livekit.agents import (
+    Agent, AgentServer, AgentSession, JobContext, JobProcess,
+    cli, metrics, UserInputTranscribedEvent, AgentStateChangedEvent,
+    UserStateChangedEvent, SpeechCreatedEvent, AgentFalseInterruptionEvent
+)
+from livekit.plugins import silero, deepgram, openai, cartesia
+from livekit.plugins.turn_detector.multilingual import MultilingualModel
+
+logger = logging.getLogger("intelligent-kelly")
+logger.setLevel(logging.INFO)
+load_dotenv()
+
+# CONFIGURATION: Easily adjustable lists for modularity
+STOP_WORDS = {"wait", "no", "stop", "finish", "hold", "pause", "hold on"}
+FILLER_WORDS = {
+    "uhhuh", "okay", "alright", "mhm", "yeah", "yep", "yup", 
+    "hmm", "right", "uh", "um", "ah", "gotit", "isee", "ok", "k",
+    "sure", "yes", "interesting", "really", "wow", "ohh", "ooh",
+    "aha", "uhuh", "mhmm", "gotcha", "nice"
+}
+
+class MyAgent(Agent):
+    def __init__(self) -> None:
+        super().__init__(
+            instructions=(
+                "Your name is Kelly. Keep responses concise and witty. "
+                "If you hear acknowledgements like 'yeah' or 'ok' while explaining something, "
+                "that means the user is listening - keep going! "
+                "Only stop if they explicitly ask you to wait or stop."
+            ),
+        )
+        # More reliable state tracking
+        self.is_speaking = False
+        self.current_speech_handle = None
+        
+    async def on_enter(self):
+        await self.session.generate_reply()
+
+server = AgentServer()
+
+def prewarm(proc: JobProcess):
+    proc.userdata["vad"] = silero.VAD.load()
+
+server.setup_fnc = prewarm
+
+@server.rtc_session()
+async def entrypoint(ctx: JobContext):
+    # === CRITICAL: Configure false interruption handling ===
+    session = AgentSession(
+        stt="deepgram/nova-3",
+        llm="openai/gpt-4o-mini",
+        tts="cartesia/sonic-2:9626c31c-bec5-4cca-baa8-f8ba9e84c8bc",
+        vad=ctx.proc.userdata["vad"],
+        turn_detection=MultilingualModel(),
+        
+        # === PRIMARY DEFENSE: Built-in False Interruption Handling ===
+        # These parameters tell LiveKit to wait for real speech before committing to an interruption
+        allow_interruptions=True,
+        
+        # Require at least 0.8 seconds of detected speech before interrupting
+        # This filters out most short fillers like "yeah", "ok", "hmm"
+        min_interruption_duration=0.8,  
+        
+        # Require at least 2 words to be transcribed before confirming interruption
+        # Single filler words won't trigger interruption
+        min_interruption_words=2,
+        
+        # Wait up to 1 second for transcription after VAD detects speech
+        # If no real words are transcribed, treat it as a false interruption
+        false_interruption_timeout=1.0,
+        
+        # Automatically resume speaking if interruption was false positive
+        # This is THE KEY FEATURE for seamless continuation
+        resume_false_interruption=True,
+        
+        # Don't buffer audio while agent is speaking
+        discard_audio_if_uninterruptible=True,
+        
+        # Other important settings
+        preemptive_generation=False,
+        min_endpointing_delay=0.6,
+        max_endpointing_delay=3.0,
+    )
+    
+    kelly = MyAgent()
+    
+    # === STATE TRACKING ===
+    
+    @session.on("speech_created")
+    def on_speech_created(ev: SpeechCreatedEvent):
+        """Track when Kelly actually starts speaking audio"""
+        kelly.is_speaking = True
+        kelly.current_speech_handle = ev.speech_handle
+        logger.info("🎤 KELLY STARTED SPEAKING")
+    
+    @session.on("agent_state_changed")
+    def on_agent_state_changed(ev: AgentStateChangedEvent):
+        """Track agent state transitions"""
+        logger.info(f"🎭 AGENT STATE: {ev.old_state} → {ev.new_state}")
+        
+        # Kelly is no longer speaking when she returns to listening
+        if ev.new_state == "listening":
+            kelly.is_speaking = False
+            kelly.current_speech_handle = None
+            logger.info("👂 Kelly finished and is now listening")
+    
+    @session.on("user_state_changed")
+    def on_user_state_changed(ev: UserStateChangedEvent):
+        """Track user state for debugging"""
+        logger.info(f"👤 USER STATE: {ev.old_state} → {ev.new_state}")
+    
+    @session.on("agent_false_interruption")
+    def on_false_interruption(ev: AgentFalseInterruptionEvent):
+        """Log when false interruptions are detected and resumed"""
+        if ev.resumed:
+            logger.info("✅ FALSE INTERRUPTION: Kelly resumed seamlessly")
+        else:
+            logger.warning("⚠️ FALSE INTERRUPTION: Not auto-resumed")
+    
+    # === SECONDARY FILTER: Transcript-Level Suppression ===
+    
+    @session.on("user_input_transcribed")
+    def on_user_input_transcribed(ev: UserInputTranscribedEvent):
+        """
+        This handler provides a SECONDARY layer of defense.
+        It runs after STT completes, so it can't prevent the initial VAD interruption,
+        but it can prevent filler words from reaching the LLM and generating responses.
+        """
+        if not ev.is_final or not ev.transcript:
+            return
+        
+        # Normalize text: lowercase, remove punctuation
+        clean_text = re.sub(r'[^\w\s]', '', ev.transcript.lower()).strip()
+        words = set(clean_text.split())
+        raw_phrase = clean_text.replace(" ", "")  # For "uhhuh" type phrases
+        
+        if not words:
+            return
+        
+        logger.info(f"📝 TRANSCRIPT: '{clean_text}' | Kelly speaking: {kelly.is_speaking}")
+        
+        # === CASE 1: Command words ALWAYS cause interruption ===
+        if words.intersection(STOP_WORDS):
+            logger.info(f"🛑 COMMAND DETECTED: '{clean_text}' → Calling interrupt()")
+            session.interrupt()
+            return
+        
+        # === CASE 2: Pure filler words while Kelly is speaking ===
+        # These should be ignored - don't let them reach the LLM
+        if kelly.is_speaking and (raw_phrase in FILLER_WORDS or words.issubset(FILLER_WORDS)):
+            logger.info(f"🔇 FILLER SUPPRESSED: '{clean_text}' ignored during speech")
+            # By returning here without calling session.interrupt() or forwarding to LLM,
+            # we ensure this transcript is effectively dropped
+            return
+        
+        # === CASE 3: Lone filler when Kelly is silent ===
+        # Still suppress these - they shouldn't trigger LLM processing
+        if not kelly.is_speaking and (raw_phrase in FILLER_WORDS or words.issubset(FILLER_WORDS)):
+            logger.info(f"🍃 LONE FILLER: '{clean_text}' suppressed (Kelly idle)")
+            return
+        
+        # === CASE 4: Real user input (commands, questions, statements) ===
+        logger.info(f"✅ VALID INPUT: '{clean_text}' forwarded to LLM")
+        # Let this proceed normally - it will reach the LLM
+    
+    await session.start(agent=kelly, room=ctx.room)
+
+if __name__ == "__main__":
+    cli.run_app(server)
\ No newline at end of file

From 473e580c60dd49621d1ab0bd2958dbf500f874e7 Mon Sep 17 00:00:00 2001
From: Riddhika Arora <riddhika05@users.noreply.github.com>
Date: Sun, 1 Feb 2026 21:58:16 +0530
Subject: [PATCH 2/8] Added state tracking

---
 examples/voice_agents/basic_agent.py | 68 ++++++++++++++--------------
 1 file changed, 33 insertions(+), 35 deletions(-)

diff --git a/examples/voice_agents/basic_agent.py b/examples/voice_agents/basic_agent.py
index 6371ab0243..f6174584ec 100644
--- a/examples/voice_agents/basic_agent.py
+++ b/examples/voice_agents/basic_agent.py
@@ -48,58 +48,56 @@ def prewarm(proc: JobProcess):
 
 @server.rtc_session()
 async def entrypoint(ctx: JobContext):
-    # === CRITICAL: Configure false interruption handling ===
+   
     session = AgentSession(
         stt="deepgram/nova-3",
         llm="openai/gpt-4o-mini",
         tts="cartesia/sonic-2:9626c31c-bec5-4cca-baa8-f8ba9e84c8bc",
         vad=ctx.proc.userdata["vad"],
         turn_detection=MultilingualModel(),
-        
-        # === PRIMARY DEFENSE: Built-in False Interruption Handling ===
-        # These parameters tell LiveKit to wait for real speech before committing to an interruption
         allow_interruptions=True,
-        
-        # Require at least 0.8 seconds of detected speech before interrupting
-        # This filters out most short fillers like "yeah", "ok", "hmm"
         min_interruption_duration=0.8,  
-        
-        # Require at least 2 words to be transcribed before confirming interruption
-        # Single filler words won't trigger interruption
         min_interruption_words=2,
-        
-        # Wait up to 1 second for transcription after VAD detects speech
-        # If no real words are transcribed, treat it as a false interruption
         false_interruption_timeout=1.0,
-        
-        # Automatically resume speaking if interruption was false positive
-        # This is THE KEY FEATURE for seamless continuation
         resume_false_interruption=True,
-        
-        # Don't buffer audio while agent is speaking
         discard_audio_if_uninterruptible=True,
-        
-        # Other important settings
         preemptive_generation=False,
         min_endpointing_delay=0.6,
         max_endpointing_delay=3.0,
     )
+    kelly = MyAgent()
+    @session.on("speech_created")
+    def on_speech_created(ev: SpeechCreatedEvent):
+        """Track when Kelly actually starts speaking audio"""
+        kelly.is_speaking = True
+        kelly.current_speech_handle = ev.speech_handle
+        logger.info("🎤 KELLY STARTED SPEAKING")
+    
+    @session.on("agent_state_changed")
+    def on_agent_state_changed(ev: AgentStateChangedEvent):
+        """Track agent state transitions"""
+        logger.info(f"🎭 AGENT STATE: {ev.old_state} → {ev.new_state}")
+        
+        # Kelly is no longer speaking when she returns to listening
+        if ev.new_state == "listening":
+            kelly.is_speaking = False
+            kelly.current_speech_handle = None
+            logger.info("👂 Kelly finished and is now listening")
+    
+    @session.on("user_state_changed")
+    def on_user_state_changed(ev: UserStateChangedEvent):
+        """Track user state for debugging"""
+        logger.info(f"👤 USER STATE: {ev.old_state} → {ev.new_state}")
+    
+    @session.on("agent_false_interruption")
+    def on_false_interruption(ev: AgentFalseInterruptionEvent):
+        """Log when false interruptions are detected and resumed"""
+        if ev.resumed:
+            logger.info("✅ FALSE INTERRUPTION: Kelly resumed seamlessly")
+        else:
+            logger.warning("⚠️ FALSE INTERRUPTION: Not auto-resumed")
 
-    # log metrics as they are emitted, and total usage after session is over
-    usage_collector = metrics.UsageCollector()
-
-    @session.on("metrics_collected")
-    def _on_metrics_collected(ev: MetricsCollectedEvent):
-        metrics.log_metrics(ev.metrics)
-        usage_collector.collect(ev.metrics)
-
-    async def log_usage():
-        summary = usage_collector.get_summary()
-        logger.info(f"Usage: {summary}")
-
-    # shutdown callbacks are triggered when the session is over
-    ctx.add_shutdown_callback(log_usage)
-
+    
     await session.start(
         agent=MyAgent(),
         room=ctx.room,

From 3bf5414effb6e790ca544661687d179d3f29ead7 Mon Sep 17 00:00:00 2001
From: Riddhika Arora <riddhika05@users.noreply.github.com>
Date: Sun, 1 Feb 2026 22:01:52 +0530
Subject: [PATCH 3/8] Ignore filler words and interrupt on stop words

---
 examples/voice_agents/basic_agent.py | 60 +++++++++++++++++++++-------
 1 file changed, 46 insertions(+), 14 deletions(-)

diff --git a/examples/voice_agents/basic_agent.py b/examples/voice_agents/basic_agent.py
index f6174584ec..2576d13999 100644
--- a/examples/voice_agents/basic_agent.py
+++ b/examples/voice_agents/basic_agent.py
@@ -1,5 +1,5 @@
 import logging
-
+import re
 from dotenv import load_dotenv
 
 from livekit.agents import (
@@ -13,7 +13,13 @@
 
 # uncomment to enable Krisp background voice/noise cancellation
 # from livekit.plugins import noise_cancellation
-
+STOP_WORDS = {"wait", "no", "stop", "finish", "hold", "pause", "hold on"}
+FILLER_WORDS = {
+    "uhhuh", "okay", "alright", "mhm", "yeah", "yep", "yup", 
+    "hmm", "right", "uh", "um", "ah", "gotit", "isee", "ok", "k",
+    "sure", "yes", "interesting", "really", "wow", "ohh", "ooh",
+    "aha", "uhuh", "mhmm", "gotcha", "nice"
+}
 logger = logging.getLogger("intelligent-kelly")
 
 load_dotenv()
@@ -98,17 +104,43 @@ def on_false_interruption(ev: AgentFalseInterruptionEvent):
             logger.warning("⚠️ FALSE INTERRUPTION: Not auto-resumed")
 
     
-    await session.start(
-        agent=MyAgent(),
-        room=ctx.room,
-        room_options=room_io.RoomOptions(
-            audio_input=room_io.AudioInputOptions(
-                # uncomment to enable the Krisp BVC noise cancellation
-                # noise_cancellation=noise_cancellation.BVC(),
-            ),
-        ),
-    )
-
+    @session.on("user_input_transcribed")
+    def on_user_input_transcribed(ev: UserInputTranscribedEvent):
+        """
+        This handler provides a SECONDARY layer of defense.
+        It runs after STT completes, so it can't prevent the initial VAD interruption,
+        but it can prevent filler words from reaching the LLM and generating responses.
+        """
+        if not ev.is_final or not ev.transcript:
+            return
+        
+        clean_text = re.sub(r'[^\w\s]', '', ev.transcript.lower()).strip()
+        words = set(clean_text.split())
+        raw_phrase = clean_text.replace(" ", "")  # For "uhhuh" type phrases
+        
+        if not words:
+            return
+        
+        logger.info(f"📝 TRANSCRIPT: '{clean_text}' | Kelly speaking: {kelly.is_speaking}")
+        
+        
+        if words.intersection(STOP_WORDS):
+            logger.info(f"🛑 COMMAND DETECTED: '{clean_text}' → Calling interrupt()")
+            session.interrupt()
+            return
+    
+        if kelly.is_speaking and (raw_phrase in FILLER_WORDS or words.issubset(FILLER_WORDS)):
+            logger.info(f"🔇 FILLER SUPPRESSED: '{clean_text}' ignored during speech")
+           
+            return
+        
+        if not kelly.is_speaking and (raw_phrase in FILLER_WORDS or words.issubset(FILLER_WORDS)):
+            logger.info(f"🍃 LONE FILLER: '{clean_text}' suppressed (Kelly idle)")
+            return
+        
+        logger.info(f"✅ VALID INPUT: '{clean_text}' forwarded to LLM")
+       
+    await session.start(agent=kelly, room=ctx.room)
 
 if __name__ == "__main__":
-    cli.run_app(server)
+    cli.run_app(server)
\ No newline at end of file

From 421add1b47445d8bee03b69a60d2cc16e3308aca Mon Sep 17 00:00:00 2001
From: Riddhika Arora <riddhika05@users.noreply.github.com>
Date: Mon, 2 Feb 2026 00:48:10 +0530
Subject: [PATCH 4/8] Medium VAD thresholds with transcript handler

---
 examples/voice_agents/basic_agent.py | 275 +++++++++++++++++++--------
 1 file changed, 196 insertions(+), 79 deletions(-)

diff --git a/examples/voice_agents/basic_agent.py b/examples/voice_agents/basic_agent.py
index 2576d13999..cd75b790d7 100644
--- a/examples/voice_agents/basic_agent.py
+++ b/examples/voice_agents/basic_agent.py
@@ -1,145 +1,262 @@
+"""
+THE ACTUAL WORKING HYBRID SOLUTION
+
+Problem statement:
+- "okay" said slowly (1.5s) should NOT interrupt
+- "stop" said quickly (0.5s) SHOULD interrupt immediately
+- Can't use duration-based filtering because it fails one of these cases
+
+Solution: 
+- Use MEDIUM thresholds (catches most fillers, but some slip through)
+- Have transcript handler RESUME if interrupted by filler
+- Have transcript handler FORCE INTERRUPT if command detected but VAD didn't trigger
+
+This way:
+- Fast "stop" (0.5s) → Below threshold → VAD doesn't interrupt → Transcript handler forces interrupt ✅
+- Slow "okay" (1.5s) → Above threshold → VAD interrupts → Transcript handler RESUMES ✅
+- Fast "okay" (0.3s) → Below threshold → VAD doesn't interrupt → Transcript suppresses ✅
+"""
+
 import logging
 import re
+import asyncio
+from typing import Optional
 from dotenv import load_dotenv
-
 from livekit.agents import (
-   Agent, AgentServer, AgentSession, JobContext, JobProcess,
-    cli, metrics, UserInputTranscribedEvent, AgentStateChangedEvent,
-    UserStateChangedEvent, SpeechCreatedEvent, AgentFalseInterruptionEvent
+    Agent, AgentServer, AgentSession, JobContext, JobProcess,
+    cli, UserInputTranscribedEvent, AgentStateChangedEvent,
+    UserStateChangedEvent
 )
-from livekit.agents.llm import function_tool
-from livekit.plugins import silero
+from livekit.plugins import silero, deepgram, openai, cartesia
 from livekit.plugins.turn_detector.multilingual import MultilingualModel
 
-# uncomment to enable Krisp background voice/noise cancellation
-# from livekit.plugins import noise_cancellation
-STOP_WORDS = {"wait", "no", "stop", "finish", "hold", "pause", "hold on"}
+logger = logging.getLogger("intelligent-kelly")
+logger.setLevel(logging.INFO)
+load_dotenv()
+
+# CONFIGURATION
+STOP_WORDS = {"wait", "stop", "finish", "hold", "pause", "halt"}
 FILLER_WORDS = {
-    "uhhuh", "okay", "alright", "mhm", "yeah", "yep", "yup", 
+    "uhhuh", "okay", "alright", "mhm", "yeah", "yep", "yup",
     "hmm", "right", "uh", "um", "ah", "gotit", "isee", "ok", "k",
     "sure", "yes", "interesting", "really", "wow", "ohh", "ooh",
-    "aha", "uhuh", "mhmm", "gotcha", "nice"
+    "aha", "mhmm", "gotcha", "nice", "oh", "all", "got", "it", "i", "see"
 }
-logger = logging.getLogger("intelligent-kelly")
+FILLER_PHRASES = {"all right", "got it", "i see", "uh huh", "oh okay", "oh ok"}
 
-load_dotenv()
+def is_filler_input(transcript: str) -> bool:
+    """Check if transcript is purely a filler acknowledgment"""
+    clean = transcript.lower().strip()
+    clean_no_punct = re.sub(r'[^\w\s]', '', clean)
+    
+    if clean_no_punct in FILLER_PHRASES:
+        return True
+    if clean_no_punct.replace(" ", "") in FILLER_WORDS:
+        return True
+    
+    words = clean_no_punct.split()
+    if words and all(word in FILLER_WORDS for word in words):
+        return True
+    return False
 
+def contains_command(transcript: str) -> bool:
+    """Check if transcript contains an explicit stop command"""
+    clean = transcript.lower().strip()
+    clean_no_punct = re.sub(r'[^\w\s]', '', clean)
+    words = clean_no_punct.split()
+    
+    if not words:
+        return False
+    
+    # Direct command (starts with stop word)
+    if words[0] in STOP_WORDS:
+        return True
+    
+    # Command after brief acknowledgment: "yeah wait", "okay stop"
+    if len(words) >= 2:
+        for i in range(len(words) - 1):
+            if words[i] in FILLER_WORDS and words[i + 1] in STOP_WORDS:
+                return True
+            if words[i] in {"but", "and"} and words[i + 1] in STOP_WORDS:
+                return True
+    
+    # Avoid false positives in longer sentences
+    # "I have no idea" should NOT be a command
+    if len(words) > 3 and any(w in STOP_WORDS for w in words):
+        # Only treat as command if stop word is in first 2 positions
+        return any(words[i] in STOP_WORDS for i in range(min(2, len(words))))
+    
+    return False
 
-class MyAgent(Agent):
+class IntelligentAgent(Agent):
     def __init__(self) -> None:
         super().__init__(
-            instructions="Your name is Kelly. You would interact with users via voice."
-            "with that in mind keep your responses concise and to the point."
-            "do not use emojis, asterisks, markdown, or other special characters in your responses."
-            "You are curious and friendly, and have a sense of humor."
-            "you will speak english to the user",
+            instructions=(
+                "Your name is Kelly. Keep responses concise and witty. "
+                "When users say things like 'yeah' or 'okay' while you're speaking, "
+                "it means they're listening - keep going! "
+                "Only stop if they explicitly say 'wait', 'stop', or 'hold on'."
+            ),
         )
         self.is_speaking = False
-        self.current_speech_handle = None
+        self.was_interrupted_by_vad = False
+        self.last_speech_content = ""
+        
     async def on_enter(self):
-      
         await self.session.generate_reply()
-    
-   
 
 server = AgentServer()
 
-
 def prewarm(proc: JobProcess):
     proc.userdata["vad"] = silero.VAD.load()
 
-
 server.setup_fnc = prewarm
 
-
 @server.rtc_session()
 async def entrypoint(ctx: JobContext):
-   
     session = AgentSession(
         stt="deepgram/nova-3",
         llm="openai/gpt-4o-mini",
         tts="cartesia/sonic-2:9626c31c-bec5-4cca-baa8-f8ba9e84c8bc",
         vad=ctx.proc.userdata["vad"],
         turn_detection=MultilingualModel(),
+        
+        # === HYBRID STRATEGY ===
+        # Medium-low threshold: Catches most fillers but allows quick commands
         allow_interruptions=True,
-        min_interruption_duration=0.8,  
-        min_interruption_words=2,
-        false_interruption_timeout=1.0,
-        resume_false_interruption=True,
-        discard_audio_if_uninterruptible=True,
+        min_interruption_duration=0.6,  # 0.6s - faster than most fillers, slower than most commands
+        min_interruption_words=2,        # Require 2 words minimum
+        
+        # Enable auto-resume for false positives
+        false_interruption_timeout=1.0,  # Wait 1s for transcript
+        resume_false_interruption=True,  # Auto-resume if false positive
+        
         preemptive_generation=False,
-        min_endpointing_delay=0.6,
-        max_endpointing_delay=3.0,
+        min_endpointing_delay=0.5,
+        max_endpointing_delay=2.5,
     )
-    kelly = MyAgent()
+    
+    kelly = IntelligentAgent()
+    
+    logger.info("=" * 80)
+    logger.info("🚀 HYBRID INTELLIGENT INTERRUPTION HANDLER")
+    logger.info("⚙️  Strategy:")
+    logger.info("   - Medium VAD thresholds (0.6s, 2 words)")
+    logger.info("   - Auto-resume on false interruptions")
+    logger.info("   - Manual interrupt on commands that slip through")
+    logger.info("   - Transcript suppression for fillers")
+    logger.info("=" * 80)
+    
+    # Track interruption state
+    vad_just_interrupted = False
+    
     @session.on("speech_created")
-    def on_speech_created(ev: SpeechCreatedEvent):
-        """Track when Kelly actually starts speaking audio"""
+    def on_speech_created(ev):
+        nonlocal vad_just_interrupted
         kelly.is_speaking = True
-        kelly.current_speech_handle = ev.speech_handle
+        kelly.was_interrupted_by_vad = False
+        vad_just_interrupted = False
+        
+        # Store what Kelly is saying for potential resume
+        if hasattr(ev, 'speech_handle') and hasattr(ev.speech_handle, 'text'):
+            kelly.last_speech_content = ev.speech_handle.text
+        
         logger.info("🎤 KELLY STARTED SPEAKING")
     
     @session.on("agent_state_changed")
-    def on_agent_state_changed(ev: AgentStateChangedEvent):
-        """Track agent state transitions"""
+    def on_agent_state_changed(ev):
+        nonlocal vad_just_interrupted
+        
         logger.info(f"🎭 AGENT STATE: {ev.old_state} → {ev.new_state}")
         
-        # Kelly is no longer speaking when she returns to listening
+        # Detect if Kelly was interrupted while speaking
+        if ev.old_state == "speaking" and ev.new_state == "listening":
+            if kelly.is_speaking:
+                kelly.was_interrupted_by_vad = True
+                vad_just_interrupted = True
+                logger.info("⚠️ KELLY INTERRUPTED - waiting for transcript to decide action...")
+        
         if ev.new_state == "listening":
             kelly.is_speaking = False
-            kelly.current_speech_handle = None
-            logger.info("👂 Kelly finished and is now listening")
     
     @session.on("user_state_changed")
-    def on_user_state_changed(ev: UserStateChangedEvent):
-        """Track user state for debugging"""
+    def on_user_state_changed(ev):
         logger.info(f"👤 USER STATE: {ev.old_state} → {ev.new_state}")
     
-    @session.on("agent_false_interruption")
-    def on_false_interruption(ev: AgentFalseInterruptionEvent):
-        """Log when false interruptions are detected and resumed"""
-        if ev.resumed:
-            logger.info("✅ FALSE INTERRUPTION: Kelly resumed seamlessly")
-        else:
-            logger.warning("⚠️ FALSE INTERRUPTION: Not auto-resumed")
-
+    # Try to register false interruption handler
+    try:
+        @session.on("agent_false_interruption")
+        def on_false_interruption(ev):
+            if hasattr(ev, 'resumed') and ev.resumed:
+                logger.info("✅ FALSE INTERRUPTION AUTO-RESUMED by LiveKit")
+    except:
+        logger.warning("⚠️ False interruption event not available in this LiveKit version")
     
     @session.on("user_input_transcribed")
-    def on_user_input_transcribed(ev: UserInputTranscribedEvent):
-        """
-        This handler provides a SECONDARY layer of defense.
-        It runs after STT completes, so it can't prevent the initial VAD interruption,
-        but it can prevent filler words from reaching the LLM and generating responses.
-        """
+    def on_user_input_transcribed(ev):
+        nonlocal vad_just_interrupted
+        
         if not ev.is_final or not ev.transcript:
             return
         
         clean_text = re.sub(r'[^\w\s]', '', ev.transcript.lower()).strip()
-        words = set(clean_text.split())
-        raw_phrase = clean_text.replace(" ", "")  # For "uhhuh" type phrases
         
-        if not words:
-            return
+        logger.info(f"📝 TRANSCRIPT: '{clean_text}' | Kelly speaking: {kelly.is_speaking} | Just interrupted: {vad_just_interrupted}")
         
-        logger.info(f"📝 TRANSCRIPT: '{clean_text}' | Kelly speaking: {kelly.is_speaking}")
+        # === CASE 1: Kelly was just interrupted by VAD ===
+        if kelly.was_interrupted_by_vad or vad_just_interrupted:
+            
+            if contains_command(clean_text):
+                logger.info(f"🛑 REAL COMMAND after VAD interrupt: '{clean_text}' - staying stopped")
+                kelly.was_interrupted_by_vad = False
+                vad_just_interrupted = False
+                # Allow normal processing - the interrupt was correct
+                return
+            
+            elif is_filler_input(clean_text):
+                logger.info(f"🔄 FALSE INTERRUPT: '{clean_text}' was just a filler - should resume")
+                kelly.was_interrupted_by_vad = False
+                vad_just_interrupted = False
+                
+                # LiveKit's resume_false_interruption should handle this automatically
+                # But we still suppress the transcript from reaching LLM
+                return
+            
+            else:
+                logger.info(f"✅ REAL INPUT after interrupt: '{clean_text}' - valid interruption")
+                kelly.was_interrupted_by_vad = False
+                vad_just_interrupted = False
+                # Allow normal processing
+                return
         
+        # === CASE 2: Kelly is currently speaking (VAD didn't interrupt yet) ===
+        if kelly.is_speaking:
+            
+            if contains_command(clean_text):
+                logger.info(f"🛑 COMMAND while speaking: '{clean_text}' - forcing interrupt NOW")
+                session.interrupt()
+                return
+            
+            elif is_filler_input(clean_text):
+                logger.info(f"🔇 FILLER while speaking: '{clean_text}' - completely ignored")
+                # Don't interrupt, don't pass to LLM
+                return
+            
+            else:
+                logger.info(f"💬 REAL INPUT while speaking: '{clean_text}' - allowing interrupt")
+                session.interrupt()
+                return
         
-        if words.intersection(STOP_WORDS):
-            logger.info(f"🛑 COMMAND DETECTED: '{clean_text}' → Calling interrupt()")
-            session.interrupt()
-            return
+        # === CASE 3: Kelly is idle ===
+        if not kelly.is_speaking:
+            
+            if is_filler_input(clean_text):
+                logger.info(f"🍃 FILLER while idle: '{clean_text}' - suppressed")
+                return
+            
+            logger.info(f"✅ VALID INPUT while idle: '{clean_text}'")
+            # Normal processing
     
-        if kelly.is_speaking and (raw_phrase in FILLER_WORDS or words.issubset(FILLER_WORDS)):
-            logger.info(f"🔇 FILLER SUPPRESSED: '{clean_text}' ignored during speech")
-           
-            return
-        
-        if not kelly.is_speaking and (raw_phrase in FILLER_WORDS or words.issubset(FILLER_WORDS)):
-            logger.info(f"🍃 LONE FILLER: '{clean_text}' suppressed (Kelly idle)")
-            return
-        
-        logger.info(f"✅ VALID INPUT: '{clean_text}' forwarded to LLM")
-       
     await session.start(agent=kelly, room=ctx.room)
 
 if __name__ == "__main__":

From 27a5bb93aece07ca422cce69befbcd80e13bcdef Mon Sep 17 00:00:00 2001
From: Riddhika Arora <riddhika05@users.noreply.github.com>
Date: Mon, 2 Feb 2026 19:50:03 +0530
Subject: [PATCH 5/8] changed to stop commmand from command in logging info

---
 examples/voice_agents/basic_agent.py |   2 +-
 examples/voice_agents/test.py        | 172 ---------------------------
 2 files changed, 1 insertion(+), 173 deletions(-)
 delete mode 100644 examples/voice_agents/test.py

diff --git a/examples/voice_agents/basic_agent.py b/examples/voice_agents/basic_agent.py
index cd75b790d7..4eb6876cee 100644
--- a/examples/voice_agents/basic_agent.py
+++ b/examples/voice_agents/basic_agent.py
@@ -233,7 +233,7 @@ def on_user_input_transcribed(ev):
         if kelly.is_speaking:
             
             if contains_command(clean_text):
-                logger.info(f"🛑 COMMAND while speaking: '{clean_text}' - forcing interrupt NOW")
+                logger.info(f"🛑 STOP COMMAND while speaking: '{clean_text}' - forcing interrupt NOW")
                 session.interrupt()
                 return
             
diff --git a/examples/voice_agents/test.py b/examples/voice_agents/test.py
deleted file mode 100644
index 38e2906df7..0000000000
--- a/examples/voice_agents/test.py
+++ /dev/null
@@ -1,172 +0,0 @@
-import logging
-import re
-from dotenv import load_dotenv
-from livekit.agents import (
-    Agent, AgentServer, AgentSession, JobContext, JobProcess,
-    cli, metrics, UserInputTranscribedEvent, AgentStateChangedEvent,
-    UserStateChangedEvent, SpeechCreatedEvent, AgentFalseInterruptionEvent
-)
-from livekit.plugins import silero, deepgram, openai, cartesia
-from livekit.plugins.turn_detector.multilingual import MultilingualModel
-
-logger = logging.getLogger("intelligent-kelly")
-logger.setLevel(logging.INFO)
-load_dotenv()
-
-# CONFIGURATION: Easily adjustable lists for modularity
-STOP_WORDS = {"wait", "no", "stop", "finish", "hold", "pause", "hold on"}
-FILLER_WORDS = {
-    "uhhuh", "okay", "alright", "mhm", "yeah", "yep", "yup", 
-    "hmm", "right", "uh", "um", "ah", "gotit", "isee", "ok", "k",
-    "sure", "yes", "interesting", "really", "wow", "ohh", "ooh",
-    "aha", "uhuh", "mhmm", "gotcha", "nice"
-}
-
-class MyAgent(Agent):
-    def __init__(self) -> None:
-        super().__init__(
-            instructions=(
-                "Your name is Kelly. Keep responses concise and witty. "
-                "If you hear acknowledgements like 'yeah' or 'ok' while explaining something, "
-                "that means the user is listening - keep going! "
-                "Only stop if they explicitly ask you to wait or stop."
-            ),
-        )
-        # More reliable state tracking
-        self.is_speaking = False
-        self.current_speech_handle = None
-        
-    async def on_enter(self):
-        await self.session.generate_reply()
-
-server = AgentServer()
-
-def prewarm(proc: JobProcess):
-    proc.userdata["vad"] = silero.VAD.load()
-
-server.setup_fnc = prewarm
-
-@server.rtc_session()
-async def entrypoint(ctx: JobContext):
-    # === CRITICAL: Configure false interruption handling ===
-    session = AgentSession(
-        stt="deepgram/nova-3",
-        llm="openai/gpt-4o-mini",
-        tts="cartesia/sonic-2:9626c31c-bec5-4cca-baa8-f8ba9e84c8bc",
-        vad=ctx.proc.userdata["vad"],
-        turn_detection=MultilingualModel(),
-        
-        # === PRIMARY DEFENSE: Built-in False Interruption Handling ===
-        # These parameters tell LiveKit to wait for real speech before committing to an interruption
-        allow_interruptions=True,
-        
-        # Require at least 0.8 seconds of detected speech before interrupting
-        # This filters out most short fillers like "yeah", "ok", "hmm"
-        min_interruption_duration=0.8,  
-        
-        # Require at least 2 words to be transcribed before confirming interruption
-        # Single filler words won't trigger interruption
-        min_interruption_words=2,
-        
-        # Wait up to 1 second for transcription after VAD detects speech
-        # If no real words are transcribed, treat it as a false interruption
-        false_interruption_timeout=1.0,
-        
-        # Automatically resume speaking if interruption was false positive
-        # This is THE KEY FEATURE for seamless continuation
-        resume_false_interruption=True,
-        
-        # Don't buffer audio while agent is speaking
-        discard_audio_if_uninterruptible=True,
-        
-        # Other important settings
-        preemptive_generation=False,
-        min_endpointing_delay=0.6,
-        max_endpointing_delay=3.0,
-    )
-    
-    kelly = MyAgent()
-    
-    # === STATE TRACKING ===
-    
-    @session.on("speech_created")
-    def on_speech_created(ev: SpeechCreatedEvent):
-        """Track when Kelly actually starts speaking audio"""
-        kelly.is_speaking = True
-        kelly.current_speech_handle = ev.speech_handle
-        logger.info("🎤 KELLY STARTED SPEAKING")
-    
-    @session.on("agent_state_changed")
-    def on_agent_state_changed(ev: AgentStateChangedEvent):
-        """Track agent state transitions"""
-        logger.info(f"🎭 AGENT STATE: {ev.old_state} → {ev.new_state}")
-        
-        # Kelly is no longer speaking when she returns to listening
-        if ev.new_state == "listening":
-            kelly.is_speaking = False
-            kelly.current_speech_handle = None
-            logger.info("👂 Kelly finished and is now listening")
-    
-    @session.on("user_state_changed")
-    def on_user_state_changed(ev: UserStateChangedEvent):
-        """Track user state for debugging"""
-        logger.info(f"👤 USER STATE: {ev.old_state} → {ev.new_state}")
-    
-    @session.on("agent_false_interruption")
-    def on_false_interruption(ev: AgentFalseInterruptionEvent):
-        """Log when false interruptions are detected and resumed"""
-        if ev.resumed:
-            logger.info("✅ FALSE INTERRUPTION: Kelly resumed seamlessly")
-        else:
-            logger.warning("⚠️ FALSE INTERRUPTION: Not auto-resumed")
-    
-    # === SECONDARY FILTER: Transcript-Level Suppression ===
-    
-    @session.on("user_input_transcribed")
-    def on_user_input_transcribed(ev: UserInputTranscribedEvent):
-        """
-        This handler provides a SECONDARY layer of defense.
-        It runs after STT completes, so it can't prevent the initial VAD interruption,
-        but it can prevent filler words from reaching the LLM and generating responses.
-        """
-        if not ev.is_final or not ev.transcript:
-            return
-        
-        # Normalize text: lowercase, remove punctuation
-        clean_text = re.sub(r'[^\w\s]', '', ev.transcript.lower()).strip()
-        words = set(clean_text.split())
-        raw_phrase = clean_text.replace(" ", "")  # For "uhhuh" type phrases
-        
-        if not words:
-            return
-        
-        logger.info(f"📝 TRANSCRIPT: '{clean_text}' | Kelly speaking: {kelly.is_speaking}")
-        
-        # === CASE 1: Command words ALWAYS cause interruption ===
-        if words.intersection(STOP_WORDS):
-            logger.info(f"🛑 COMMAND DETECTED: '{clean_text}' → Calling interrupt()")
-            session.interrupt()
-            return
-        
-        # === CASE 2: Pure filler words while Kelly is speaking ===
-        # These should be ignored - don't let them reach the LLM
-        if kelly.is_speaking and (raw_phrase in FILLER_WORDS or words.issubset(FILLER_WORDS)):
-            logger.info(f"🔇 FILLER SUPPRESSED: '{clean_text}' ignored during speech")
-            # By returning here without calling session.interrupt() or forwarding to LLM,
-            # we ensure this transcript is effectively dropped
-            return
-        
-        # === CASE 3: Lone filler when Kelly is silent ===
-        # Still suppress these - they shouldn't trigger LLM processing
-        if not kelly.is_speaking and (raw_phrase in FILLER_WORDS or words.issubset(FILLER_WORDS)):
-            logger.info(f"🍃 LONE FILLER: '{clean_text}' suppressed (Kelly idle)")
-            return
-        
-        # === CASE 4: Real user input (commands, questions, statements) ===
-        logger.info(f"✅ VALID INPUT: '{clean_text}' forwarded to LLM")
-        # Let this proceed normally - it will reach the LLM
-    
-    await session.start(agent=kelly, room=ctx.room)
-
-if __name__ == "__main__":
-    cli.run_app(server)
\ No newline at end of file

From 5fc30e258cedd64ab1fafa00a3d3765e68e263ee Mon Sep 17 00:00:00 2001
From: Riddhika Arora <riddhika05@users.noreply.github.com>
Date: Mon, 2 Feb 2026 20:32:58 +0530
Subject: [PATCH 6/8] Added readme for provided soln and demo transcript logs

---
 .../voice_agents/INTERREUPT_HANDLER_README.md | 243 ++++++++++++++++++
 examples/voice_agents/demo_logs.txt           | 157 +++++++++++
 2 files changed, 400 insertions(+)
 create mode 100644 examples/voice_agents/INTERREUPT_HANDLER_README.md
 create mode 100644 examples/voice_agents/demo_logs.txt

diff --git a/examples/voice_agents/INTERREUPT_HANDLER_README.md b/examples/voice_agents/INTERREUPT_HANDLER_README.md
new file mode 100644
index 0000000000..c06aa11c4c
--- /dev/null
+++ b/examples/voice_agents/INTERREUPT_HANDLER_README.md
@@ -0,0 +1,243 @@
+# LiveKit Intelligent Interruption Handling
+
+A voice agent that knows the difference between "yeah, I'm listening" and "wait, stop talking."
+
+## Table of Contents
+
+- [Problem](#problem)
+- [Solution](#solution)
+- [Installation](#installation)
+- [Configuration](#configuration)
+- [Usage](#usage)
+- [How It Works](#how-it-works)
+- [Demo](#demo)
+- [Customization](#customization)
+- [Troubleshooting](#troubleshooting)
+
+---
+
+## Problem
+
+By default, LiveKit treats any sound picked up by Voice Activity Detection (VAD) as an interruption. When you say "yeah" or "okay" to show you're listening (backchanneling), the agent thinks you want to interrupt and stops talking mid-sentence.
+
+The goal was to fix this: make the agent smart enough to ignore casual acknowledgments while still responding immediately to actual commands like "wait" or "stop."
+
+The catch: the agent can't pause or stutter when it hears a filler word. It needs to keep talking smoothly, like a human would.
+
+---
+
+## Solution
+
+The solution uses three layers:
+
+**1. VAD Threshold Tuning**  
+Set the interruption thresholds higher (0.6 seconds, 2 words minimum). This filters out most quick "yeah" or "okay" responses before they even trigger an interruption.
+
+**2. Auto-Resume**  
+If VAD does interrupt on a filler word, LiveKit's `resume_false_interruption` feature kicks in and the agent continues talking without missing a beat.
+
+**3. Transcript Analysis**  
+When speech is detected, we wait for the actual transcription to come through. Then we check: is this a filler word or a real command? If it's a filler, suppress it. If it's a command like "stop", force an interruption even if VAD missed it.
+
+### Why This Works
+
+There's a timing problem: VAD detects sound almost instantly (~10ms), but speech-to-text takes longer (~200-500ms). By the time we know what was said, VAD might have already interrupted.
+
+The solution handles this by:
+- Fast fillers (under 0.6s) are detected by VAD but are too short to trigger an interruption in the first place
+- Slow fillers (over 0.6s) do trigger an interruption, but get auto-resumed when we realize they're fillers
+- Commands work at any speed because we manually interrupt when we detect them in the transcript
+
+---
+
+## Installation
+
+### Requirements
+
+- Python 3.9+
+- LiveKit account (free tier works fine)
+
+### Install Dependencies
+
+```bash
+pip install "livekit-agents[openai,silero,deepgram,cartesia,turn-detector]~=1.0"
+```
+
+---
+
+## Configuration
+
+Create a `.env` file in the project root (copy from `.env.example`):
+
+```bash
+# Required - Get these from LiveKit Cloud
+LIVEKIT_API_SECRET="your-api-secret-here"
+LIVEKIT_API_KEY="your-api-key-here"
+LIVEKIT_URL="wss://your-project.livekit.cloud"
+
+# Optional - For model providers
+OPENAI_API_KEY="your-openai-key"
+DEEPGRAM_API_KEY="your-deepgram-key"
+CARTESIA_API_KEY="your-cartesia-key"
+```
+
+To get LiveKit credentials:
+1. Go to [cloud.livekit.io](https://cloud.livekit.io)
+2. Create a project
+3. Copy the API key, secret, and WebSocket URL from Settings
+
+
+
+---
+
+## Usage
+
+Run the agent locally:
+
+```bash
+python basic_agent.py console
+```
+
+The agent starts in console mode. Just start talking - it'll respond.
+
+---
+
+## How It Works
+
+### The Flow
+
+```
+User says "yeah" while agent talks
+    ↓
+VAD detects speech in ~10ms
+    ↓
+Is it under 0.6 seconds? → Agent keeps talking
+Is it over 0.6 seconds? → VAD interrupts
+    ↓
+STT finishes transcribing (~200-500ms later)
+    ↓
+Our code checks the transcript:
+    - Filler word? → Resume immediately
+    - Command word? → Stay stopped
+    ↓
+Result: No awkward pauses
+```
+
+### State-Based Logic
+
+The same word behaves differently based on context:
+
+| What user says | Agent state | What happens |
+|----------------|-------------|--------------|
+| "yeah" / "okay" / "hmm" | Speaking | Ignored completely |
+| "wait" / "stop" | Speaking | Stops immediately |
+| "yeah" / "okay" | Silent | Suppressed (doesn't trigger response) |
+| "tell me more" | Silent | Normal response |
+
+### Filler Detection
+
+We check if something is a filler in two ways:
+
+1. **No-space check**: "uh-huh" becomes "uhhuh" and matches our list
+2. **All-words check**: "all right" splits to ["all", "right"], both are in our list, so it's a filler
+
+This handles things like "got it", "i see", "all right" automatically.
+
+### Command Detection
+
+For commands, we look for:
+- Stop word at the start: "stop", "wait please"
+- Filler + stop word: "yeah wait", "okay stop"  
+- Conjunction + stop word: "but wait", "and hold on"
+
+We skip long sentences (over 3 words) to avoid false positives like "I have no idea".
+
+---
+
+## Demo
+
+**[Watch the demo video](https://drive.google.com/file/d/1IHqKgqxAG2ZHRwWsDxhksyp3En5qv5iM/view?usp=sharing)**
+
+The demo shows:
+- Agent continuing to talk when hearing "yeah", "okay", "uh-huh"
+- Agent stopping immediately when hearing "stop" or "wait"
+- Handling mixed inputs like "yeah but wait"
+
+See `demo_logs.txt` for the full transcript from test runs.
+
+---
+
+## Customization
+
+### Adding More Filler Words
+
+Edit `FILLER_WORDS` in `basic_agent.py`:
+
+```python
+FILLER_WORDS = {
+    "yeah", "yep", "ok", "hmm", "uh", "um",
+    # Add your own here
+    "absolutely", "indeed", "totally"
+}
+```
+
+### Adding More Command Words
+
+Edit `STOP_WORDS`:
+
+```python
+STOP_WORDS = {
+    "wait", "stop", "hold", "pause",
+    # Add your own
+    "interrupt", "enough"
+}
+```
+
+### Tuning Sensitivity
+
+In the `AgentSession` configuration:
+
+```python
+min_interruption_duration=0.6,  # Lower = more responsive, higher = fewer interruptions
+min_interruption_words=2,        # Minimum words before triggering interrupt
+```
+
+Tuning guide:
+- Want fewer interruptions? Set duration to 0.8-1.0 seconds
+- Want faster response to commands? Set duration to 0.4-0.5 seconds
+- Want stricter filtering? Increase min_interruption_words to 3
+
+---
+
+## Troubleshooting
+
+### Agent doesn't stop for commands
+
+Check the logs for `🛑 STOP COMMAND` messages. If you don't see them:
+- Make sure the word is in `STOP_WORDS`
+- Try saying shorter commands (3 words or fewer; longer sentences are skipped by command detection)
+- Verify `session.interrupt()` is being called
+
+### Agent pauses briefly on fillers
+
+This means the auto-resume isn't working:
+- Increase `min_interruption_duration` to 0.8
+- Check that `resume_false_interruption=True` in the session config
+- Look for "FALSE INTERRUPTION AUTO-RESUMED" in logs
+
+### Fillers still triggering responses
+
+Check what STT is actually transcribing (look at the `📝 TRANSCRIPT` logs). Sometimes "yeah" comes through as "yea" or other variations. Add those to `FILLER_WORDS`.
+
+
+---
+
+## Additional Resources
+
+- [LiveKit Agents Docs](https://docs.livekit.io/agents/)
+- [Assignment Repo](https://github.com/Dark-Sys-Jenkins/agents-assignment)
+
+---
+
+**Last Updated**: February 2026  
+**LiveKit Agents Version**: 1.3.12
\ No newline at end of file
diff --git a/examples/voice_agents/demo_logs.txt b/examples/voice_agents/demo_logs.txt
new file mode 100644
index 0000000000..22fb3c02fc
--- /dev/null
+++ b/examples/voice_agents/demo_logs.txt
@@ -0,0 +1,157 @@
+PS C:\Users\riddh\Desktop\agents-assignment> cd examples/voice_agents     
+PS C:\Users\riddh\Desktop\agents-assignment\examples\voice_agents> python basic_agent.py console
+    Agents   Starting console mode 🚀
+
+    19:53:10.327 DEBUG  asyncio            Using proactor: IocpProactor  
+    19:53:10.333 INFO   livekit.agents     starting worker {"version": "1.3.12", "rtc-version": "1.0.23"}
+    19:53:10.335 INFO   livekit.agents     starting inference executor  
+    19:53:10.383 INFO   livekit.agents     initializing process {"pid": 24660, "inference": true}
+    19:53:30.649 INFO   livekit.agents     process initialized {"pid": 24660, "inference": true, "elapsed_time": 20.26}
+    19:53:30.655 INFO   livekit.agents     HTTP server listening on :57067  
+    19:53:30.692 INFO   livekit.agents     initializing job runner {"tid": 13084}
+    19:53:30.942 DEBUG  asyncio            Using proactor: IocpProactor  
+    19:53:30.945 INFO   livekit.agents     job runner initialized {"tid": 13084, "elapsed_time": 0.25}
+    19:53:31.974 INFO   intelligent-kelly  ================================================================================  
+    19:53:31.977 INFO   intelligent-kelly  🚀 HYBRID INTELLIGENT INTERRUPTION HANDLER  
+    19:53:31.979 INFO   intelligent-kelly  ⚙️  Strategy:  
+    19:53:31.981 INFO   intelligent-kelly     - Medium VAD thresholds (0.6s, 2 words)  
+    19:53:31.992 INFO   intelligent-kelly     - Auto-resume on false interruptions  
+    19:53:31.999 INFO   intelligent-kelly     - Manual interrupt on commands that slip through  
+    19:53:32.010 INFO   intelligent-kelly     - Transcript suppression for fillers  
+    19:53:32.025 INFO   intelligent-kelly  ================================================================================  
+    19:53:32.064 DEBUG  livekit.agents     http_session(): creating a new httpclient ctx  
+    19:53:32.220 INFO   intelligent-kelly  🎤 KELLY STARTED SPEAKING  
+    19:53:32.677 INFO   intelligent-kelly  🎭 AGENT STATE: initializing → listening  
+    19:53:32.682 DEBUG  livekit.agents     using audio io: `Console` -> `AgentSession` -> `TranscriptSynchronizer` -> `Console`  
+    19:53:32.687 DEBUG  livekit.agents     using transcript io: `AgentSession` -> `TranscriptSynchronizer`  
+    19:53:32.723 INFO   intelligent-kelly  🎭 AGENT STATE: listening → thinking  
+    19:53:35.010 INFO   intelligent-kelly  🎭 AGENT STATE: thinking → speaking  
+    19:53:39.969 INFO   intelligent-kelly  🎭 AGENT STATE: speaking → listening  
+    19:53:41.662 INFO   intelligent-kelly  👤 USER STATE: listening → speaking  
+    19:53:44.565 INFO   intelligent-kelly  📝 TRANSCRIPT: 'tell me about retrieval augmented generation' | Kelly speaking: False | Just interrupted: False  
+    19:53:44.570 INFO   intelligent-kelly  ✅ VALID INPUT while idle: 'tell me about retrieval augmented generation'  
+    19:53:44.574 DEBUG  livekit.agents     received user transcript {"user_transcript": "Tell me about retrieval augmented generation.", "language": "en", "transcript_delay": 0.5072145462036133}
+    19:53:44.662 INFO   intelligent-kelly  👤 USER STATE: speaking → listening  
+    19:53:44.786 DEBUG  livekit.…_detector eou prediction  
+                                           {"eou_probability": 0.2723883092403412, "duration": 0.111, "input": "<|im_start|>assistant\ngot it ready to sprinkle some wit on your queries whats on your 
+mind<|im_end|>\n<|im_start|>user\ntell me about retrieval augmented generation"}
+    19:53:44.794 INFO   intelligent-kelly  🎤 KELLY STARTED SPEAKING  
+    19:53:44.809 INFO   intelligent-kelly  🎭 AGENT STATE: listening → thinking  
+    19:53:46.667 INFO   intelligent-kelly  🎭 AGENT STATE: thinking → speaking  
+    19:53:52.478 INFO   intelligent-kelly  👤 USER STATE: listening → speaking  
+    19:53:53.334 INFO   intelligent-kelly  📝 TRANSCRIPT: 'okay' | Kelly speaking: True | Just interrupted: False  
+    19:53:53.338 INFO   intelligent-kelly  🔇 FILLER while speaking: 'okay' - completely ignored  
+    19:53:53.342 DEBUG  livekit.agents     received user transcript {"user_transcript": "Okay.", "language": "en", "transcript_delay": 0.4681355953216553}
+    19:53:53.463 INFO   intelligent-kelly  👤 USER STATE: speaking → listening  
+    19:53:53.568 DEBUG  livekit.…_detector eou prediction  
+                                           {"eou_probability": 0.14005683362483978, "duration": 0.089, "input": "<|im_start|>assistant\ngot it ready to sprinkle some wit on your queries whats on your         
+mind<|im_end|>\n<|im_start|>user\ntell me about retrieval augmented generation okay"}
+    19:53:59.666 INFO   intelligent-kelly  👤 USER STATE: listening → speaking  
+    19:54:00.242 INFO   intelligent-kelly  📝 TRANSCRIPT: 'okay' | Kelly speaking: True | Just interrupted: False  
+    19:54:00.247 INFO   intelligent-kelly  🔇 FILLER while speaking: 'okay' - completely ignored  
+    19:54:00.252 DEBUG  livekit.agents     received user transcript {"user_transcript": "Okay.", "language": "en", "transcript_delay": 0.08392596244812012}
+    19:54:00.291 INFO   intelligent-kelly  🎭 AGENT STATE: speaking → listening  
+    19:54:00.301 INFO   intelligent-kelly  ⚠️ KELLY INTERRUPTED - waiting for transcript to decide action...  
+    19:54:01.170 INFO   intelligent-kelly  📝 TRANSCRIPT: 'more' | Kelly speaking: False | Just interrupted: True  
+    19:54:01.174 INFO   intelligent-kelly  ✅ REAL INPUT after interrupt: 'more' - valid interruption  
+    19:54:01.179 DEBUG  livekit.agents     received user transcript {"user_transcript": "More.", "language": "en", "transcript_delay": 0.5154080390930176}                                                      
+    19:54:01.282 INFO   intelligent-kelly  👤 USER STATE: speaking → listening  
+    19:54:01.522 DEBUG  livekit.…_detector eou prediction  
+                                           {"eou_probability": 0.04437876120209694, "duration": 0.228, "input": "<|im_start|>assistant\ngot it ready to sprinkle some wit on your queries whats on your         
+mind<|im_end|>\n<|im_start|>user\ntell me about retrieval augmented generation<|im_end|>\n<|im_start|>assistant\nretrieval augmented generation rag is like having your cake and eating it toocombining the best
+of retrieval systems with generative models it pulls relevant information from a database or documents to enhance response quality generating answers<|im_end|>\n<|im_start|>user\nokay okay more"}
+    19:54:01.529 INFO   intelligent-kelly  🎤 KELLY STARTED SPEAKING                                                                                                                                            
+    19:54:01.541 INFO   intelligent-kelly  🎭 AGENT STATE: listening → thinking  
+    19:54:02.441 INFO   intelligent-kelly  🎭 AGENT STATE: thinking → speaking  
+    19:54:13.964 INFO   intelligent-kelly  👤 USER STATE: listening → speaking  
+    19:54:14.665 INFO   intelligent-kelly  👤 USER STATE: speaking → listening  
+    19:54:15.043 INFO   intelligent-kelly  📝 TRANSCRIPT: 'stop' | Kelly speaking: True | Just interrupted: False  
+    19:54:15.047 INFO   intelligent-kelly  🛑 STOP COMMAND while speaking: 'stop' - forcing interrupt NOW  
+    19:54:15.054 DEBUG  livekit.agents     received user transcript {"user_transcript": "Stop.", "language": "en", "transcript_delay": 0.8940985202789307}
+    19:54:15.070 INFO   intelligent-kelly  🎭 AGENT STATE: speaking → listening  
+    19:54:15.076 INFO   intelligent-kelly  ⚠️ KELLY INTERRUPTED - waiting for transcript to decide action...  
+    19:54:15.318 DEBUG  livekit.…_detector eou prediction  
+                                           {"eou_probability": 0.03504103049635887, "duration": 0.252, "input": "<|im_start|>assistant\ngot it ready to sprinkle some wit on your queries whats on your         
+mind<|im_end|>\n<|im_start|>user\ntell me about retrieval augmented generation<|im_end|>\n<|im_start|>assistant\nretrieval augmented generation rag is like having your cake and eating it toocombining the best
+of retrieval systems with generative models it pulls relevant information from a database or documents to enhance response quality generating answers<|im_end|>\n<|im_start|>user\nokay okay more stop"}        
+    19:54:15.326 INFO   intelligent-kelly  🎤 KELLY STARTED SPEAKING                                                                                                                                            
+    19:54:15.339 INFO   intelligent-kelly  🎭 AGENT STATE: listening → thinking  
+    19:54:16.760 INFO   intelligent-kelly  🎭 AGENT STATE: thinking → speaking  
+    19:54:16.940 DEBUG  livekit.agents     flush audio emitter due to slow audio generation  
+    19:54:17.206 DEBUG  livekit.agents     flush audio emitter due to slow audio generation  
+    19:54:19.765 INFO   intelligent-kelly  🎭 AGENT STATE: speaking → listening  
+    19:54:19.770 INFO   intelligent-kelly  ⚠️ KELLY INTERRUPTED - waiting for transcript to decide action...  
+    19:54:20.959 INFO   intelligent-kelly  👤 USER STATE: listening → speaking  
+    19:54:24.253 INFO   intelligent-kelly  📝 TRANSCRIPT: 'tell me about graph retrieval augmented generation' | Kelly speaking: False | Just interrupted: True  
+    19:54:24.258 INFO   intelligent-kelly  ✅ REAL INPUT after interrupt: 'tell me about graph retrieval augmented generation' - valid interruption  
+    19:54:24.262 DEBUG  livekit.agents     received user transcript {"user_transcript": "Tell me about graph retrieval augmented generation.", "language": "en", "transcript_delay": 0.3029599189758301}        
+    19:54:24.556 INFO   intelligent-kelly  👤 USER STATE: speaking → listening  
+    19:54:24.912 DEBUG  livekit.…_detector eou prediction  
+                                           {"eou_probability": 0.24526239931583405, "duration": 0.341, "input": "<|im_start|>assistant\nretrieval augmented generation rag is like having your cake and eating  
+it toocombining the best of retrieval systems with generative models it pulls relevant information from a database or documents to enhance response quality generating answers<|im_end|>\n<|im_start|>user\nokay
+okay more<|im_end|>\n<|im_start|>assistant\nin rag the model first retrieves relevant snippets based on the input query then it uses those snippets to generate a more informed contextual response think of it 
+as a smart assistant that checks its notes before giving you an answerno more vague<|im_end|>\n<|im_start|>user\nstop<|im_end|>\n<|im_start|>assistant\nalright just let me know when you want to dive back     
+in<|im_end|>\n<|im_start|>user\ntell me about graph retrieval augmented generation"}
+    19:54:24.921 INFO   intelligent-kelly  🎤 KELLY STARTED SPEAKING                                                                                                                                            
+    19:54:24.934 INFO   intelligent-kelly  🎭 AGENT STATE: listening → thinking  
+    19:54:25.850 INFO   intelligent-kelly  🎭 AGENT STATE: thinking → speaking  
+    19:54:32.261 INFO   intelligent-kelly  👤 USER STATE: listening → speaking  
+    19:54:33.063 INFO   intelligent-kelly  👤 USER STATE: speaking → listening  
+    19:54:33.122 INFO   intelligent-kelly  📝 TRANSCRIPT: 'uhhuh' | Kelly speaking: True | Just interrupted: False  
+    19:54:33.128 INFO   intelligent-kelly  🔇 FILLER while speaking: 'uhhuh' - completely ignored  
+    19:54:33.132 DEBUG  livekit.agents     received user transcript {"user_transcript": "Uh-huh.", "language": "en", "transcript_delay": 0.661771297454834}
+    19:54:33.473 DEBUG  livekit.…_detector eou prediction  
+                                           {"eou_probability": 0.46071529388427734, "duration": 0.331, "input": "<|im_start|>user\nokay okay more<|im_end|>\n<|im_start|>assistant\nin rag the model first      
+retrieves relevant snippets based on the input query then it uses those snippets to generate a more informed contextual response think of it as a smart assistant that checks its notes before giving you an    
+answerno more vague<|im_end|>\n<|im_start|>user\nstop<|im_end|>\n<|im_start|>assistant\nalright just let me know when you want to dive back in<|im_end|>\n<|im_start|>user\ntell me about graph retrieval       
+augmented generation uh-huh"}
+    19:54:39.264 INFO   intelligent-kelly  👤 USER STATE: listening → speaking  
+    19:54:39.770 INFO   intelligent-kelly  🎭 AGENT STATE: speaking → listening  
+    19:54:39.775 INFO   intelligent-kelly  ⚠️ KELLY INTERRUPTED - waiting for transcript to decide action...  
+    19:54:40.260 INFO   intelligent-kelly  📝 TRANSCRIPT: 'yeah' | Kelly speaking: False | Just interrupted: True  
+    19:54:40.264 INFO   intelligent-kelly  🔄 FALSE INTERRUPT: 'yeah' was just a filler - should resume  
+    19:54:40.268 DEBUG  livekit.agents     received user transcript {"user_transcript": "Yeah,", "language": "en", "transcript_delay": 0.38580894470214844}
+    19:54:40.466 INFO   intelligent-kelly  👤 USER STATE: speaking → listening  
+    19:54:40.792 DEBUG  livekit.…_detector eou prediction  
+                                           {"eou_probability": 0.023591401055455208, "duration": 0.314, "input": "<|im_start|>assistant\nin rag the model first retrieves relevant snippets based on the input  
+query then it uses those snippets to generate a more informed contextual response think of it as a smart assistant that checks its notes before giving you an answerno more 
+vague<|im_end|>\n<|im_start|>user\nstop<|im_end|>\n<|im_start|>assistant\nalright just let me know when you want to dive back in<|im_end|>\n<|im_start|>user\ntell me about graph retrieval augmented 
+generation<|im_end|>\n<|im_start|>assistant\ngraph retrieval augmented generation graph rag takes the rag concept and gives it a social network makeover it involves using graph structureswhere data points are
+nodes linked by relationshipsallowing for more nuanced context retrieval this means the model can understand complex relationships<|im_end|>\n<|im_start|>user\nuh-huh yeah"}
+    19:54:40.801 INFO   intelligent-kelly  🎤 KELLY STARTED SPEAKING  
+    19:54:40.817 INFO   intelligent-kelly  🎭 AGENT STATE: listening → thinking  
+    19:54:41.808 INFO   intelligent-kelly  🎭 AGENT STATE: thinking → speaking  
+    19:54:48.169 INFO   intelligent-kelly  👤 USER STATE: listening → speaking  
+    19:54:49.045 INFO   intelligent-kelly  📝 TRANSCRIPT: 'wait' | Kelly speaking: True | Just interrupted: False  
+    19:54:49.048 INFO   intelligent-kelly  🛑 STOP COMMAND while speaking: 'wait' - forcing interrupt NOW  
+    19:54:49.056 DEBUG  livekit.agents     received user transcript {"user_transcript": "Wait.", "language": "en", "transcript_delay": 0.5885117053985596}
+    19:54:49.071 INFO   intelligent-kelly  🎭 AGENT STATE: speaking → listening  
+    19:54:49.080 INFO   intelligent-kelly  ⚠️ KELLY INTERRUPTED - waiting for transcript to decide action...  
+    19:54:49.089 INFO   intelligent-kelly  👤 USER STATE: speaking → listening  
+    19:54:49.423 DEBUG  livekit.…_detector eou prediction  
+                                           {"eou_probability": 0.042842086404561996, "duration": 0.323, "input": "<|im_start|>assistant\nalright just let me know when you want to dive back 
+in<|im_end|>\n<|im_start|>user\ntell me about graph retrieval augmented generation<|im_end|>\n<|im_start|>assistant\ngraph retrieval augmented generation graph rag takes the rag concept and gives it a social 
+network makeover it involves using graph structureswhere data points are nodes linked by relationshipsallowing for more nuanced context retrieval this means the model can understand complex 
+relationships<|im_end|>\n<|im_start|>user\nuh-huh yeah<|im_end|>\n<|im_start|>assistant\nwith graph rag the system can pull not just relevant info but also grasp context from interconnected data its like     
+having a super-smart friend who<|im_end|>\n<|im_start|>user\nwait"}
+    19:54:49.431 INFO   intelligent-kelly  🎤 KELLY STARTED SPEAKING                                                                                                                                            
+    19:54:49.448 INFO   intelligent-kelly  🎭 AGENT STATE: listening → thinking  
+    19:54:50.287 INFO   intelligent-kelly  🎭 AGENT STATE: thinking → speaking  
+    19:54:53.471 INFO   intelligent-kelly  🎭 AGENT STATE: speaking → listening  
+    19:54:53.475 INFO   intelligent-kelly  ⚠️ KELLY INTERRUPTED - waiting for transcript to decide action...  
+    19:55:08.472 INFO   intelligent-kelly  👤 USER STATE: listening → away  
+    20:00:52.988 INFO   intelligent-kelly  👤 USER STATE: away → speaking  
+    20:00:56.464 INFO   intelligent-kelly  📝 TRANSCRIPT: 'hello so videos we do' | Kelly speaking: False | Just interrupted: True  
+    20:00:56.469 INFO   intelligent-kelly  ✅ REAL INPUT after interrupt: 'hello so videos we do' - valid interruption  
+    20:00:56.473 DEBUG  livekit.agents     received user transcript {"user_transcript": "Hello? So videos we do.", "language": "en", "transcript_delay": 0.3855757713317871}
+    20:00:56.599 INFO   intelligent-kelly  👤 USER STATE: speaking → listening  
+    20:00:56.693 INFO   intelligent-kelly  👤 USER STATE: listening → speaking  
+    20:01:02.763 INFO   intelligent-kelly  📝 TRANSCRIPT: 'one minute fifty two seconds' | Kelly speaking: False | Just interrupted: False  
+    20:01:02.769 INFO   intelligent-kelly  ✅ VALID INPUT while idle: 'one minute fifty two seconds'  
+    20:01:02.773 DEBUG  livekit.agents     received user transcript {"user_transcript": "One minute fifty two seconds", "language": "en", "transcript_delay": 0.08300256729125977}                              
+    20:01:05.375 INFO   intelligent-kelly  📝 TRANSCRIPT: 'video' | Kelly speaking: False | Just interrupted: False  
+    20:01:05.380 INFO   intelligent-kelly  ✅ VALID INPUT while idle: 'video'  
+    20:01:05.385 DEBUG  livekit.agents     received user transcript {"user_transcript": "video", "language": "en", "transcript_delay": 0.08710217475891113}
+    20:01:08.090 INFO   intelligent-kelly  👤 USER STATE: speaking → listening  
+   
+   Headset (AirBass Earbuds)   ▁ ▁ ▁ ▁ ▁ ▁ ▁ ▁ ▁ ▁ ▁ ▁ ▁ ▁
\ No newline at end of file

From 23451f6ecce576d58ac4b10f2e5ff5152923883e Mon Sep 17 00:00:00 2001
From: Riddhika Arora <riddhika05@users.noreply.github.com>
Date: Mon, 2 Feb 2026 20:35:00 +0530
Subject: [PATCH 7/8] Refined read me

---
 examples/voice_agents/INTERREUPT_HANDLER_README.md | 4 ----
 1 file changed, 4 deletions(-)

diff --git a/examples/voice_agents/INTERREUPT_HANDLER_README.md b/examples/voice_agents/INTERREUPT_HANDLER_README.md
index c06aa11c4c..c2b729feff 100644
--- a/examples/voice_agents/INTERREUPT_HANDLER_README.md
+++ b/examples/voice_agents/INTERREUPT_HANDLER_README.md
@@ -75,10 +75,6 @@ LIVEKIT_API_SECRET="your-api-secret-here"
 LIVEKIT_API_KEY="your-api-key-here"
 LIVEKIT_URL="wss://your-project.livekit.cloud"
 
-# Optional - For model providers
-OPENAI_API_KEY="your-openai-key"
-DEEPGRAM_API_KEY="your-deepgram-key"
-CARTESIA_API_KEY="your-cartesia-key"
 ```
 
 To get LiveKit credentials:

From 44cbfb84724074d488ad42358a9f4a466f75a6a7 Mon Sep 17 00:00:00 2001
From: Riddhika Arora <riddhika05@users.noreply.github.com>
Date: Mon, 2 Feb 2026 20:38:19 +0530
Subject: [PATCH 8/8] Added video link to read me

---
 examples/voice_agents/INTERREUPT_HANDLER_README.md | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/examples/voice_agents/INTERREUPT_HANDLER_README.md b/examples/voice_agents/INTERREUPT_HANDLER_README.md
index c2b729feff..f4856acf6e 100644
--- a/examples/voice_agents/INTERREUPT_HANDLER_README.md
+++ b/examples/voice_agents/INTERREUPT_HANDLER_README.md
@@ -152,7 +152,7 @@ We skip long sentences (over 3 words) to avoid false positives like "I have no i
 
 ## Demo
 
-**[https://drive.google.com/file/d/1IHqKgqxAG2ZHRwWsDxhksyp3En5qv5iM/view?usp=sharing]**
+**[Demo video (Google Drive)](https://drive.google.com/file/d/1IHqKgqxAG2ZHRwWsDxhksyp3En5qv5iM/view?usp=drive_link)**
 
 The demo shows:
 - Agent continuing to talk when hearing "yeah", "okay", "uh-huh"