google-gemini · Kenxpx · Mar 21, 2026 · gemini-code-assist · Mar 21, 2026
diff --git a/quickstarts/Get_started_LiveAPI.py b/quickstarts/Get_started_LiveAPI.py
@@ -87,18 +87,27 @@
 # Live session configuration
 # Trigger tokens sent so that model does not hallucinate in long conversations
 # Sliding window to retain the context within the context window limit
-CONFIG = types.LiveConnectConfig(
-    response_modalities=["AUDIO"],
-    speech_config=types.SpeechConfig(
-        voice_config=types.VoiceConfig(
-            prebuilt_voice_config=types.PrebuiltVoiceConfig(voice_name = "Zephyr")
-        )
-    ),
-    context_window_compression=types.ContextWindowCompressionConfig(
-        trigger_tokens = 25600,
-        sliding_window = types.SlidingWindow(target_tokens=12800),
-    ),
-)
+CONFIG = types.LiveConnectConfig(
+    context_window_compression=types.ContextWindowCompressionConfig(
+        trigger_tokens=25600,
+        sliding_window=types.SlidingWindow(target_tokens=12800),
+    ),
+    response_modalities=["AUDIO"],
+    speech_config=types.SpeechConfig(
+        voice_config=types.VoiceConfig(
+            prebuilt_voice_config=types.PrebuiltVoiceConfig(voice_name="Zephyr")
+        )
+    ),
+    realtime_input_config=types.RealtimeInputConfig(
+        activity_handling=types.ActivityHandling.NO_INTERRUPTION,
+        automatic_activity_detection=types.AutomaticActivityDetection(
+            start_of_speech_sensitivity=types.StartSensitivity.START_SENSITIVITY_HIGH,
+            end_of_speech_sensitivity=types.EndSensitivity.END_SENSITIVITY_HIGH,
+            silence_duration_ms=800,
+            prefix_padding_ms=300,
+        ),
+    ),
+)
 
 pya = pyaudio.PyAudio()
 
@@ -235,16 +244,17 @@ async def capture_frames(self):
         finally:
             cap.release()
 
-    def _capture_screen(self):
-        sct = mss.mss()
-        monitor = sct.monitors[0]
-
-        i = sct.grab(monitor)
-
-        img = PIL.Image.frombytes("RGB", i.size, i.rgb)
-
-        image_io = io.BytesIO()
-        img.save(image_io, format="jpeg")
+    def _capture_screen(self):
+        sct = mss.mss()
+        monitor = sct.monitors[0]
+
+        i = sct.grab(monitor)
+
+        img = PIL.Image.frombytes("RGB", i.size, i.rgb)
+        img.thumbnail([1024, 1024])
+
+        image_io = io.BytesIO()
+        img.save(image_io, format="jpeg")
         image_io.seek(0)
 
         mime_type = "image/jpeg"
@@ -282,16 +292,16 @@ async def send_text(self):
         except asyncio.CancelledError:
             pass
 
-    async def send_realtime(self):
-        try:
-            while True:
-                msg = await self.out_queue.get()
-                if msg["mime_type"].startswith("audio/"):
-                    await self.session.send_realtime_input(audio=msg)
-                else:
-                    await self.session.send_realtime_input(media=msg)
-        except asyncio.CancelledError:
-            pass
+    async def send_realtime(self):
+        """Relay queued messages to the session: audio/* as `audio`, all else as `video`."""
+        try:
+            while True:
+                queued = await self.out_queue.get()
+                is_audio = queued["mime_type"].startswith("audio/")
+                payload = {"audio": queued} if is_audio else {"video": queued}
+                await self.session.send_realtime_input(**payload)
+        except asyncio.CancelledError:
+            pass  # cancellation simply ends the relay loop
 
     async def run(self):
         """Run all tasks to handle audio/video/text interaction"""