diff --git a/julia-agent/julia-ai/src/agent.py b/julia-agent/julia-ai/src/agent.py index 9bf7c53..11ae0b4 100644 --- a/julia-agent/julia-ai/src/agent.py +++ b/julia-agent/julia-ai/src/agent.py @@ -312,13 +312,30 @@ class WellNuoLLMStream(llm.LLMStream): def prewarm(proc: JobProcess): """Preload VAD model for faster startup.""" - # Increase min_silence_duration to prevent cutting off user speech during barge-in - # Default is 0.55s which is too short - user pauses between words get interpreted as end of speech - # 0.9s gives user more time to continue speaking without being cut off + # CRITICAL FIX: Prevent premature speech cutoff + # + # The VAD (Voice Activity Detection) determines when the user has finished speaking. + # Default settings are too aggressive and cut off speech during natural pauses. + # + # Key parameters: + # - min_silence_duration: How long to wait after silence before ending speech + # Default 0.55s is WAY too short - people pause between sentences/thoughts + # 1.8s allows natural conversation flow without being cut off + # + # - min_speech_duration: Minimum speech length to be considered valid + # Keeping it low (0.1s) allows short responses but filters noise + # + # - activation_threshold: Voice detection sensitivity (0-1) + # Lower = more sensitive to quiet speech, but may pick up background noise + # 0.35 is a good balance for typical indoor environments + # + # - prefix_padding_duration: Audio padding around detected speech (default: 0.5) + # 0.5s of padding helps capture soft word endings proc.userdata["vad"] = silero.VAD.load( - min_silence_duration=0.9, # Wait 0.9s of silence before ending speech (default: 0.55) - min_speech_duration=0.05, # Keep low for quick interruption detection (default: 0.05) - activation_threshold=0.4, # Slightly lower for better sensitivity (default: 0.5) + min_silence_duration=1.8, # Wait 1.8s of silence before ending speech (was 0.9s) + min_speech_duration=0.1, # Minimum valid speech duration (was 0.05s) + 
activation_threshold=0.35, # Slightly more sensitive for quiet speakers (was 0.4) + prefix_padding_duration=0.5, # Extra audio padding around speech (default: 0.5) ) @@ -408,13 +425,22 @@ async def entrypoint(ctx: JobContext): # Deepgram for STT - better accuracy and faster than AssemblyAI # AssemblyAI was giving garbage like "shambhala balashambal" instead of actual speech + # + # CRITICAL FIX: Endpointing settings prevent premature speech cutoff + # - endpointing_ms: Time in ms of silence before finalizing transcript + # Default is ~10-25ms which is far too aggressive for natural speech + # 1500ms (1.5s) allows for thinking pauses without cutting off + # - utterance_end_ms: Additional buffer for utterance detection + # 2000ms gives extra time for slow speakers or complex sentences session = AgentSession( # Deepgram Nova-2 model for best STT accuracy stt=deepgram.STT( model="nova-2-general", language="en-US", - smart_format=True, # Better punctuation and formatting - no_delay=True, # Faster response for real-time + smart_format=True, # Better punctuation and formatting + no_delay=True, # Faster response for real-time + endpointing_ms=1500, # Wait 1.5s of silence before finalizing (plugin default: 25ms) + utterance_end_ms=2000, # Extra 2s buffer for utterance end detection ), # WellNuo voice_ask API for LLM with dynamic beneficiary data llm=WellNuoLLM(