diff --git a/apps/mesh/src/api/app.ts b/apps/mesh/src/api/app.ts
index f394d6ce56..c38c3387db 100644
--- a/apps/mesh/src/api/app.ts
+++ b/apps/mesh/src/api/app.ts
@@ -36,6 +36,7 @@ import oauthProxyRoutes, {
 import openaiCompatRoutes from "./routes/openai-compat";
 import proxyRoutes from "./routes/proxy";
 import publicConfigRoutes from "./routes/public-config";
+import transcribeRoutes from "./routes/transcribe";
 import selfRoutes from "./routes/self";
 import { shouldSkipMeshContext, SYSTEM_PATHS } from "./utils/paths";
@@ -588,6 +589,9 @@ export function createApp(options: CreateAppOptions = {}) {
   // OpenAI-compatible LLM API routes
   app.route("/api", openaiCompatRoutes);
 
+  // Audio transcription routes
+  app.route("/api", transcribeRoutes);
+
   // Public Events endpoint
   app.post("/org/:organizationId/events/:type", async (c) => {
     const orgId = c.req.param("organizationId");
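For context, Hono's `app.route` mounts a sub-app under a prefix, so the handler registered as `/:org/transcribe` in the new router below ends up served at `POST /api/:org/transcribe` — the path the web client calls later in this diff. A minimal sketch of that composition (handler body is illustrative only):

```ts
import { Hono } from "hono";

// Sub-app, analogous to the new transcribe router below
const transcribe = new Hono();
transcribe.post("/:org/transcribe", (c) =>
  c.json({ ok: true, org: c.req.param("org") }),
);

// Mounting under "/api" composes the final path: POST /api/:org/transcribe
const app = new Hono();
app.route("/api", transcribe);
```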
"")) { + return true; + } + if ( + ipLower === "::1" || // loopback + ipLower.startsWith("fe80:") || // link-local + ipLower.startsWith("fc") || // unique local (fc00::/7) + ipLower.startsWith("fd") // unique local (fc00::/7) + ) { + return true; + } + + return false; +} + +/** + * Validate audioUrl to prevent SSRF attacks + * Checks URL format, scheme, and resolves DNS to verify IPs are public + */ +async function validateAudioUrl( + urlString: string, +): Promise<{ valid: true } | { valid: false; error: string }> { + let url: URL; + try { + url = new URL(urlString); + } catch { + return { valid: false, error: "Invalid URL format" }; + } + + // Only allow HTTP/HTTPS schemes + if (url.protocol !== "http:" && url.protocol !== "https:") { + return { valid: false, error: "Only HTTP and HTTPS URLs are allowed" }; + } + + const hostname = url.hostname.toLowerCase(); + + // Block localhost and loopback addresses (string check) + if (hostname === "localhost" || hostname === "[::1]") { + return { valid: false, error: "Localhost URLs are not allowed" }; + } + + // If hostname is already an IP, check it directly + if (isPrivateIp(hostname)) { + return { + valid: false, + error: "Private or internal IP addresses are not allowed", + }; + } + + // Resolve DNS and check all returned IPs to prevent DNS rebinding + try { + const results = await lookup(hostname, { all: true }); + for (const { address } of results) { + if (isPrivateIp(address)) { + return { + valid: false, + error: "URL resolves to a private or internal IP address", + }; + } + } + } catch { + return { valid: false, error: "Failed to resolve hostname" }; + } + + return { valid: true }; +} + +/** + * Find a connection that implements a specific binding + */ +async function findConnectionWithBinding( + ctx: MeshContext, + organizationId: string, + binding: Binder, +): Promise { + const connections = await ctx.storage.connections.list(organizationId); + return ( + connections.find( + (conn) => + conn.status === "active" && connectionImplementsBinding(conn, binding), + ) ?? null + ); +} + +/** + * Convert a Blob to base64 string + */ +async function blobToBase64(blob: Blob): Promise { + const arrayBuffer = await blob.arrayBuffer(); + return Buffer.from(arrayBuffer).toString("base64"); +} + +/** + * POST /:org/transcribe + * + * Transcribe audio to text using available transcription service. + * + * Request: FormData with: + * - audio: Blob (audio file) + * - audioUrl: string (optional, URL to audio file) + * - language: string (optional, ISO 639-1 language code) + * + * Response: { text, language, duration, confidence } + */ +app.post("/:org/transcribe", async (c) => { + const ctx = c.get("meshContext"); + const orgSlug = c.req.param("org"); + + // 1. Validate auth + if (!ctx.auth.user?.id && !ctx.auth.apiKey?.id) { + return c.json({ error: "Authentication required" }, 401); + } + + // 2. Validate organization + if (!ctx.organization) { + return c.json({ error: "Organization context required" }, 400); + } + + if (ctx.organization.slug !== orgSlug && ctx.organization.id !== orgSlug) { + return c.json({ error: "Organization mismatch" }, 403); + } + + const organizationId = ctx.organization.id; + + // 3. 
diff --git a/apps/mesh/src/web/components/chat/context.tsx b/apps/mesh/src/web/components/chat/context.tsx
index ef3b42d9ac..bb1d9b5b4a 100644
--- a/apps/mesh/src/web/components/chat/context.tsx
+++ b/apps/mesh/src/web/components/chat/context.tsx
@@ -14,6 +14,7 @@ import {
   useProjectContext,
   useVirtualMCPs,
   SELF_MCP_ALIAS_ID,
+  useConnections,
 } from "@decocms/mesh-sdk";
 import type { Client } from "@modelcontextprotocol/sdk/client/index.js";
 import type {
@@ -35,6 +36,7 @@ import {
   useReducer,
 } from "react";
 import { toast } from "sonner";
+import { useBindingConnections } from "../../hooks/use-binding";
 import { useModelConnections } from "../../hooks/collections/use-llm";
 import { useThreadMessages } from "../../hooks/use-chat-store";
 import { useContext as useContextHook } from "../../hooks/use-context";
@@ -123,6 +125,9 @@ interface ChatContextValue {
   clearChatError: () => void;
   finishReason: string | null;
   clearFinishReason: () => void;
+
+  // Binding availability
+  hasTranscriptionBinding: boolean;
 }
 
 // ============================================================================
@@ -558,6 +563,14 @@ export function ChatProvider({
   // Always fetch messages for the active thread - if it's truly new, the query returns empty
   const initialMessages = useThreadMessages(stateActiveThreadId);
 
+  // Binding detection for the transcription feature
+  const allConnections = useConnections();
+  const transcriptionConnections = useBindingConnections({
+    connections: allConnections,
+    binding: "TRANSCRIPTION",
+  });
+  const hasTranscriptionBinding = transcriptionConnections.length > 0;
+
   // Context prompt
   const contextPrompt = useContextHook(storedSelectedVirtualMcpId);
 
@@ -830,6 +843,9 @@ export function ChatProvider({
     clearChatError: chat.clearError,
     finishReason: chatState.finishReason,
     clearFinishReason,
+
+    // Binding availability
+    hasTranscriptionBinding,
   };
 
   return <ChatContext.Provider value={value}>{children}</ChatContext.Provider>;
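`useBindingConnections` itself is not part of this diff, so the exact filtering is not shown. Given the arguments passed above, a plausible sketch of its behavior follows — the `Connection` shape and `bindings` field here are hypothetical, purely for illustration:

```ts
// Hypothetical types; the real hook lives in ../../hooks/use-binding.
type Connection = { status: string; bindings?: string[] };

function useBindingConnectionsSketch({
  connections,
  binding,
}: {
  connections: Connection[];
  binding: string;
}): Connection[] {
  // Mirrors the server-side findConnectionWithBinding: keep active
  // connections that advertise the requested binding.
  return connections.filter(
    (conn) =>
      conn.status === "active" && (conn.bindings ?? []).includes(binding),
  );
}
```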
diff --git a/apps/mesh/src/web/components/chat/input.tsx b/apps/mesh/src/web/components/chat/input.tsx
index c1197dcb94..d517e8a8cb 100644
--- a/apps/mesh/src/web/components/chat/input.tsx
+++ b/apps/mesh/src/web/components/chat/input.tsx
@@ -9,6 +9,11 @@ import {
 } from "@deco/ui/components/popover.tsx";
 import { cn } from "@deco/ui/lib/utils.ts";
 import { useNavigate } from "@tanstack/react-router";
+import {
+  Tooltip,
+  TooltipContent,
+  TooltipTrigger,
+} from "@deco/ui/components/tooltip.tsx";
 import {
   AlertCircle,
   AlertTriangle,
@@ -16,11 +21,15 @@ import {
   ChevronDown,
   CpuChip02,
   Edit01,
+  Microphone01,
   Stop,
+  StopCircle,
   XCircle,
 } from "@untitledui/icons";
 import type { FormEvent } from "react";
 import { useEffect, useRef, useState, type MouseEvent } from "react";
+import { toast } from "sonner";
+import { useAudioRecorder } from "../../hooks/use-audio-recorder";
 import { useChat } from "./context";
 import { isTiptapDocEmpty } from "./tiptap/utils";
 import { ChatHighlight } from "./index";
@@ -196,10 +205,32 @@ export function ChatInput() {
     clearChatError,
     finishReason,
     clearFinishReason,
+    hasTranscriptionBinding,
   } = useChat();
+  const { org } = useProjectContext();
+
   const tiptapRef = useRef(null);
 
+  // Audio recording state
+  const {
+    isRecording,
+    startRecording,
+    stopRecording,
+    error: recordingError,
+    clearError: clearRecordingError,
+  } = useAudioRecorder({ maxDuration: 3 * 60 * 1000 }); // 3 minutes max
+  const [isTranscribing, setIsTranscribing] = useState(false);
+
+  // Show a toast when a recording error occurs
+  // oxlint-disable-next-line ban-use-effect/ban-use-effect
+  useEffect(() => {
+    if (recordingError) {
+      toast.error(recordingError.message);
+      clearRecordingError();
+    }
+  }, [recordingError, clearRecordingError]);
+
   const canSubmit =
     !isStreaming && !!selectedModel && !isTiptapDocEmpty(tiptapDoc);
@@ -236,6 +267,98 @@ export function ChatInput() {
     void sendMessage(doc);
   };
 
+  const handleRecordingToggle = async () => {
+    if (isTranscribing) return;
+
+    if (isRecording) {
+      const audioBlob = await stopRecording();
+      if (!audioBlob) {
+        toast.error("Failed to record audio");
+        return;
+      }
+
+      setIsTranscribing(true);
+      try {
+        const formData = new FormData();
+        formData.append("audio", audioBlob, "recording.webm");
+
+        const response = await fetch(`/api/${org.slug}/transcribe`, {
+          method: "POST",
+          body: formData,
+          credentials: "include",
+        });
+
+        if (!response.ok) {
+          const errorData = await response.json().catch(() => ({}));
+          throw new Error(
+            (errorData as { error?: string }).error || "Transcription failed",
+          );
+        }
+
+        const data = (await response.json()) as { text?: string };
+        if (data.text) {
+          // Append the transcribed text to the existing input content
+          const transcribedText = data.text;
+
+          // If there is existing content, append to it; otherwise create a new doc
+          if (tiptapDoc && tiptapDoc.content && tiptapDoc.content.length > 0) {
+            // Clone the existing document
+            const newContent = [...tiptapDoc.content];
+            const lastParagraphIndex = newContent.length - 1;
+            const lastParagraph = newContent[lastParagraphIndex];
+
+            // If the last paragraph has content, append with a space; otherwise just add the text
+            if (
+              lastParagraph &&
+              lastParagraph.type === "paragraph" &&
+              lastParagraph.content &&
+              lastParagraph.content.length > 0
+            ) {
+              // Append to the last paragraph with a space separator
+              newContent[lastParagraphIndex] = {
+                ...lastParagraph,
+                content: [
+                  ...lastParagraph.content,
+                  { type: "text", text: ` ${transcribedText}` },
+                ],
+              };
+            } else {
+              // The last paragraph is empty; replace it with the transcribed text
+              newContent[lastParagraphIndex] = {
+                type: "paragraph",
+                content: [{ type: "text", text: transcribedText }],
+              };
+            }
+
+            setTiptapDoc({
+              type: "doc" as const,
+              content: newContent,
+            });
+          } else {
+            // No existing content; create a new doc
+            setTiptapDoc({
+              type: "doc" as const,
+              content: [
+                {
+                  type: "paragraph",
+                  content: [{ type: "text", text: transcribedText }],
+                },
+              ],
+            });
+          }
+        }
+      } catch (err) {
+        toast.error(
+          err instanceof Error ? err.message : "Failed to transcribe audio",
+        );
+      } finally {
+        setIsTranscribing(false);
+      }
+    } else {
+      await startRecording();
+    }
+  };
+
   const color = selectedVirtualMcp
     ? getAgentColor(selectedVirtualMcp.id)
     : null;
@@ -367,6 +490,67 @@
           selectedModel={selectedModel}
           isStreaming={isStreaming}
         />
+        {/* Audio Recording Button - always visible, disabled if no transcription binding */}
+        <Tooltip>
+          <TooltipTrigger asChild>
+            {/* Wrap in span to enable tooltip on disabled button */}
+            <span>
+              <Button
+                type="button"
+                variant="ghost"
+                size="icon"
+                disabled={!hasTranscriptionBinding || isTranscribing}
+                onClick={handleRecordingToggle}
+                aria-label={isRecording ? "Stop recording" : "Record voice input"}
+              >
+                {isRecording ? <StopCircle /> : <Microphone01 />}
+              </Button>
+            </span>
+          </TooltipTrigger>
+          {!hasTranscriptionBinding && (
+            <TooltipContent>
+              Add a transcription MCP to enable voice input
+            </TooltipContent>
+          )}
+        </Tooltip>
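The diff references `../../hooks/use-audio-recorder` without including it. Based solely on the destructuring at the call site (`isRecording`, `startRecording`, `stopRecording` resolving to a `Blob` or `null`, `error`/`clearError`, and a `maxDuration` option), the hook's surface presumably looks like the sketch below — a hypothetical MediaRecorder-based implementation, not the actual file:

```ts
import { useCallback, useRef, useState } from "react";

// Hypothetical reconstruction: only the shape is implied by the diff;
// the implementation details here are assumptions.
export function useAudioRecorder({ maxDuration }: { maxDuration: number }) {
  const [isRecording, setIsRecording] = useState(false);
  const [error, setError] = useState<Error | null>(null);
  const recorderRef = useRef<MediaRecorder | null>(null);
  const chunksRef = useRef<Blob[]>([]);
  const timerRef = useRef<ReturnType<typeof setTimeout> | null>(null);

  const stopRecording = useCallback((): Promise<Blob | null> => {
    return new Promise((resolve) => {
      const recorder = recorderRef.current;
      if (!recorder || recorder.state === "inactive") return resolve(null);
      recorder.onstop = () => {
        if (timerRef.current) clearTimeout(timerRef.current);
        setIsRecording(false);
        resolve(new Blob(chunksRef.current, { type: "audio/webm" }));
      };
      recorder.stop();
      recorder.stream.getTracks().forEach((t) => t.stop());
    });
  }, []);

  const startRecording = useCallback(async () => {
    try {
      const stream = await navigator.mediaDevices.getUserMedia({ audio: true });
      chunksRef.current = [];
      const recorder = new MediaRecorder(stream, { mimeType: "audio/webm" });
      recorder.ondataavailable = (e) => chunksRef.current.push(e.data);
      recorder.start();
      recorderRef.current = recorder;
      setIsRecording(true);
      // Auto-stop once maxDuration elapses (3 minutes at the call site)
      timerRef.current = setTimeout(() => void stopRecording(), maxDuration);
    } catch (err) {
      setError(err instanceof Error ? err : new Error("Microphone unavailable"));
    }
  }, [maxDuration, stopRecording]);

  const clearError = useCallback(() => setError(null), []);

  return { isRecording, startRecording, stopRecording, error, clearError };
}
```

The `recording.webm` filename and `audio/webm` MIME type used by the input component are consistent with this kind of MediaRecorder setup, which is why the server-side format check above splits off codec parameters (`audio/webm;codecs=opus`) before validating against `SUPPORTED_AUDIO_FORMATS`.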