diff --git a/src/react/elements.ts b/src/react/elements.ts
index e3ff30b3..c620ca7a 100644
--- a/src/react/elements.ts
+++ b/src/react/elements.ts
@@ -2,6 +2,7 @@ import {
   resolveImageElement,
   resolveMusicElement,
   resolveSpeechElement,
+  resolveTalkingHeadElement,
   resolveVideoElement,
 } from "./resolve";
 import type { ResolvedElement } from "./resolved-element";
@@ -141,11 +142,14 @@ export function Speech(
 
 export function TalkingHead(
   props: TalkingHeadProps,
-): VargElement<"talking-head"> {
-  return createElement(
+): VargElement<"talking-head"> & PromiseLike<ResolvedElement<"talking-head">> {
+  const element = createElement(
     "talking-head",
     props as Record<string, unknown>,
-    props.children,
+    undefined, // no children: TalkingHeadProps now carries `image`/`audio` props
+  );
+  return makeThenable(element, (el) =>
+    resolveTalkingHeadElement(el, el.props as unknown as TalkingHeadProps),
   );
 }
 
diff --git a/src/react/renderers/clip.ts b/src/react/renderers/clip.ts
index d52cc898..a4a5c4e5 100644
--- a/src/react/renderers/clip.ts
+++ b/src/react/renderers/clip.ts
@@ -26,6 +26,7 @@ import { renderSlider } from "./slider";
 import { renderSpeech } from "./speech";
 import { renderSubtitle } from "./subtitle";
 import { renderSwipe } from "./swipe";
+import { renderTalkingHead } from "./talking-head";
 import { renderTitle } from "./title";
 import { resolvePath } from "./utils";
 import { renderVideo } from "./video";
@@ -151,6 +152,27 @@ async function renderClipLayers(
         break;
       }
 
+      case "talking-head": {
+        pending.push({
+          type: "async",
+          promise: renderTalkingHead(
+            element as VargElement<"talking-head">,
+            ctx,
+          )
+            .then((file) => ctx.backend.resolvePath(file))
+            .then(
+              (path) =>
+                ({
+                  type: "video",
+                  path,
+                  resizeMode: "cover",
+                  mixVolume: 1,
+                }) as VideoLayer,
+            ),
+        });
+        break;
+      }
+
       case "music": {
         const props = element.props as MusicProps;
         pending.push({
diff --git a/src/react/renderers/talking-head.test.ts b/src/react/renderers/talking-head.test.ts
new file mode 100644
index 00000000..2400d68e
--- /dev/null
+++ b/src/react/renderers/talking-head.test.ts
@@ -0,0 +1,362 @@
+import { describe, expect, test } from "bun:test";
+import { mkdtempSync, rmSync } from "node:fs";
+import { tmpdir } from "node:os";
+import { join } from "node:path";
+import type { ImageModelV3, SpeechModelV3 } from "@ai-sdk/provider";
+import { withCache } from "../../ai-sdk/cache";
+import { File } from "../../ai-sdk/file";
+import { fileCache } from "../../ai-sdk/file-cache";
+import { localBackend } from "../../ai-sdk/providers/editly";
+import type { VideoModelV3 } from "../../ai-sdk/video-model";
+import { Image, type Speech, TalkingHead } from "../elements";
+import { ResolvedElement } from "../resolved-element";
+import { renderClip } from "./clip";
+import type { RenderContext } from "./context";
+import { renderTalkingHead } from "./talking-head";
+
+// Temp-dir helpers: tests create their own cache dir and remove it in `finally`.
+function makeTempDir(): string {
+  return mkdtempSync(join(tmpdir(), "varg-talking-head-test-"));
+}
+function cleanupTempDir(dir: string) {
+  rmSync(dir, { recursive: true, force: true });
+}
+
+/**
+ * Stub image model for tests. Each `doGenerate` call resolves to a single
+ * 7-byte image (starting with the bytes 0x89 "PNG") and no warnings.
+ */
+function createImageModel(): ImageModelV3 {
+  const modelId = "test-image";
+
+  return {
+    specificationVersion: "v3",
+    provider: "test",
+    modelId,
+    maxImagesPerCall: 1,
+    async doGenerate() {
+      const png = new Uint8Array([0x89, 0x50, 0x4e, 0x47, 1, 2, 3]);
+      const response = { timestamp: new Date(), modelId, headers: undefined };
+      return { images: [png], warnings: [], response };
+    },
+  };
+}
+
+/**
+ * Stub video model for tests. Each `doGenerate` call resolves to a single
+ * 8-byte video buffer (bytes 4-7 spell "ftyp") and no warnings.
+ */
+function createVideoModel(): VideoModelV3 {
+  const modelId = "test-video";
+
+  return {
+    specificationVersion: "v3",
+    provider: "test",
+    modelId,
+    maxVideosPerCall: 1,
+    async doGenerate() {
+      const mp4 = new Uint8Array([0, 0, 0, 0x1c, 0x66, 0x74, 0x79, 0x70]);
+      const response = { timestamp: new Date(), modelId, headers: undefined };
+      return { videos: [mp4], warnings: [], response };
+    },
+  };
+}
+
+/**
+ * Stub speech model for tests. Each `doGenerate` call resolves to one 6-byte
+ * audio buffer (starting 0xff 0xfb) and no warnings.
+ */
+function createSpeechModel(): SpeechModelV3 {
+  const modelId = "test-speech";
+
+  return {
+    specificationVersion: "v3",
+    provider: "test",
+    modelId,
+    async doGenerate() {
+      const audio = new Uint8Array([0xff, 0xfb, 0x90, 4, 5, 6]);
+      const response = { timestamp: new Date(), modelId, headers: undefined };
+      return { audio, warnings: [], response };
+    },
+  };
+}
+
+type GenerateImageOptions = Parameters<RenderContext["generateImage"]>[0];
+type GenerateVideoOptions = Parameters<RenderContext["generateVideo"]>[0];
+
+function createContext(
+  cacheDir: string,
+  counters: { imageCalls: number; videoCalls: number; speechCalls: number },
+): RenderContext {
+  const storage = fileCache({ dir: cacheDir });
+  // Image stub: counts each run in counters.imageCalls; wrapped by withCache over `storage`.
+  const generateImage = withCache(
+    async (_opts: GenerateImageOptions) => {
+      counters.imageCalls += 1;
+      return {
+        images: [
+          {
+            uint8Array: new Uint8Array([0x89, 0x50, 0x4e, 0x47, 1, 2, 3]),
+            mimeType: "image/png",
+          },
+        ],
+        warnings: [],
+      };
+    },
+    { storage },
+  );
+  // Video stub: counts each run in counters.videoCalls; same cache wrapper and storage.
+  const generateVideo = withCache(
+    async (_opts: GenerateVideoOptions) => {
+      counters.videoCalls += 1;
+      const data = new Uint8Array([0, 0, 0, 0x1c, 0x66, 0x74, 0x79, 0x70]);
+      return {
+        video: { uint8Array: data, mimeType: "video/mp4" },
+        videos: [{ uint8Array: data, mimeType: "video/mp4" }],
+        warnings: [],
+      };
+    },
+    { storage },
+  );
+  // NOTE(review): nothing in this helper increments counters.speechCalls.
+  return {
+    width: 1080,
+    height: 1920,
+    fps: 30,
+    cache: storage,
+    generateImage: generateImage as unknown as RenderContext["generateImage"],
+    generateVideo: generateVideo as unknown as RenderContext["generateVideo"],
+    tempFiles: [],
+    pendingFiles: new Map<string, Promise<File>>(),
+    backend: localBackend,
+    generatedFiles: [],
+    defaults: {
+      image: createImageModel(),
+      video: createVideoModel(),
+      speech: createSpeechModel(),
+    },
+  };
+}
+
+/** Helper: a resolved image element wrapping a generated 7-byte PNG stub file */
+function createResolvedImage(): ResolvedElement<"image"> {
+  const uint8Array = new Uint8Array([0x89, 0x50, 0x4e, 0x47, 1, 2, 3]);
+  const file = File.fromGenerated({ uint8Array, mediaType: "image/png" });
+  const meta = { file, duration: 0 };
+
+  return new ResolvedElement<"image">(
+    { type: "image", props: { prompt: "test" }, children: [] },
+    meta,
+  );
+}
+
+/** Helper: a resolved speech element (duration 3) wrapping a 6-byte audio stub */
+function createResolvedSpeech(): ResolvedElement<"speech"> {
+  const uint8Array = new Uint8Array([0xff, 0xfb, 0x90, 4, 5, 6]);
+  const file = File.fromGenerated({ uint8Array, mediaType: "audio/mpeg" });
+  const meta = { file, duration: 3 };
+
+  return new ResolvedElement<"speech">(
+    { type: "speech", props: { voice: "rachel" }, children: ["Hello world!"] },
+    meta,
+  );
+}
+
+describe("TalkingHead element", () => {
+  // A lazy Image plus a resolved Speech should both end up defined on `props`.
+  test("creates correct element structure with image/audio props", () => {
+    const model = createVideoModel();
+    const audio = createResolvedSpeech();
+    const image = Image({
+      prompt: "young woman, casual outfit",
+      model: createImageModel(),
+    });
+
+    const { type, props } = TalkingHead({ image, audio, model });
+
+    expect(type).toBe("talking-head");
+    expect(props.image).toBeDefined();
+    expect(props.audio).toBeDefined();
+  });
+
+  test("is thenable (awaitable)", () => {
+    const element = TalkingHead({
+      image: createResolvedImage() as unknown as ReturnType<typeof Image>,
+      audio: createResolvedSpeech() as unknown as ReturnType<typeof Speech>,
+      model: createVideoModel(),
+    });
+    // TalkingHead's return type includes PromiseLike, so `then` needs no cast.
+    expect(typeof element.then).toBe("function");
+  });
+
+  test("clip renderer handles pre-resolved TalkingHead element", async () => {
+    const counters = { imageCalls: 0, videoCalls: 0, speechCalls: 0 };
+    const cacheDir = makeTempDir();
+
+    try {
+      const ctx = createContext(cacheDir, counters);
+      const clipDuration = 5;
+
+      // Stand-in for the video an `await TalkingHead(...)` call would produce.
+      const uint8Array = new Uint8Array([0, 0, 0, 0x1c, 0x66, 0x74, 0x79, 0x70]);
+      const file = File.fromGenerated({
+        uint8Array,
+        mediaType: "video/mp4",
+      });
+
+      // Hand-built resolved element, so no model is invoked to create it.
+      const talkingHead = new ResolvedElement<"talking-head">(
+        {
+          type: "talking-head",
+          props: {
+            image: createResolvedImage(),
+            audio: createResolvedSpeech(),
+          },
+          children: [],
+        },
+        { file, duration: clipDuration },
+      );
+
+      // Place it as the only child of a clip of the same length.
+      const clip = {
+        type: "clip" as const,
+        props: { duration: clipDuration },
+        children: [talkingHead],
+      };
+
+      const { layers } = await renderClip(clip as any, ctx);
+
+      // The rendered clip must expose at least one layer, one of type "video".
+      expect(layers.length).toBeGreaterThan(0);
+      expect(layers.some((layer) => layer.type === "video")).toBe(true);
+    } finally {
+      cleanupTempDir(cacheDir);
+    }
+  });
+});
+
+describe("renderTalkingHead", () => {
+  test("throws when no image prop provided", async () => {
+    const counters = { imageCalls: 0, videoCalls: 0, speechCalls: 0 };
+    const cacheDir = makeTempDir();
+
+    try {
+      const ctx = createContext(cacheDir, counters);
+      // `image` is deliberately left out of the props.
+      const withoutImage = {
+        audio: createResolvedSpeech() as unknown as ReturnType<typeof Speech>,
+        model: createVideoModel(),
+      } as any;
+      const pending = renderTalkingHead(TalkingHead(withoutImage) as any, ctx);
+
+      const expected = "TalkingHead requires 'image' prop";
+      await expect(pending).rejects.toThrow(expected);
+    } finally {
+      cleanupTempDir(cacheDir);
+    }
+  });
+
+  test("throws when no audio prop provided", async () => {
+    const counters = { imageCalls: 0, videoCalls: 0, speechCalls: 0 };
+    const cacheDir = makeTempDir();
+
+    try {
+      const ctx = createContext(cacheDir, counters);
+      // `audio` is deliberately left out of the props.
+      const withoutAudio = {
+        image: createResolvedImage() as unknown as ReturnType<typeof Image>,
+        model: createVideoModel(),
+      } as any;
+      const pending = renderTalkingHead(TalkingHead(withoutAudio) as any, ctx);
+
+      const expected = "TalkingHead requires 'audio' prop";
+      await expect(pending).rejects.toThrow(expected);
+    } finally {
+      cleanupTempDir(cacheDir);
+    }
+  });
+
+  test("throws when no model and no defaults.video", async () => {
+    const counters = { imageCalls: 0, videoCalls: 0, speechCalls: 0 };
+    const cacheDir = makeTempDir();
+
+    try {
+      const ctx = createContext(cacheDir, counters);
+      // Swap in defaults without a `video` entry so nothing can replace `model`.
+      ctx.defaults = { image: createImageModel(), speech: createSpeechModel() };
+
+      // `model` is deliberately left out of the props.
+      const withoutModel = {
+        image: createResolvedImage() as unknown as ReturnType<typeof Image>,
+        audio: createResolvedSpeech() as unknown as ReturnType<typeof Speech>,
+      } as any;
+      const pending = renderTalkingHead(TalkingHead(withoutModel) as any, ctx);
+
+      const expected = "TalkingHead requires 'model' prop";
+      await expect(pending).rejects.toThrow(expected);
+    } finally {
+      cleanupTempDir(cacheDir);
+    }
+  });
+
+  test("renders with pre-resolved image and audio (skips generation)", async () => {
+    const counters = { imageCalls: 0, videoCalls: 0, speechCalls: 0 };
+    const cacheDir = makeTempDir();
+
+    try {
+      const ctx = createContext(cacheDir, counters);
+
+      // Both inputs are built already resolved, so the only generation
+      // expected below is the lipsync video itself.
+      const element = TalkingHead({
+        image: createResolvedImage() as unknown as ReturnType<typeof Image>,
+        audio: createResolvedSpeech() as unknown as ReturnType<typeof Speech>,
+        model: createVideoModel(),
+      } as any);
+
+      const result = await renderTalkingHead(element as any, ctx);
+      const { imageCalls, videoCalls } = counters;
+
+      // Should NOT have called generateImage (image was pre-resolved)
+      expect(imageCalls).toBe(0);
+      // Should have called generateVideo (for lipsync)
+      expect(videoCalls).toBe(1);
+      expect(result).toBeDefined();
+    } finally {
+      cleanupTempDir(cacheDir);
+    }
+  });
+
+  test("renders with lazy (non-awaited) image element", async () => {
+    const counters = { imageCalls: 0, videoCalls: 0, speechCalls: 0 };
+    const cacheDir = makeTempDir();
+
+    try {
+      const ctx = createContext(cacheDir, counters);
+
+      // Not awaited, so this is still a lazy element the renderer must generate.
+      const image = Image({
+        prompt: "young woman, casual outfit",
+        model: createImageModel(),
+      });
+      const audio = createResolvedSpeech();
+
+      const element = TalkingHead({
+        image,
+        audio: audio as unknown as ReturnType<typeof Speech>,
+        model: createVideoModel(),
+      } as any);
+
+      const result = await renderTalkingHead(element as any, ctx);
+      const { imageCalls, videoCalls } = counters;
+
+      // Should have called generateImage (lazy image needed rendering)
+      expect(imageCalls).toBe(1);
+      // Should have called generateVideo (for lipsync)
+      expect(videoCalls).toBe(1);
+      expect(result).toBeDefined();
+    } finally {
+      cleanupTempDir(cacheDir);
+    }
+  });
+});
diff --git a/src/react/renderers/talking-head.ts b/src/react/renderers/talking-head.ts
new file mode 100644
index 00000000..b7792c42
--- /dev/null
+++ b/src/react/renderers/talking-head.ts
@@ -0,0 +1,105 @@
+import type { File } from "../../ai-sdk/file";
+import { ResolvedElement } from "../resolved-element";
+import type { TalkingHeadProps, VargElement } from "../types";
+import type { RenderContext } from "./context";
+import { renderImage } from "./image";
+import { renderSpeech } from "./speech";
+import { renderVideo } from "./video";
+
+/**
+ * Render a TalkingHead element into a lipsync video file.
+ *
+ * Pipeline:
+ * 1. Resolve the character image from `image` prop (VargElement or ResolvedElement)
+ * 2. Resolve the speech audio from `audio` prop (VargElement or ResolvedElement)
+ * 3. Generate the video with `lipsyncModel`, else `model`, else `ctx.defaults.video`
+ *
+ * @throws Error when no usable model, `image`, or `audio` is available.
+ */
+export async function renderTalkingHead(
+  element: VargElement<"talking-head">,
+  ctx: RenderContext,
+): Promise<File> {
+  // If already resolved via `await TalkingHead(...)`, reuse the pre-generated file
+  if (element instanceof ResolvedElement) {
+    ctx.generatedFiles.push(element.meta.file);
+    return element.meta.file;
+  }
+
+  const props = element.props as TalkingHeadProps;
+
+  // `lipsyncModel` overrides `model`, so either one alone is sufficient.
+  const lipsyncModel = props.lipsyncModel ?? props.model ?? ctx.defaults?.video;
+  if (!lipsyncModel) {
+    throw new Error(
+      "TalkingHead requires 'model' prop (or set defaults.video in render options)",
+    );
+  }
+
+  if (!props.image) {
+    throw new Error("TalkingHead requires 'image' prop (an Image element)");
+  }
+
+  if (!props.audio) {
+    throw new Error("TalkingHead requires 'audio' prop (a Speech element)");
+  }
+
+  // Step 1 & 2: Resolve character image and speech audio in parallel
+  const [characterFile, speechFile] = await Promise.all([
+    resolveImageProp(props.image, ctx),
+    resolveAudioProp(props.audio, ctx),
+  ]);
+
+  // Step 3: Generate lipsync video (image + audio → video).
+  // The two byte reads are independent, so they run concurrently too.
+  const [characterImageData, speechAudioData] = await Promise.all([
+    characterFile.arrayBuffer(),
+    speechFile.arrayBuffer(),
+  ]);
+
+  // Create a synthetic video element for the lipsync generation
+  const videoElement: VargElement<"video"> = {
+    type: "video",
+    props: {
+      prompt: { images: [characterImageData], audio: speechAudioData },
+      model: lipsyncModel,
+      keepAudio: true,
+      providerOptions: { fal: { resolution: props.resolution ?? "720p" } },
+    },
+    children: [],
+  };
+
+  return renderVideo(videoElement, ctx);
+}
+
+/**
+ * Return the File behind an `image` prop: a ResolvedElement's existing file
+ * (recorded in `ctx.generatedFiles`), otherwise the output of `renderImage`.
+ */
+async function resolveImageProp(
+  image: VargElement<"image">,
+  ctx: RenderContext,
+): Promise<File> {
+  if (!(image instanceof ResolvedElement)) {
+    return renderImage(image, ctx);
+  }
+  const { file } = image.meta;
+  ctx.generatedFiles.push(file);
+  return file;
+}
+
+/**
+ * Return the File behind an `audio` prop: a ResolvedElement's existing file
+ * (recorded in `ctx.generatedFiles`), otherwise the output of `renderSpeech`.
+ */
+async function resolveAudioProp(
+  audio: VargElement<"speech">,
+  ctx: RenderContext,
+): Promise<File> {
+  if (!(audio instanceof ResolvedElement)) {
+    return renderSpeech(audio, ctx);
+  }
+  const { file } = audio.meta;
+  ctx.generatedFiles.push(file);
+  return file;
+}
diff --git a/src/react/resolve.ts b/src/react/resolve.ts
index 2df8dd7b..28600cd2 100644
--- a/src/react/resolve.ts
+++ b/src/react/resolve.ts
@@ -37,6 +37,7 @@ import type {
   ImageProps,
   MusicProps,
   SpeechProps,
+  TalkingHeadProps,
   VargElement,
 } from "./types";
 
@@ -748,3 +749,95 @@ export async function resolveMusicElement(
     duration,
   });
 }
+
+// ---------------------------------------------------------------------------
+// TalkingHead
+// ---------------------------------------------------------------------------
+/**
+ * Resolve a TalkingHead element into a lipsync video built from its `image`
+ * and `audio` props. Returns a ResolvedElement<"talking-head"> wrapping the
+ * final video file and its probed duration.
+ *
+ * Pipeline:
+ * 1. Resolve the image from `image` prop (generate or reuse pre-resolved)
+ * 2. Resolve the speech from `audio` prop (generate or reuse pre-resolved)
+ * 3. Generate lipsync video via `lipsyncModel`, falling back to `model`
+ *
+ * @throws Error when no model prop is set, or `image`/`audio` is missing.
+ */
+export async function resolveTalkingHeadElement(
+  element: VargElement<"talking-head">,
+  props: TalkingHeadProps,
+): Promise<ResolvedElement<"talking-head">> {
+  // `lipsyncModel` overrides `model`, so either one alone is sufficient.
+  const lipsyncModel = props.lipsyncModel ?? props.model;
+  if (!lipsyncModel) {
+    throw new Error(
+      "await TalkingHead() requires 'model' prop for lipsync video generation",
+    );
+  }
+
+  if (!props.image) {
+    throw new Error(
+      "await TalkingHead() requires 'image' prop (an Image element).",
+    );
+  }
+
+  if (!props.audio) {
+    throw new Error(
+      "await TalkingHead() requires 'audio' prop (a Speech element).",
+    );
+  }
+
+  // Steps 1 & 2: image and speech are independent, so resolve them together.
+  // A ResolvedElement is reused as-is; a lazy element is generated first.
+  const [resolvedImage, resolvedSpeech] = await Promise.all([
+    props.image instanceof ResolvedElement
+      ? props.image
+      : resolveImageElement(props.image, props.image.props as ImageProps),
+    props.audio instanceof ResolvedElement
+      ? props.audio
+      : resolveSpeechElement(props.audio, props.audio.props as SpeechProps),
+  ]);
+  const [characterBuffer, speechBuffer] = await Promise.all([
+    resolvedImage.file.arrayBuffer(),
+    resolvedSpeech.file.arrayBuffer(),
+  ]);
+
+  // Step 3: Generate lipsync video (image + audio → video)
+  const generateVideo = getCachedGenerateVideo();
+  const { video } = await generateVideo({
+    model: lipsyncModel as Parameters<typeof generateVideoRaw>[0]["model"],
+    prompt: {
+      images: [new Uint8Array(characterBuffer)],
+      audio: new Uint8Array(speechBuffer),
+    },
+    duration: 0, // duration determined by audio length
+    // Mirror renderTalkingHead so the `resolution` prop is honored here too.
+    providerOptions: { fal: { resolution: props.resolution ?? "720p" } },
+  });
+
+  const mediaType = video.mimeType ?? "video/mp4";
+  const modelId =
+    typeof lipsyncModel === "string" ? lipsyncModel : lipsyncModel.modelId;
+
+  const promptLabel =
+    getTextContent(element.children) ?? "talking-head lipsync";
+
+  const file = File.fromGenerated({
+    uint8Array: video.uint8Array,
+    mediaType,
+    url: (video as { url?: string }).url,
+  }).withMetadata({
+    type: "video",
+    model: modelId,
+    prompt: `talking-head: ${promptLabel.slice(0, 100)}`,
+  });
+
+  const duration = await probeDuration(file);
+
+  return new ResolvedElement(element, {
+    file,
+    duration,
+  });
+}
diff --git a/src/react/types.ts b/src/react/types.ts
index 012c11c2..dd1e2756 100644
--- a/src/react/types.ts
+++ b/src/react/types.ts
@@ -198,16 +198,20 @@ export interface SpeechProps extends BaseProps, VolumeProps {
 }
 
 export interface TalkingHeadProps extends BaseProps {
-  character?: string;
-  src?: string;
-  voice?: string;
+  /** Character face: a lazy Image element or an already-resolved one. */
+  image?: VargElement<"image">;
+  /** Audio track: a lazy Speech element or an already-resolved one. */
+  audio?: VargElement<"speech">;
+  /** Lipsync video model (e.g. fal.videoModel("sync-v2-pro")). */
   model?: VideoModelV3;
+  /** Used instead of `model` for the lipsync call when set. */
   lipsyncModel?: VideoModelV3;
+  /** Lipsync video resolution; renderTalkingHead sends "720p" when unset. */
+  resolution?: "480p" | "720p" | "1080p";
   position?:
     | Position
     | { left?: string; right?: string; top?: string; bottom?: string };
   size?: { width: string; height: string };
-  children?: string;
 }
 
 export interface TitleProps extends BaseProps {