diff --git a/README.md b/README.md
index 8b1a7942..759befc3 100644
--- a/README.md
+++ b/README.md
@@ -39,7 +39,7 @@ view inside the editor.
 
 ## Features
 
-- Supports streaming both iOS simulators and Android emulators
+- Supports streaming iOS simulators and Android emulators, including WebRTC audio
 - Full simulator control & inspection using private iOS accessibility APIs and Android UIAutomator - available using `simdeck` CLI
 - Real-time screen `describe` command using accessibility view tree - available in token-efficient format for agents
 - Profiling built-in: CPU, memory, disk writes, network throughput, hang signals, and stack sampling
diff --git a/docs/api/rest.md b/docs/api/rest.md
index 5e5828fa..14469902 100644
--- a/docs/api/rest.md
+++ b/docs/api/rest.md
@@ -173,7 +173,7 @@ Performance query parameters:
 | `GET`  | `/api/simulators/{udid}/control`      | Alias for input control WebSocket      |
 | `POST` | `/api/simulators/{udid}/refresh`      | Request a fresh frame or keyframe      |
 
-For normal clients, copy the browser behavior instead of hand-coding a raw decoder. The UI supports WebRTC first and H.264 WebSocket fallback.
+For normal clients, copy the browser behavior instead of hand-coding a raw decoder. The UI tries WebRTC first and falls back to H.264 over WebSocket. WebRTC carries H.264 video and, when the offer includes an audio receiver, an Opus simulator-audio track sourced from the selected simulator or emulator process tree. The H.264 WebSocket fallback is video-only.
 
 Minimal WebRTC request:
 
@@ -194,7 +194,16 @@ Response:
 ```json
 {
   "type": "answer",
-  "sdp": "v=0..."
+  "sdp": "v=0...",
+  "audio": {
+    "codec": "opus",
+    "sampleRate": 48000,
+    "channels": 2
+  },
+  "video": {
+    "width": 1179,
+    "height": 2556
+  }
 }
 ```
 
diff --git a/docs/guide/video.md b/docs/guide/video.md
index 372ca1b4..b1a4ee4c 100644
--- a/docs/guide/video.md
+++ b/docs/guide/video.md
@@ -4,6 +4,10 @@ SimDeck streams live device video to the browser. Local sessions default to high
 
 iOS simulator H.264 uses VideoToolbox for hardware encoding and x264 for software encoding.
 
+WebRTC streams also include simulator audio. Playback starts muted; use the
+Sound toggle in the browser menu to hear the device. The H.264 WebSocket
+fallback remains video-only.
+
 ## When encoding runs
 
 SimDeck starts encoding when a browser stream needs H.264 frames. The server
@@ -73,6 +77,17 @@ simdeck service restart --video-codec software --low-latency
 
 The browser tries WebRTC first. If WebRTC cannot render a frame, the UI can fall back to H.264 over WebSocket when the browser supports WebCodecs.
 
+Audio is carried on the WebRTC path using a browser-native Opus track. On
+macOS 14.2 and newer, SimDeck uses Core Audio process taps over the selected
+simulator or emulator process tree, then routes that tap through a private
+aggregate device into the WebRTC audio track. If macOS has not granted system
+audio recording access, video still streams and the server logs the
+audio-capture failure. While the tap is being read, Core Audio mutes the tapped
+simulator process at the hardware output; browser playback is controlled by the
+Sound toggle. Android emulators launched by SimDeck are started with host audio
+enabled, so restart any emulator that was launched without audio before testing
+Android sound.
+
 Force a mode while debugging:
 
 ```text
diff --git a/packages/client/index.html b/packages/client/index.html
index be7538db..72c8f4b7 100644
--- a/packages/client/index.html
+++ b/packages/client/index.html
@@ -24,5 +24,29 @@
   <body>
     <div id="root"></div>
     <script type="module" src="/src/main.jsx"></script>
+    <script>
+      (() => {
+        const timeout = window.setTimeout(() => {
+          const root = document.getElementById("root");
+          if (!root || root.childElementCount > 0 || root.textContent.trim()) {
+            return;
+          }
+          const key = "simdeck-root-watchdog-reload-at";
+          const lastReloadAt = Number(sessionStorage.getItem(key) || "0");
+          if (Date.now() - lastReloadAt < 30000) {
+            return;
+          }
+          sessionStorage.setItem(key, String(Date.now()));
+          const url = new URL(window.location.href);
+          url.searchParams.set("_simdeckReload", String(Date.now()));
+          window.location.replace(url);
+        }, 4000);
+        window.addEventListener(
+          "beforeunload",
+          () => window.clearTimeout(timeout),
+          { once: true },
+        );
+      })();
+    </script>
   </body>
 </html>
diff --git a/packages/client/src/app/AppShell.tsx b/packages/client/src/app/AppShell.tsx
index a670cea6..a685842d 100644
--- a/packages/client/src/app/AppShell.tsx
+++ b/packages/client/src/app/AppShell.tsx
@@ -54,7 +54,10 @@ import {
   simulatorUsesInsetChromeButtons,
 } from "../features/simulators/simulatorDisplay";
 import { useSimulatorList } from "../features/simulators/useSimulatorList";
-import { sendWebRtcControlMessage } from "../features/stream/streamWorkerClient";
+import {
+  sendWebRtcControlMessage,
+  setActiveStreamAudioMuted,
+} from "../features/stream/streamWorkerClient";
 import type {
   StreamConfig,
   StreamEncoder,
@@ -560,6 +563,8 @@ export function AppShell({
   const [streamTransport, setStreamTransport] = useState<StreamTransport>(
     initialStreamTransportRef.current,
   );
+  const [streamAudioMuted, setStreamAudioMuted] = useState(true);
+  const streamAudioMutedRef = useRef(streamAudioMuted);
   const [streamConfigApplyKey, setStreamConfigApplyKey] = useState(0);
   const [streamConfigReady, setStreamConfigReady] = useState(false);
   const [touchIndicators, setTouchIndicators] = useState<TouchIndicator[]>([]);
@@ -812,6 +817,7 @@ export function AppShell({
     streamBackend,
     streamCanvasKey,
   } = useLiveStream({
+    audioMuted: streamAudioMuted,
     canvasElement: streamCanvasElement,
     paused: !streamConfigReady,
     remote: remoteStream,
@@ -877,6 +883,17 @@ export function AppShell({
     [remoteStream],
   );
 
+  const toggleStreamAudioMuted = useCallback(() => {
+    const next = !streamAudioMutedRef.current;
+    streamAudioMutedRef.current = next;
+    setActiveStreamAudioMuted(next);
+    setStreamAudioMuted(next);
+  }, []);
+
+  useEffect(() => {
+    streamAudioMutedRef.current = streamAudioMuted;
+  }, [streamAudioMuted]);
+
   useEffect(() => {
     if (
       !selectedSimulator ||
@@ -2931,6 +2948,7 @@ export function AppShell({
         onStreamFpsChange={updateStreamFps}
         onStreamQualityChange={updateStreamQuality}
         onStreamTransportChange={updateStreamTransport}
+        onToggleStreamAudioMuted={toggleStreamAudioMuted}
         onShutdown={() => {
           if (!selectedSimulator) {
             return;
@@ -2989,6 +3007,7 @@ export function AppShell({
           !selectedSimulatorTransitionKind,
         )}
         streamConfig={streamConfig}
+        streamAudioMuted={streamAudioMuted}
         streamTransport={streamTransport}
         simulatorMenuOpen={simulatorMenuOpen}
         simulatorMenuRef={simulatorMenuRef}
diff --git a/packages/client/src/features/accessibility/AccessibilityOverlay.test.ts b/packages/client/src/features/accessibility/AccessibilityOverlay.test.ts
index c9880849..2fe246f4 100644
--- a/packages/client/src/features/accessibility/AccessibilityOverlay.test.ts
+++ b/packages/client/src/features/accessibility/AccessibilityOverlay.test.ts
@@ -28,6 +28,15 @@ describe("accessibilityDomTagName", () => {
       }),
     ).toBe("simdeck-range-and-filter-bar");
   });
+
+  it("falls back when component metadata is not string-like", () => {
+    expect(
+      accessibilityDomTagName({
+        source: "in-app-inspector",
+        type: { kind: "Button" } as unknown as string,
+      }),
+    ).toBe("simdeck-element");
+  });
 });
 
 describe("AccessibilityOverlay", () => {
@@ -103,4 +112,42 @@ describe("AccessibilityOverlay", () => {
     expect(markup).not.toContain("; disabled");
     expect(markup).not.toContain(" title=");
   });
+
+  it("renders object-shaped accessibility metadata without crashing", () => {
+    const markup = renderToStaticMarkup(
+      createElement(AccessibilityOverlay, {
+        hoveredId: null,
+        roots: [
+          {
+            frame: { height: 844, width: 390, x: 0, y: 0 },
+            role: "application",
+            children: [
+              {
+                AXLabel: { localized: "Continue" } as unknown as string,
+                AXValue: 42 as unknown as string,
+                frame: { height: 48, width: 180, x: 105, y: 720 },
+                nativeScript: {
+                  testID: 123,
+                  type: { kind: "Button" },
+                },
+                placeholder: false as unknown as string,
+                source: "nativescript",
+                sourceLocation: {
+                  file: { path: "/app/app.component.ts" } as unknown as string,
+                  line: 12,
+                },
+                type: { kind: "Button" } as unknown as string,
+              },
+            ],
+          },
+        ],
+        selectedId: "",
+      }),
+    );
+
+    expect(markup).toContain("<simdeck-element");
+    expect(markup).toContain('data-test-id="123"');
+    expect(markup).toContain('data-simdeck-accessibility-value="42"');
+    expect(markup).not.toContain("[object Object]");
+  });
 });
diff --git a/packages/client/src/features/accessibility/AccessibilityOverlay.tsx b/packages/client/src/features/accessibility/AccessibilityOverlay.tsx
index 8e1adcae..9586bd8a 100644
--- a/packages/client/src/features/accessibility/AccessibilityOverlay.tsx
+++ b/packages/client/src/features/accessibility/AccessibilityOverlay.tsx
@@ -292,9 +292,12 @@ export function accessibilityDomTagName(node: AccessibilityNode): string {
   return `simdeck-${component}`;
 }
 
-function cleanTagPart(value: string | null | undefined): string | null {
-  const kebab = value
-    ?.trim()
+function cleanTagPart(value: unknown): string | null {
+  const text = cleanAccessibilityText(value);
+  if (!text) {
+    return null;
+  }
+  const kebab = text
     .replace(/([a-z0-9])([A-Z])/g, "$1-$2")
     .replace(/[^A-Za-z0-9]+/g, "-")
     .replace(/^-+|-+$/g, "")
@@ -441,8 +444,12 @@ function accessibilityStateSummary(
   return state.join(", ");
 }
 
-function cleanAccessibilityText(
-  value: string | null | undefined,
-): string | null {
-  return value?.trim() || null;
+function cleanAccessibilityText(value: unknown): string | null {
+  if (typeof value === "string") {
+    return value.trim() || null;
+  }
+  if (typeof value === "number" || typeof value === "boolean") {
+    return String(value);
+  }
+  return null;
 }
diff --git a/packages/client/src/features/accessibility/accessibilityTree.test.ts b/packages/client/src/features/accessibility/accessibilityTree.test.ts
index 5a84f43d..7a1a89a0 100644
--- a/packages/client/src/features/accessibility/accessibilityTree.test.ts
+++ b/packages/client/src/features/accessibility/accessibilityTree.test.ts
@@ -2,6 +2,7 @@ import { describe, expect, it } from "vitest";
 
 import type { AccessibilityNode } from "../../api/types";
 import {
+  accessibilityIdentifier,
   accessibilityKind,
   buildAccessibilityTree,
   findAccessibilityItemAtPoint,
@@ -216,6 +217,24 @@ describe("primaryAccessibilityText", () => {
       }),
     ).toBe("~/assets/album-midnight.jpg");
   });
+
+  it("ignores object-shaped accessibility text fields without crashing", () => {
+    const node: AccessibilityNode = {
+      AXIdentifier: { id: "continue-button" } as unknown as string,
+      AXLabel: { localized: "Continue" } as unknown as string,
+      AXUniqueId: { value: "unique" } as unknown as string,
+      AXValue: 42 as unknown as string,
+      role: { name: "button" } as unknown as string,
+      source: "in-app-inspector",
+      title: { value: "Continue" } as unknown as string,
+      type: { kind: "Button" } as unknown as string,
+    };
+
+    expect(() => buildAccessibilityTree([node])).not.toThrow();
+    expect(primaryAccessibilityText(node)).toBe("42");
+    expect(accessibilityIdentifier(node)).toBe("");
+    expect(accessibilityKind(node)).toBe("Element");
+  });
 });
 
 describe("findAccessibilityItemAtPoint", () => {
diff --git a/packages/client/src/features/accessibility/accessibilityTree.ts b/packages/client/src/features/accessibility/accessibilityTree.ts
index 5c1290fd..a6c99615 100644
--- a/packages/client/src/features/accessibility/accessibilityTree.ts
+++ b/packages/client/src/features/accessibility/accessibilityTree.ts
@@ -579,9 +579,14 @@ function frameContainsPoint(
   );
 }
 
-function cleanText(value: string | null | undefined): string | null {
-  const trimmed = value?.trim();
-  return trimmed ? trimmed : null;
+function cleanText(value: unknown): string | null {
+  if (typeof value === "string") {
+    return value.trim() || null;
+  }
+  if (typeof value === "number" || typeof value === "boolean") {
+    return String(value);
+  }
+  return null;
 }
 
 function displayAccessibilityKind(
diff --git a/packages/client/src/features/simulators/SimulatorMenu.tsx b/packages/client/src/features/simulators/SimulatorMenu.tsx
index 0347f7bf..c6c35e49 100644
--- a/packages/client/src/features/simulators/SimulatorMenu.tsx
+++ b/packages/client/src/features/simulators/SimulatorMenu.tsx
@@ -34,6 +34,7 @@ interface SimulatorMenuProps {
   onStreamFpsChange: (fps: StreamFps) => void;
   onStreamQualityChange: (quality: StreamQualityPreset) => void;
   onStreamTransportChange: (transport: StreamTransport) => void;
+  onToggleStreamAudioMuted: () => void;
   onToggleAppearance: () => void;
   onToggleDebug: () => void;
   onToggleMenu: () => void;
@@ -47,6 +48,7 @@ interface SimulatorMenuProps {
   showBootButton: boolean;
   showStopButton: boolean;
   streamConfig: StreamConfig;
+  streamAudioMuted: boolean;
   streamTransport: StreamTransport;
   touchOverlayVisible: boolean;
 }
@@ -74,6 +76,7 @@ export function SimulatorMenu({
   onStreamFpsChange,
   onStreamQualityChange,
   onStreamTransportChange,
+  onToggleStreamAudioMuted,
   onToggleAppearance,
   onToggleDebug,
   onToggleMenu,
@@ -87,6 +90,7 @@ export function SimulatorMenu({
   showBootButton,
   showStopButton,
   streamConfig,
+  streamAudioMuted,
   streamTransport,
   touchOverlayVisible,
 }: SimulatorMenuProps) {
@@ -200,6 +204,14 @@ export function SimulatorMenu({
                     )}
                   </select>
                 </label>
+                <label className="menu-toggle">
+                  <input
+                    checked={!streamAudioMuted}
+                    onChange={() => onToggleStreamAudioMuted()}
+                    type="checkbox"
+                  />
+                  <span>Sound</span>
+                </label>
               </div>
               <div className="menu-divider" />
               <div className="menu-actions">
diff --git a/packages/client/src/features/simulators/simulatorDisplay.test.ts b/packages/client/src/features/simulators/simulatorDisplay.test.ts
index 4965e490..40461ccc 100644
--- a/packages/client/src/features/simulators/simulatorDisplay.test.ts
+++ b/packages/client/src/features/simulators/simulatorDisplay.test.ts
@@ -30,6 +30,17 @@ describe("simulatorDisplay", () => {
     ).toBe("watchOS 26.0");
   });
 
+  it("ignores non-string runtime metadata", () => {
+    expect(
+      simulatorRuntimeLabel(
+        simulator({
+          runtimeIdentifier: { identifier: "unexpected" } as unknown as string,
+          runtimeName: null as unknown as string,
+        }),
+      ),
+    ).toBe("");
+  });
+
   it("enables native chrome for Apple Watch simulators", () => {
     expect(
       shouldRenderNativeChrome(
diff --git a/packages/client/src/features/simulators/simulatorDisplay.ts b/packages/client/src/features/simulators/simulatorDisplay.ts
index d1fa89bf..9cbaaf87 100644
--- a/packages/client/src/features/simulators/simulatorDisplay.ts
+++ b/packages/client/src/features/simulators/simulatorDisplay.ts
@@ -65,13 +65,14 @@ function simulatorMetadataText(simulator: SimulatorMetadata): string {
     simulator.runtimeName,
     simulator.runtimeIdentifier,
   ]
-    .filter(Boolean)
+    .map(metadataTextValue)
+    .filter((value): value is string => Boolean(value))
     .join(" ")
     .toLowerCase();
 }
 
-function formatRuntimeLabel(value: string | undefined): string | null {
-  const trimmed = value?.trim();
+function formatRuntimeLabel(value: unknown): string | null {
+  const trimmed = metadataTextValue(value);
   if (!trimmed) {
     return null;
   }
@@ -89,3 +90,13 @@ function formatRuntimeLabel(value: string | undefined): string | null {
   }
   return trimmed;
 }
+
+function metadataTextValue(value: unknown): string | null {
+  if (typeof value === "string") {
+    return value.trim() || null;
+  }
+  if (typeof value === "number" || typeof value === "boolean") {
+    return String(value);
+  }
+  return null;
+}
diff --git a/packages/client/src/features/stream/streamTypes.ts b/packages/client/src/features/stream/streamTypes.ts
index 4e9b6099..acee1ae2 100644
--- a/packages/client/src/features/stream/streamTypes.ts
+++ b/packages/client/src/features/stream/streamTypes.ts
@@ -1,6 +1,7 @@
 import type { Size } from "../viewport/types";
 
 export interface StreamConnectTarget {
+  audioMuted?: boolean;
   clientId?: string;
   platform?: string;
   remote?: boolean;
diff --git a/packages/client/src/features/stream/streamWorkerClient.ts b/packages/client/src/features/stream/streamWorkerClient.ts
index b49842a4..200e594e 100644
--- a/packages/client/src/features/stream/streamWorkerClient.ts
+++ b/packages/client/src/features/stream/streamWorkerClient.ts
@@ -97,6 +97,10 @@ export function sendWebRtcStreamControl(options: {
   );
 }
 
+export function setActiveStreamAudioMuted(muted: boolean) {
+  activeStreamClient?.setAudioMuted(muted);
+}
+
 function sendStreamQualityConfig(config: StreamConfig): boolean {
   const encoded = JSON.stringify({
     config: streamQualityPayload(config),
@@ -230,6 +234,7 @@ function compareVideoToImage(
 export function buildStreamTarget(
   udid: string,
   options: {
+    audioMuted?: boolean;
     clientId?: string;
     platform?: string;
     remote?: boolean;
@@ -238,6 +243,7 @@ export function buildStreamTarget(
   } = {},
 ): StreamConnectTarget {
   return {
+    audioMuted: options.audioMuted,
     clientId: options.clientId,
     platform: options.platform,
     remote: options.remote,
@@ -290,6 +296,7 @@ interface StreamClientBackend {
   disconnect(): void;
   applyStreamConfig?(config?: StreamConfig): void | Promise<void>;
   sendControl?(payload: unknown): boolean;
+  setAudioMuted?(muted: boolean): void;
 }
 
 export interface VisualArtifactSample {
@@ -389,6 +396,11 @@ interface WebCodecsVideoDecoderConstructor {
 }
 
 interface WebRtcAnswerPayload extends RTCSessionDescriptionInit {
+  audio?: {
+    channels?: number;
+    codec?: string;
+    sampleRate?: number;
+  };
   video?: {
     height?: number;
     width?: number;
@@ -1295,6 +1307,8 @@ function hexByte(byte: number): string {
 }
 
 class WebRtcStreamClient implements StreamClientBackend {
+  private audioElement: HTMLAudioElement | null = null;
+  private audioMuted = true;
   private animationFrame = 0;
   private canvas: HTMLCanvasElement | null = null;
   private canvasContext: CanvasRenderingContext2D | null = null;
@@ -1408,6 +1422,7 @@ class WebRtcStreamClient implements StreamClientBackend {
     this.shouldReconnect = true;
     this.remoteMode = Boolean(target.remote);
     this.streamTarget = target;
+    this.audioMuted = target.audioMuted ?? true;
     if (!wasReconnecting) {
       this.reconnectDelayMs = WEBRTC_RECONNECT_BASE_DELAY_MS;
     }
@@ -1435,6 +1450,14 @@ class WebRtcStreamClient implements StreamClientBackend {
       const useRgbaTransport = shouldUseLocalAndroidRgbaWebRtc(target);
       this.rgbaMode = useRgbaTransport;
       this.attachDiagnostics(peerConnection, target, generation);
+      const audioTransceiver = peerConnection.addTransceiver("audio", {
+        direction: "recvonly",
+      });
+      configureAudioReceiverCodecPreferences(audioTransceiver);
+      configureLowLatencyReceiver(
+        audioTransceiver.receiver,
+        receiverBufferSeconds(target),
+      );
       if (!useRgbaTransport) {
         this.startReceiverStatsPolling(peerConnection, target, generation);
         const transceiver = peerConnection.addTransceiver("video", {
@@ -1485,17 +1508,21 @@ class WebRtcStreamClient implements StreamClientBackend {
       };
 
       peerConnection.ontrack = (event) => {
-        if (useRgbaTransport) {
+        if (generation !== this.connectGeneration) {
           return;
         }
-        if (generation !== this.connectGeneration) {
+        if (event.track.kind === "audio") {
+          this.attachAudioTrack(event.track, generation);
+          return;
+        }
+        if (useRgbaTransport || event.track.kind !== "video") {
           return;
         }
         event.track.contentHint = "motion";
         for (const receiver of peerConnection.getReceivers()) {
           configureLowLatencyReceiver(receiver, receiverBufferSeconds(target));
         }
-        const stream = event.streams[0] ?? new MediaStream([event.track]);
+        const stream = new MediaStream([event.track]);
         const video = document.createElement("video");
         video.autoplay = true;
         video.className = "stream-video";
@@ -1606,6 +1633,19 @@ class WebRtcStreamClient implements StreamClientBackend {
     return sendDataChannelMessage(this.controlChannel, JSON.stringify(payload));
   }
 
+  setAudioMuted(muted: boolean) {
+    this.audioMuted = muted;
+    if (!this.audioElement) {
+      return;
+    }
+    this.audioElement.muted = muted;
+    if (!muted) {
+      void this.audioElement.play().catch(() => {
+        // Browsers may reject play() unless the unmuting click happened in this page.
+      });
+    }
+  }
+
   async applyStreamConfig(config?: StreamConfig) {
     if (!config) {
       return;
@@ -1703,6 +1743,12 @@ class WebRtcStreamClient implements StreamClientBackend {
       this.video.remove();
     }
     this.video = null;
+    this.audioElement?.pause();
+    if (this.audioElement) {
+      this.audioElement.srcObject = null;
+      this.audioElement.remove();
+    }
+    this.audioElement = null;
     this.reportedVideoHeight = 0;
     this.reportedVideoWidth = 0;
     this.controlChannel?.close();
@@ -2122,6 +2168,36 @@ class WebRtcStreamClient implements StreamClientBackend {
     }
   }
 
+  private attachAudioTrack(track: MediaStreamTrack, generation: number) {
+    this.audioElement?.pause();
+    if (this.audioElement) {
+      this.audioElement.srcObject = null;
+      this.audioElement.remove();
+    }
+    const audio = document.createElement("audio");
+    audio.autoplay = true;
+    audio.muted = this.audioMuted;
+    audio.preload = "auto";
+    audio.srcObject = new MediaStream([track]);
+    audio.style.display = "none";
+    document.body.appendChild(audio);
+    this.audioElement = audio;
+    const startPlayback = () => {
+      if (
+        generation !== this.connectGeneration ||
+        audio !== this.audioElement
+      ) {
+        return;
+      }
+      void audio.play().catch(() => {
+        // Muted autoplay is best effort; unmuting from the menu retries playback.
+      });
+    };
+    audio.addEventListener("loadedmetadata", startPlayback);
+    audio.addEventListener("canplay", startPlayback);
+    startPlayback();
+  }
+
   private attachRgbaDataChannel(channel: RTCDataChannel, generation: number) {
     this.rgbaChannel?.close();
     this.rgbaChannel = channel;
@@ -2756,6 +2832,26 @@ function configureReceiverCodecPreferences(transceiver: RTCRtpTransceiver) {
   ]);
 }
 
+function configureAudioReceiverCodecPreferences(
+  transceiver: RTCRtpTransceiver,
+) {
+  if (!transceiver.setCodecPreferences) {
+    return;
+  }
+  const capabilities = RTCRtpReceiver.getCapabilities("audio");
+  const codecs = capabilities?.codecs ?? [];
+  const preferred = codecs.filter(
+    (codec) => codec.mimeType.toLowerCase() === "audio/opus",
+  );
+  if (preferred.length === 0) {
+    return;
+  }
+  transceiver.setCodecPreferences([
+    ...preferred,
+    ...codecs.filter((codec) => codec.mimeType.toLowerCase() !== "audio/opus"),
+  ]);
+}
+
 function safariBaselineH264Offer(
   offer: RTCSessionDescriptionInit,
 ): RTCSessionDescriptionInit {
@@ -3033,6 +3129,10 @@ export class StreamWorkerClient {
     );
   }
 
+  setAudioMuted(muted: boolean) {
+    this.backend?.setAudioMuted?.(muted);
+  }
+
   applyStreamConfig(config?: StreamConfig) {
     try {
       const result = this.backend?.applyStreamConfig?.(config);
diff --git a/packages/client/src/features/stream/useLiveStream.ts b/packages/client/src/features/stream/useLiveStream.ts
index 77990731..350c4f53 100644
--- a/packages/client/src/features/stream/useLiveStream.ts
+++ b/packages/client/src/features/stream/useLiveStream.ts
@@ -28,6 +28,7 @@ const CLIENT_TELEMETRY_ID_STORAGE_KEY = "simdeck.streamClientId";
 const VISUAL_ARTIFACT_TELEMETRY_INTERVAL_MS = 30000;
 
 interface UseLiveStreamOptions {
+  audioMuted?: boolean;
   canvasElement: HTMLCanvasElement | null;
   paused?: boolean;
   remote?: boolean;
@@ -108,6 +109,7 @@ function isViewerForeground(canvasVisible: boolean): boolean {
 }
 
 export function useLiveStream({
+  audioMuted = true,
   canvasElement,
   paused = false,
   remote = false,
@@ -370,6 +372,7 @@ export function useLiveStream({
     workerClient.connect(
       buildStreamTarget(simulator.udid, {
         clientId: clientTelemetryIdRef.current,
+        audioMuted,
         platform: simulator.platform,
         remote,
         streamConfig,
@@ -387,8 +390,13 @@ export function useLiveStream({
     paused,
     remote,
     streamTransport,
+    audioMuted,
   ]);
 
+  useEffect(() => {
+    workerClientRef.current?.setAudioMuted(audioMuted);
+  }, [audioMuted]);
+
   useEffect(() => {
     if (!simulator?.udid || paused) {
       return;
diff --git a/packages/client/src/features/toolbar/Toolbar.tsx b/packages/client/src/features/toolbar/Toolbar.tsx
index 128eb85a..ad72b4d9 100644
--- a/packages/client/src/features/toolbar/Toolbar.tsx
+++ b/packages/client/src/features/toolbar/Toolbar.tsx
@@ -51,6 +51,7 @@ interface ToolbarProps {
   onStreamFpsChange: (fps: StreamFps) => void;
   onStreamQualityChange: (quality: StreamQualityPreset) => void;
   onStreamTransportChange: (transport: StreamTransport) => void;
+  onToggleStreamAudioMuted: () => void;
   onToggleAppearance: () => void;
   onToggleDebug: () => void;
   onToggleDevTools: () => void;
@@ -71,6 +72,7 @@ interface ToolbarProps {
   showBootButton: boolean;
   showStopButton: boolean;
   streamConfig: StreamConfig;
+  streamAudioMuted: boolean;
   streamTransport: StreamTransport;
   touchOverlayVisible: boolean;
   menuOpen: boolean;
@@ -114,6 +116,7 @@ export function Toolbar({
   onStreamFpsChange,
   onStreamQualityChange,
   onStreamTransportChange,
+  onToggleStreamAudioMuted,
   onToggleAppearance,
   onToggleDebug,
   onToggleDevTools,
@@ -133,6 +136,7 @@ export function Toolbar({
   showBootButton,
   showStopButton,
   streamConfig,
+  streamAudioMuted,
   streamTransport,
   simulatorMenuOpen,
   simulatorMenuRef,
@@ -194,6 +198,7 @@ export function Toolbar({
           onStreamFpsChange={onStreamFpsChange}
           onStreamQualityChange={onStreamQualityChange}
           onStreamTransportChange={onStreamTransportChange}
+          onToggleStreamAudioMuted={onToggleStreamAudioMuted}
           onToggleAppearance={onToggleAppearance}
           onToggleDebug={onToggleDebug}
           onToggleMenu={onToggleMenu}
@@ -208,6 +213,7 @@ export function Toolbar({
           showStopButton={showStopButton}
           canInstallApp={canInstallApp}
           streamConfig={streamConfig}
+          streamAudioMuted={streamAudioMuted}
           streamTransport={streamTransport}
           touchOverlayVisible={touchOverlayVisible}
         />
diff --git a/packages/client/src/styles/components.css b/packages/client/src/styles/components.css
index 7fb17266..2c7ca799 100644
--- a/packages/client/src/styles/components.css
+++ b/packages/client/src/styles/components.css
@@ -413,6 +413,22 @@
   text-transform: uppercase;
 }
 
+.menu-toggle {
+  display: flex;
+  align-items: center;
+  gap: 8px;
+  min-height: 30px;
+  color: var(--text);
+  font-size: 12px;
+}
+
+.menu-toggle input {
+  width: 15px;
+  height: 15px;
+  flex: 0 0 auto;
+  accent-color: var(--accent);
+}
+
 .menu-select {
   width: 100%;
   height: 32px;
diff --git a/packages/server/build.rs b/packages/server/build.rs
index 7ecccf1d..6a9ca7c4 100644
--- a/packages/server/build.rs
+++ b/packages/server/build.rs
@@ -101,7 +101,9 @@ fn main() {
         "Foundation",
         "Accelerate",
         "AppKit",
+        "AudioToolbox",
         "AVFoundation",
+        "CoreAudio",
         "CoreImage",
         "CoreGraphics",
         "CoreMedia",
diff --git a/packages/server/native/bridge/XCWNativeBridge.h b/packages/server/native/bridge/XCWNativeBridge.h
index 35ffc647..cd6d6fca 100644
--- a/packages/server/native/bridge/XCWNativeBridge.h
+++ b/packages/server/native/bridge/XCWNativeBridge.h
@@ -28,7 +28,15 @@ typedef struct xcw_native_frame {
     xcw_native_shared_bytes data;
 } xcw_native_frame;
 
+typedef struct xcw_native_audio_sample {
+    uint64_t timestamp_us;
+    uint32_t sample_rate;
+    uint16_t channels;
+    xcw_native_shared_bytes data;
+} xcw_native_audio_sample;
+
 typedef void (*xcw_native_frame_callback)(const xcw_native_frame * _Nonnull frame, void * _Nullable user_data);
+typedef void (*xcw_native_audio_callback)(const xcw_native_audio_sample * _Nonnull sample, void * _Nullable user_data);
 
 void xcw_native_initialize_app(void);
 void xcw_native_run_main_loop_slice(double duration_seconds);
@@ -109,6 +117,10 @@ void xcw_native_h264_encoder_destroy(void * _Nullable handle);
 bool xcw_native_h264_encoder_encode_rgba(void * _Nonnull handle, const uint8_t * _Nonnull rgba, size_t length, uint32_t width, uint32_t height, uint64_t timestamp_us, char * _Nullable * _Nullable error_message);
 void xcw_native_h264_encoder_request_keyframe(void * _Nonnull handle);
 
+void * _Nullable xcw_native_audio_capture_create(const int32_t * _Nullable process_ids, size_t process_count, xcw_native_audio_callback _Nullable callback, void * _Nullable user_data, char * _Nullable * _Nullable error_message);
+bool xcw_native_audio_capture_update_processes(void * _Nonnull handle, const int32_t * _Nullable process_ids, size_t process_count, char * _Nullable * _Nullable error_message);
+void xcw_native_audio_capture_destroy(void * _Nullable handle);
+
 void xcw_native_free_string(char * _Nullable value);
 void xcw_native_free_bytes(xcw_native_owned_bytes bytes);
 void xcw_native_release_shared_bytes(xcw_native_shared_bytes bytes);
diff --git a/packages/server/native/bridge/XCWNativeBridge.m b/packages/server/native/bridge/XCWNativeBridge.m
index 1c2746d7..8292ce6b 100644
--- a/packages/server/native/bridge/XCWNativeBridge.m
+++ b/packages/server/native/bridge/XCWNativeBridge.m
@@ -8,8 +8,15 @@
 #import "XCWSimctl.h"
 
 #import <AppKit/AppKit.h>
+#import <AudioToolbox/AudioToolbox.h>
+#import <CoreAudio/CoreAudio.h>
+#import <CoreAudio/AudioHardwareTapping.h>
+#import <CoreAudio/CATapDescription.h>
 #import <CoreFoundation/CoreFoundation.h>
+#import <CoreMedia/CoreMedia.h>
 #import <CoreVideo/CoreVideo.h>
+#include <math.h>
+#include <stddef.h>
 #include <stdlib.h>
 #include <string.h>
 
@@ -315,6 +322,820 @@ - (void)invalidate {
 
 @end
 
+static NSString *XCWAudioDictionaryKey(const char *key) {
+    return @(key) ?: @""; // Box the C-string dictionary key; substitute @"" if boxing yields nil.
+}
+
+// Renders an OSStatus as decimal text, adding the four-character code when it is printable ASCII.
+static NSString *XCWAudioOSStatusString(OSStatus status) {
+    // Lay the status out most-significant byte first so it reads as a FourCC.
+    const UInt32 bigEndian = CFSwapInt32HostToBig((UInt32)status);
+    char fourCC[5] = {0};
+    memcpy(fourCC, &bigEndian, 4);
+
+    NSUInteger printableCount = 0;
+    while (printableCount < 4 && fourCC[printableCount] >= 32 && fourCC[printableCount] <= 126) {
+        printableCount++;
+    }
+
+    return printableCount == 4
+        ? [NSString stringWithFormat:@"%d ('%s')", (int)status, fourCC]
+        : [NSString stringWithFormat:@"%d", (int)status];
+}
+
+// Builds a SimDeck.AudioCapture NSError; a nil description is replaced by a generic message.
+static NSError *XCWAudioCaptureError(NSInteger code, NSString *description) {
+    NSDictionary *userInfo = @{ NSLocalizedDescriptionKey: description ?: @"Audio capture failed." };
+    return [NSError errorWithDomain:@"SimDeck.AudioCapture" code:code userInfo:userInfo];
+}
+
+static NSError *XCWAudioCaptureStatusError(NSInteger code, NSString *operation, OSStatus status) { // Error text: "<operation> failed with OSStatus <formatted status>."
+    return XCWAudioCaptureError(code, [NSString stringWithFormat:@"%@ failed with OSStatus %@.", operation, XCWAudioOSStatusString(status)]);
+}
+
+static const uint32_t XCWOpusSampleRate = 48000; // Output sample rate (Hz) requested from the Opus converter.
+static const uint16_t XCWOpusChannels = 2; // Output channel count requested from the Opus converter.
+static const UInt32 XCWOpusFramesPerPacket = 960; // Frames per Opus packet (20 ms at 48 kHz).
+static const UInt32 XCWOpusBitRate = 96000; // Value passed to kAudioConverterEncodeBitRate.
+static const UInt32 XCWOpusFallbackMaxPacketBytes = 1500; // Output buffer size used when the converter reports no maximum packet size.
+static const NSUInteger XCWAudioProcessStableRefreshes = 3; // Consecutive identical updates required before a changed process set is applied.
+static const OSStatus XCWAudioConverterNoDataStatus = -1; // Returned by the encoder input callback when no buffered PCM is left.
+
+// Converts a normalized floating-point sample to signed 16-bit PCM.
+// NaN and infinities become silence; values at or beyond +/-1.0 saturate.
+static int16_t XCWClampPCM16(double value) {
+    if (isnan(value) || isinf(value)) {
+        return 0;
+    }
+    if (value > -1.0 && value < 1.0) {
+        return (int16_t)lrint(value * 32767.0);
+    }
+    // Out of range: saturate toward the sign of the input.
+    return value < 0.0 ? INT16_MIN : INT16_MAX;
+}
+
+// Reads one sample from a linear-PCM buffer list and converts it to signed 16-bit PCM. A channel index
+// beyond the available channels (or buffers, when non-interleaved) is clamped to the last one. Returns 0
+// for NULL/empty input, reads past the buffer end, and widths other than 8/16/32-bit int or 32/64-bit float.
+static int16_t XCWReadPCM16Sample(const AudioBufferList *bufferList, const AudioStreamBasicDescription *asbd,
+                                  NSUInteger frame, NSUInteger channel) {
+    if (bufferList == NULL || asbd == NULL || bufferList->mNumberBuffers == 0) {
+        return 0;
+    }
+
+    const UInt32 bitsPerChannel = asbd->mBitsPerChannel;
+    const NSUInteger bytesPerSample = MAX((NSUInteger)bitsPerChannel / 8, 1);
+    const BOOL nonInterleaved = (asbd->mFormatFlags & kAudioFormatFlagIsNonInterleaved) != 0;
+    const BOOL isFloat = (asbd->mFormatFlags & kAudioFormatFlagIsFloat) != 0;
+    const BOOL isSigned = (asbd->mFormatFlags & kAudioFormatFlagIsSignedInteger) != 0;
+    const BOOL isBigEndian = (asbd->mFormatFlags & kAudioFormatFlagIsBigEndian) != 0;
+    const NSUInteger sourceChannels = MAX((NSUInteger)asbd->mChannelsPerFrame, 1);
+    // Non-interleaved data is read from one buffer per channel; interleaved data from buffer 0.
+    const NSUInteger bufferIndex = nonInterleaved ? MIN(channel, (NSUInteger)bufferList->mNumberBuffers - 1) : 0;
+    const NSUInteger channelInBuffer = nonInterleaved ? 0 : MIN(channel, sourceChannels - 1);
+    const AudioBuffer buffer = bufferList->mBuffers[bufferIndex];
+    if (buffer.mData == NULL || buffer.mDataByteSize == 0) {
+        return 0;
+    }
+
+    // Use the declared frame stride unless the sample layout implies a larger one.
+    const NSUInteger fallbackBytesPerFrame = bytesPerSample * (nonInterleaved ? 1 : sourceChannels);
+    const NSUInteger bytesPerFrame = MAX((NSUInteger)asbd->mBytesPerFrame, fallbackBytesPerFrame);
+    const NSUInteger offset = frame * bytesPerFrame + channelInBuffer * bytesPerSample;
+    if (offset + bytesPerSample > buffer.mDataByteSize) {
+        return 0;
+    }
+
+    const uint8_t *sample = (const uint8_t *)buffer.mData + offset;
+    if (isFloat && bytesPerSample == sizeof(float)) {
+        float value = 0.0f;
+        memcpy(&value, sample, sizeof(value));
+        return XCWClampPCM16((double)value);
+    }
+    if (isFloat && bytesPerSample == sizeof(double)) {
+        double value = 0.0;
+        memcpy(&value, sample, sizeof(value));
+        return XCWClampPCM16(value);
+    }
+    if (bytesPerSample == sizeof(int16_t)) {
+        uint16_t raw = 0;
+        memcpy(&raw, sample, sizeof(raw));
+        if (isBigEndian) {
+            raw = CFSwapInt16BigToHost(raw);
+        }
+        return (int16_t)raw;
+    }
+    if (bytesPerSample == sizeof(int32_t)) {
+        uint32_t raw = 0;
+        memcpy(&raw, sample, sizeof(raw));
+        if (isBigEndian) {
+            raw = CFSwapInt32BigToHost(raw);
+        }
+        return (int16_t)(((int32_t)raw) >> 16);
+    }
+    if (bytesPerSample == sizeof(uint8_t)) {
+        // Widen by multiplying: left-shifting a negative value is undefined behavior in C.
+        const int centered = isSigned ? (int)(int8_t)sample[0] : (int)sample[0] - 128;
+        return (int16_t)(centered * 256);
+    }
+
+    return 0;
+}
+
+// Number of whole frames held by the first buffer of `bufferList`, using the frame stride
+// implied by `asbd`. Returns 0 when either argument is NULL or the first buffer is empty.
+static NSUInteger XCWAudioFrameCount(const AudioBufferList *bufferList, const AudioStreamBasicDescription *asbd) {
+    if (bufferList == NULL || asbd == NULL || bufferList->mNumberBuffers == 0) {
+        return 0;
+    }
+    const AudioBuffer *first = &bufferList->mBuffers[0];
+    if (first->mData == NULL || first->mDataByteSize == 0) {
+        return 0;
+    }
+
+    // Samples per frame within one buffer: one when non-interleaved, every channel when interleaved.
+    const BOOL planar = (asbd->mFormatFlags & kAudioFormatFlagIsNonInterleaved) != 0;
+    const NSUInteger samplesPerFrame = planar ? 1 : MAX((NSUInteger)asbd->mChannelsPerFrame, 1);
+    const NSUInteger sampleBytes = MAX((NSUInteger)asbd->mBitsPerChannel / 8, 1);
+    // Use the declared stride unless the layout implies a larger one.
+    const NSUInteger stride = MAX((NSUInteger)asbd->mBytesPerFrame, sampleBytes * samplesPerFrame);
+    return stride == 0 ? 0 : (NSUInteger)first->mDataByteSize / stride;
+}
+
+// Converts a linear-PCM buffer list into interleaved signed 16-bit PCM with at most two channels.
+// On success the optional out-parameters receive the source sample rate (48000 when the
+// description reports none) and the emitted channel count. Returns nil for non-PCM or empty input.
+static NSData *XCWPCM16InterleavedDataFromAudioBufferList(const AudioBufferList *bufferList, const AudioStreamBasicDescription *asbd,
+                                                          uint32_t *sampleRate, uint16_t *channels) {
+    if (bufferList == NULL || asbd == NULL || asbd->mFormatID != kAudioFormatLinearPCM) {
+        return nil;
+    }
+    const NSUInteger frames = XCWAudioFrameCount(bufferList, asbd);
+    const NSUInteger emittedChannels = MIN(MAX((NSUInteger)asbd->mChannelsPerFrame, 1), (NSUInteger)2);
+    if (frames == 0 || emittedChannels == 0) {
+        return nil;
+    }
+
+    NSMutableData *interleaved = [NSMutableData dataWithLength:frames * emittedChannels * sizeof(int16_t)];
+    int16_t *cursor = (int16_t *)interleaved.mutableBytes;
+    for (NSUInteger frame = 0; frame < frames; frame++) {
+        for (NSUInteger channel = 0; channel < emittedChannels; channel++) {
+            *cursor++ = XCWReadPCM16Sample(bufferList, asbd, frame, channel);
+        }
+    }
+
+    if (sampleRate != NULL) {
+        *sampleRate = (uint32_t)llround(asbd->mSampleRate > 0 ? asbd->mSampleRate : 48000.0);
+    }
+    if (channels != NULL) {
+        *channels = (uint16_t)emittedChannels;
+    }
+    return interleaved;
+}
+
+// Capture time in microseconds on the host clock. Buffers without a valid host time are stamped with
+// the current host time rather than wall-clock time, so every returned value shares one time base.
+static uint64_t XCWAudioTimestampUS(const AudioTimeStamp *timeStamp) {
+    const BOOL hasHostTime = timeStamp != NULL && (timeStamp->mFlags & kAudioTimeStampHostTimeValid) != 0 && timeStamp->mHostTime != 0;
+    return AudioConvertHostTimeToNanos(hasHostTime ? timeStamp->mHostTime : AudioGetCurrentHostTime()) / 1000;
+}
+
+// Asks the Core Audio system object to translate a pid into its process AudioObjectID.
+// Returns kAudioObjectUnknown for non-positive pids or when the property read fails.
+static AudioObjectID XCWAudioProcessObjectIDForPID(pid_t pid) {
+    AudioObjectID translated = kAudioObjectUnknown;
+    if (pid <= 0) {
+        return translated;
+    }
+
+    const AudioObjectPropertyAddress translateAddress = {
+        .mSelector = kAudioHardwarePropertyTranslatePIDToProcessObject,
+        .mScope = kAudioObjectPropertyScopeGlobal,
+        .mElement = kAudioObjectPropertyElementMain,
+    };
+
+    UInt32 translatedSize = sizeof(translated);
+    // The pid is passed as the property qualifier; the object ID is read back as the property value.
+    const OSStatus status = AudioObjectGetPropertyData(kAudioObjectSystemObject, &translateAddress,
+                                                       sizeof(pid), &pid,
+                                                       &translatedSize, &translated);
+
+    return status == noErr ? translated : kAudioObjectUnknown;
+}
+
+// Translates pids into a sorted, de-duplicated list of Core Audio process object IDs.
+// Non-positive pids and pids Core Audio cannot translate are skipped. A NULL `processIDs`
+// (permitted by the public C API) is treated as an empty list instead of being dereferenced.
+static NSArray<NSNumber *> *XCWAudioProcessObjectIDsForProcessIDs(const int32_t *processIDs,
+                                                                  size_t processCount) {
+    NSMutableSet<NSNumber *> *seen = [NSMutableSet set];
+    NSMutableArray<NSNumber *> *objects = [NSMutableArray array];
+
+    const size_t count = processIDs != NULL ? processCount : 0;
+    for (size_t index = 0; index < count; index++) {
+        const pid_t pid = (pid_t)processIDs[index];
+        const AudioObjectID objectID = pid > 0 ? XCWAudioProcessObjectIDForPID(pid) : kAudioObjectUnknown;
+        NSNumber *boxed = @(objectID);
+        if (objectID == kAudioObjectUnknown || [seen containsObject:boxed]) {
+            continue;
+        }
+        [seen addObject:boxed];
+        [objects addObject:boxed];
+    }
+    // Sorted so two results can be compared with -isEqualToArray: regardless of input order.
+    [objects sortUsingSelector:@selector(compare:)];
+    return objects;
+}
+
+static CATapDescription *XCWAudioTapDescription(NSArray<NSNumber *> *processObjectIDs) API_AVAILABLE(macos(14.2)) { // Builds the tap configuration for the given Core Audio process objects.
+    CATapDescription *description = [[CATapDescription alloc] initStereoMixdownOfProcesses:processObjectIDs];
+    description.name = @"SimDeck Simulator Audio";
+    description.privateTap = YES;
+    description.muteBehavior = CATapMutedWhenTapped; // NOTE(review): presumably silences the tapped processes on the host while captured — confirm this is intended.
+    description.mixdown = YES;
+    description.mono = NO; // Stereo mixdown rather than mono.
+    description.exclusive = NO;
+    return description;
+}
+
+// Reads the tap's kAudioTapPropertyUID string. Returns nil and fills `error` (code 22) when the
+// property read fails or yields no string.
+static NSString *XCWAudioTapUID(AudioObjectID tapID, NSError * _Nullable __autoreleasing *error) {
+    const AudioObjectPropertyAddress uidAddress = { kAudioTapPropertyUID, kAudioObjectPropertyScopeGlobal, kAudioObjectPropertyElementMain };
+    CFStringRef uidRef = NULL;
+    UInt32 uidSize = sizeof(uidRef);
+    const OSStatus status = AudioObjectGetPropertyData(tapID, &uidAddress, 0, NULL, &uidSize, &uidRef);
+
+    if (status == noErr && uidRef != NULL) {
+        return CFBridgingRelease(uidRef); // Hand ownership of the returned string to ARC.
+    }
+
+    if (error != NULL) {
+        *error = XCWAudioCaptureStatusError(22, @"Read Core Audio tap UID", status);
+    }
+    return nil;
+}
+
+// Reads an AudioStreamBasicDescription property (main element) from a Core Audio object.
+// Succeeds only when the read works and the format has a positive sample rate and channel count.
+static BOOL XCWAudioGetObjectStreamFormat(AudioObjectID objectID,
+                                          AudioObjectPropertySelector selector,
+                                          AudioObjectPropertyScope scope,
+                                          AudioStreamBasicDescription *asbd) {
+    if (objectID == kAudioObjectUnknown || asbd == NULL) {
+        return NO;
+    }
+    const AudioObjectPropertyAddress formatAddress = { selector, scope, kAudioObjectPropertyElementMain };
+    UInt32 formatSize = sizeof(AudioStreamBasicDescription);
+    if (AudioObjectGetPropertyData(objectID, &formatAddress, 0, NULL, &formatSize, asbd) != noErr) {
+        return NO;
+    }
+    return asbd->mSampleRate > 0 && asbd->mChannelsPerFrame > 0;
+}
+
+typedef struct XCWOpusInputContext { // State shared with XCWOpusEncoderInputProc during one AudioConverterFillComplexBuffer call.
+    const uint8_t *bytes; // Start of the PCM made available to the converter.
+    UInt32 byteCount; // Number of bytes available at `bytes`.
+    UInt32 bytesPerFrame; // Size of one interleaved input frame.
+    UInt16 channels; // Channel count reported on the buffer handed to the converter.
+    UInt32 consumedBytes; // Bytes already handed out; advanced by the input callback.
+} XCWOpusInputContext;
+
+// AudioConverter input callback: supplies interleaved PCM from an XCWOpusInputContext.
+// Hands out at most the requested number of frames, advances `consumedBytes`, and returns
+// XCWAudioConverterNoDataStatus (with zero packets) once no whole frame is left.
+static OSStatus XCWOpusEncoderInputProc(AudioConverterRef inAudioConverter,
+                                        UInt32 *ioNumberDataPackets,
+                                        AudioBufferList *ioData,
+                                        AudioStreamPacketDescription **outDataPacketDescription,
+                                        void *inUserData) {
+    (void)inAudioConverter;
+    if (outDataPacketDescription != NULL) {
+        *outDataPacketDescription = NULL; // No packet descriptions are supplied for the PCM input.
+    }
+    if (ioNumberDataPackets == NULL || ioData == NULL || inUserData == NULL) {
+        return paramErr;
+    }
+    XCWOpusInputContext *input = (XCWOpusInputContext *)inUserData;
+    UInt32 frames = 0;
+    if (input->bytes != NULL && input->bytesPerFrame != 0 && input->consumedBytes < input->byteCount) {
+        const UInt32 remainingFrames = (input->byteCount - input->consumedBytes) / input->bytesPerFrame;
+        frames = MIN(*ioNumberDataPackets, remainingFrames);
+    }
+    *ioNumberDataPackets = frames;
+    if (frames == 0) {
+        return XCWAudioConverterNoDataStatus;
+    }
+
+    const UInt32 byteLength = frames * input->bytesPerFrame;
+    AudioBuffer *slot = &ioData->mBuffers[0];
+    ioData->mNumberBuffers = 1;
+    slot->mNumberChannels = input->channels;
+    slot->mDataByteSize = byteLength;
+    slot->mData = (void *)(input->bytes + input->consumedBytes);
+    input->consumedBytes += byteLength;
+    return noErr;
+}
+
+@interface XCWOpusAudioEncoder : NSObject // Buffers interleaved 16-bit PCM and encodes it to Opus packets via AudioConverter.
+// Channel count of the encoded output (initialised to XCWOpusChannels).
+@property (nonatomic, readonly) uint16_t channels;
+// Appends `pcm` to the pending buffer and returns the Opus packets that could be encoded from it.
+- (NSArray<NSData *> *)encodePCM:(NSData *)pcm
+                      sampleRate:(uint32_t)sampleRate
+                        channels:(uint16_t)channels
+                           error:(NSError * _Nullable __autoreleasing *)error;
+- (void)invalidate; // Disposes of the converter and clears buffered PCM and cached format state.
+
+@end
+
+@implementation XCWOpusAudioEncoder {
+    AudioConverterRef _converter; // NULL until configured; disposed by -invalidate.
+    NSMutableData *_pendingPCM; // Interleaved 16-bit PCM that has not been encoded yet.
+    uint32_t _inputSampleRate; // Sample rate the converter was configured for.
+    uint16_t _inputChannels; // Channel count the converter was configured for.
+    UInt32 _inputBytesPerFrame; // channels * sizeof(int16_t).
+    UInt32 _maxOutputPacketSize; // Size of the output buffer used for each encoded packet.
+    NSUInteger _inputFramesPerOpusPacket; // Input frames that correspond to one Opus packet at the input sample rate.
+}
+
+// Creates an encoder with an empty PCM queue. No AudioConverter exists yet; -encodePCM: creates
+// one on demand.
+- (instancetype)init {
+    if ((self = [super init])) {
+        _channels = XCWOpusChannels;
+        _pendingPCM = [NSMutableData data];
+    }
+    return self;
+}
+
+- (void)dealloc {
+    [self invalidate]; // Dispose of the AudioConverter when the encoder is deallocated.
+}
+
+// Queues `pcm` (interleaved signed 16-bit) and encodes as many whole Opus packets as the queue allows,
+// rebuilding the converter first when the sample rate or channel count changed. `error` is set on failure.
+- (NSArray<NSData *> *)encodePCM:(NSData *)pcm
+                      sampleRate:(uint32_t)sampleRate
+                        channels:(uint16_t)channels
+                           error:(NSError * _Nullable __autoreleasing *)error {
+    if (pcm.length == 0 || sampleRate == 0 || channels == 0) {
+        return @[];
+    }
+    const BOOL formatMatches = _converter != NULL && _inputSampleRate == sampleRate && _inputChannels == channels;
+    if (!formatMatches) {
+        [self invalidate];
+        if (![self configureWithSampleRate:sampleRate channels:channels error:error]) {
+            return @[];
+        }
+    }
+    [_pendingPCM appendData:pcm];
+    NSMutableArray<NSData *> *encoded = [NSMutableArray array];
+    NSData *packet = nil;
+    // nil ends the loop (no progress or failure); an empty packet means input was consumed without output.
+    while ([self pendingFrameCount] >= _inputFramesPerOpusPacket && (packet = [self encodeNextPacket:error]) != nil) {
+        if (packet.length > 0) {
+            [encoded addObject:packet];
+        }
+    }
+    return encoded;
+}
+
+- (BOOL)configureWithSampleRate:(uint32_t)sampleRate
+                       channels:(uint16_t)channels
+                          error:(NSError * _Nullable __autoreleasing *)error { // Creates the PCM -> Opus converter for this input format; returns NO and sets `error` (code 31) if creation fails.
+    _inputSampleRate = sampleRate;
+    _inputChannels = channels;
+    _inputBytesPerFrame = MAX((UInt32)channels * (UInt32)sizeof(int16_t), 1);
+    _inputFramesPerOpusPacket = MAX((NSUInteger)llround(((double)sampleRate * (double)XCWOpusFramesPerPacket) / (double)XCWOpusSampleRate), (NSUInteger)1);
+    // Input side: packed signed 16-bit linear PCM, one frame per packet.
+    AudioStreamBasicDescription input = {0};
+    input.mSampleRate = sampleRate;
+    input.mFormatID = kAudioFormatLinearPCM;
+    input.mFormatFlags = kAudioFormatFlagIsSignedInteger | kAudioFormatFlagIsPacked;
+    input.mBytesPerPacket = _inputBytesPerFrame;
+    input.mFramesPerPacket = 1;
+    input.mBytesPerFrame = _inputBytesPerFrame;
+    input.mChannelsPerFrame = channels;
+    input.mBitsPerChannel = 16;
+    // Output side: Opus at XCWOpusSampleRate with XCWOpusFramesPerPacket frames per packet.
+    AudioStreamBasicDescription output = {0};
+    output.mSampleRate = XCWOpusSampleRate;
+    output.mFormatID = kAudioFormatOpus;
+    output.mChannelsPerFrame = XCWOpusChannels;
+    output.mFramesPerPacket = XCWOpusFramesPerPacket;
+
+    OSStatus status = AudioConverterNew(&input, &output, &_converter);
+    if (status != noErr || _converter == NULL) {
+        if (error != NULL) {
+            *error = XCWAudioCaptureStatusError(31, @"Create Core Audio Opus encoder", status);
+        }
+        _converter = NULL;
+        return NO;
+    }
+    // The bit-rate request is best effort: its result is deliberately ignored.
+    UInt32 bitRate = XCWOpusBitRate;
+    (void)AudioConverterSetProperty(_converter, kAudioConverterEncodeBitRate, sizeof(bitRate), &bitRate);
+    // Size the output buffer from the converter's reported maximum, else a fixed fallback, clamped to [256, 4096].
+    _maxOutputPacketSize = 0;
+    UInt32 propertySize = sizeof(_maxOutputPacketSize);
+    status = AudioConverterGetProperty(_converter,
+                                       kAudioConverterPropertyMaximumOutputPacketSize,
+                                       &propertySize,
+                                       &_maxOutputPacketSize);
+    if (status != noErr || _maxOutputPacketSize == 0) {
+        _maxOutputPacketSize = XCWOpusFallbackMaxPacketBytes;
+    }
+    _maxOutputPacketSize = MIN(MAX(_maxOutputPacketSize, (UInt32)256), (UInt32)4096);
+    [_pendingPCM setLength:0];
+    return YES;
+}
+
+// Number of whole frames waiting in the PCM queue; 0 while no input format is configured
+// (frame size of zero).
+- (NSUInteger)pendingFrameCount {
+    const NSUInteger frameBytes = _inputBytesPerFrame;
+    return frameBytes == 0 ? 0 : _pendingPCM.length / frameBytes;
+}
+
+// Encodes one Opus packet from the front of the pending PCM queue.
+// Returns the packet, an empty NSData when input was consumed but the converter emitted nothing yet,
+// or nil when nothing could be consumed or the converter failed (`error` is set only for real failures).
+- (NSData *)encodeNextPacket:(NSError * _Nullable __autoreleasing *)error {
+    if (_converter == NULL || _inputBytesPerFrame == 0 || _inputFramesPerOpusPacket == 0) {
+        return nil;
+    }
+    const NSUInteger inputBytes = MIN(_pendingPCM.length, _inputFramesPerOpusPacket * (NSUInteger)_inputBytesPerFrame);
+    if (inputBytes == 0 || inputBytes > UINT32_MAX) {
+        return nil;
+    }
+
+    XCWOpusInputContext context = {
+        .bytes = (const uint8_t *)_pendingPCM.bytes,
+        .byteCount = (UInt32)inputBytes,
+        .bytesPerFrame = _inputBytesPerFrame,
+        .channels = _inputChannels,
+        .consumedBytes = 0,
+    };
+    NSMutableData *output = [NSMutableData dataWithLength:_maxOutputPacketSize];
+    AudioBufferList outputBuffer = {
+        .mNumberBuffers = 1,
+        .mBuffers = {
+            {
+                .mNumberChannels = XCWOpusChannels,
+                .mDataByteSize = _maxOutputPacketSize,
+                .mData = output.mutableBytes,
+            },
+        },
+    };
+    UInt32 outputPackets = 1;
+    AudioStreamPacketDescription packetDescription = {0};
+    OSStatus status = AudioConverterFillComplexBuffer(_converter, XCWOpusEncoderInputProc, &context,
+                                                      &outputPackets, &outputBuffer, &packetDescription);
+    // Check for genuine converter failures first: a failed call that also produced zero packets must
+    // not be mistaken for the "no output yet" case below. The queue is dropped too, so a converter
+    // that keeps failing cannot make it grow without bound.
+    if (status != noErr && status != XCWAudioConverterNoDataStatus) {
+        [_pendingPCM setLength:0];
+        if (error != NULL) {
+            *error = XCWAudioCaptureStatusError(32, @"Encode Opus audio packet", status);
+        }
+        return nil;
+    }
+    // Remove whatever the input callback handed to the converter.
+    if (context.consumedBytes > 0 && context.consumedBytes <= _pendingPCM.length) {
+        [_pendingPCM replaceBytesInRange:NSMakeRange(0, context.consumedBytes) withBytes:NULL length:0];
+    }
+    if (status == XCWAudioConverterNoDataStatus || outputPackets == 0 || outputBuffer.mBuffers[0].mDataByteSize == 0) {
+        // nil stops the caller's loop when no progress was made; empty data lets it continue.
+        return context.consumedBytes == 0 ? nil : [NSData data];
+    }
+    output.length = outputBuffer.mBuffers[0].mDataByteSize;
+    return output;
+}
+
+// Returns the encoder to its unconfigured state: clears queued PCM, zeroes the cached input
+// format and disposes of the converter if one exists.
+- (void)invalidate {
+    [_pendingPCM setLength:0];
+    _inputFramesPerOpusPacket = 0;
+    _inputBytesPerFrame = _maxOutputPacketSize = 0;
+    _inputSampleRate = _inputChannels = 0;
+    if (_converter != NULL) {
+        AudioConverterDispose(_converter);
+        _converter = NULL;
+    }
+}
+
+@end
+
+@class XCWNativeAudioCapture;
+static OSStatus XCWNativeAudioDeviceIOProc(AudioObjectID inDevice,
+                                           const AudioTimeStamp *inNow,
+                                           const AudioBufferList *inInputData,
+                                           const AudioTimeStamp *inInputTime,
+                                           AudioBufferList *outOutputData,
+                                           const AudioTimeStamp *inOutputTime,
+                                           void *inClientData); // Forward declaration; defined after XCWNativeAudioCapture, which registers it as its IOProc.
+
+@interface XCWNativeAudioCapture : NSObject
+// Captures audio from a set of processes through a Core Audio process tap and delivers Opus packets to a C callback.
+- (instancetype)initWithAudioCallback:(xcw_native_audio_callback)callback
+                             userData:(void *)userData;
+- (BOOL)startWithProcessIDs:(const int32_t *)processIDs
+                      count:(size_t)processCount
+                      error:(NSError * _Nullable __autoreleasing *)error;
+- (BOOL)updateProcessIDs:(const int32_t *)processIDs
+                   count:(size_t)processCount
+                   error:(NSError * _Nullable __autoreleasing *)error;
+- (BOOL)applyProcessIDs:(const int32_t *)processIDs
+                  count:(size_t)processCount
+       requireProcesses:(BOOL)requireProcesses
+        debounceChanges:(BOOL)debounceChanges
+                  error:(NSError * _Nullable __autoreleasing *)error;
+- (BOOL)shouldApplyProcessObjectIDs:(NSArray<NSNumber *> *)processObjectIDs;
+- (void)clearPendingProcessObjectIDs;
+- (void)invalidate;
+- (void)handleInputData:(const AudioBufferList *)inputData
+              inputTime:(const AudioTimeStamp *)inputTime;
+// -handleInputData:inputTime: is invoked from XCWNativeAudioDeviceIOProc for each captured buffer.
+@end
+
+@implementation XCWNativeAudioCapture {
+    xcw_native_audio_callback _callback; // Receives every encoded packet; capture is a no-op when NULL.
+    void *_callbackUserData; // Opaque pointer passed back to `_callback`.
+    BOOL _invalidated; // Set by -invalidate; makes -handleInputData: return immediately.
+    AudioObjectID _tapID; // Process tap, or kAudioObjectUnknown.
+    AudioObjectID _aggregateDeviceID; // Private aggregate device hosting the tap, or kAudioObjectUnknown.
+    AudioDeviceIOProcID _ioProcID; // IOProc registered on the aggregate device, or NULL.
+    AudioStreamBasicDescription _streamDescription; // Capture format read when the graph was started.
+    NSArray<NSNumber *> *_processObjectIDs; // Sorted process object IDs of the running graph.
+    NSArray<NSNumber *> *_pendingProcessObjectIDs; // Candidate set waiting out the debounce.
+    NSUInteger _pendingProcessObjectIDRefreshes; // Consecutive updates that reported the candidate set.
+    XCWOpusAudioEncoder *_opusEncoder; // Encodes captured PCM to Opus.
+}
+
+// Remembers the callback and its context and sets up an idle capture: no tap, device or IOProc yet.
+- (instancetype)initWithAudioCallback:(xcw_native_audio_callback)callback
+                             userData:(void *)userData {
+    if ((self = [super init]) == nil) {
+        return nil;
+    }
+    _callback = callback;
+    _callbackUserData = userData;
+    _opusEncoder = [[XCWOpusAudioEncoder alloc] init];
+    _tapID = kAudioObjectUnknown;
+    _aggregateDeviceID = kAudioObjectUnknown;
+    _ioProcID = NULL;
+    _processObjectIDs = @[];
+    _pendingProcessObjectIDs = nil;
+    _pendingProcessObjectIDRefreshes = 0;
+    return self;
+}
+
+- (void)dealloc {
+    [self invalidate]; // Tear down the Core Audio graph when the capture object is deallocated.
+}
+
+// Starts capture right away; fails when none of the pids resolves to a Core Audio process object.
+- (BOOL)startWithProcessIDs:(const int32_t *)processIDs count:(size_t)processCount error:(NSError * _Nullable __autoreleasing *)error {
+    const BOOL requireProcesses = YES, debounceChanges = NO;
+    return [self applyProcessIDs:processIDs count:processCount requireProcesses:requireProcesses debounceChanges:debounceChanges error:error];
+}
+
+// Refreshes the captured process set; an empty set is allowed and changes to a running graph are debounced.
+- (BOOL)updateProcessIDs:(const int32_t *)processIDs count:(size_t)processCount error:(NSError * _Nullable __autoreleasing *)error {
+    const BOOL requireProcesses = NO, debounceChanges = YES;
+    return [self applyProcessIDs:processIDs count:processCount requireProcesses:requireProcesses debounceChanges:debounceChanges error:error];
+}
+
+// Shared start/update path: resolves the pids and rebuilds the tap graph when the resolved set differs
+// from the active one. With `debounceChanges`, a running graph waits for XCWAudioProcessStableRefreshes identical updates.
+- (BOOL)applyProcessIDs:(const int32_t *)processIDs
+                  count:(size_t)processCount
+       requireProcesses:(BOOL)requireProcesses
+        debounceChanges:(BOOL)debounceChanges
+                  error:(NSError * _Nullable __autoreleasing *)error {
+    if (@available(macOS 14.2, *)) {
+        NSArray<NSNumber *> *resolved = XCWAudioProcessObjectIDsForProcessIDs(processIDs, processCount);
+        const BOOL running = _aggregateDeviceID != kAudioObjectUnknown;
+        if (running && [_processObjectIDs isEqualToArray:resolved]) {
+            [self clearPendingProcessObjectIDs];
+            return YES;
+        }
+        if (running && debounceChanges && ![self shouldApplyProcessObjectIDs:resolved]) {
+            return YES;
+        }
+        [self stopGraph]; // Also clears the pending debounce state.
+        if (resolved.count > 0) {
+            return [self startGraphWithProcessObjectIDs:resolved error:error];
+        }
+        if (requireProcesses && error != NULL) {
+            *error = XCWAudioCaptureError(20, @"No simulator audio processes are currently connected to Core Audio.");
+        }
+        return !requireProcesses;
+    }
+    if (error != NULL) {
+        *error = XCWAudioCaptureError(21, @"Per-simulator audio capture requires macOS 14.2 or newer.");
+    }
+    return NO;
+}
+
+// Debounce step: YES once the same candidate set was reported XCWAudioProcessStableRefreshes times in a row.
+- (BOOL)shouldApplyProcessObjectIDs:(NSArray<NSNumber *> *)processObjectIDs {
+    const BOOL sameCandidate = [_pendingProcessObjectIDs isEqualToArray:processObjectIDs];
+    if (!sameCandidate) {
+        _pendingProcessObjectIDs = [processObjectIDs copy];
+    }
+    _pendingProcessObjectIDRefreshes = sameCandidate ? _pendingProcessObjectIDRefreshes + 1 : 1;
+    return _pendingProcessObjectIDRefreshes >= XCWAudioProcessStableRefreshes;
+}
+
+- (void)clearPendingProcessObjectIDs {
+    _pendingProcessObjectIDRefreshes = 0; // Forget the debounce candidate together with its streak.
+    _pendingProcessObjectIDs = nil;
+}
+
+- (BOOL)startGraphWithProcessObjectIDs:(NSArray<NSNumber *> *)processObjectIDs
+                                 error:(NSError * _Nullable __autoreleasing *)error API_AVAILABLE(macos(14.2)) { // Builds tap -> private aggregate device -> IOProc and starts it; after the tap exists, any failure calls -stopGraph.
+    CATapDescription *tapDescription = XCWAudioTapDescription(processObjectIDs);
+    OSStatus status = AudioHardwareCreateProcessTap(tapDescription, &_tapID);
+    if (status != noErr || _tapID == kAudioObjectUnknown) {
+        if (error != NULL) {
+            *error = XCWAudioCaptureStatusError(23, @"Create Core Audio process tap", status);
+        }
+        _tapID = kAudioObjectUnknown;
+        return NO;
+    }
+    // Look up the tap's UID; the aggregate device description below refers to the tap by it.
+    NSError *tapUIDError = nil;
+    NSString *tapUID = XCWAudioTapUID(_tapID, &tapUIDError);
+    if (tapUID.length == 0) {
+        if (error != NULL) {
+            *error = tapUIDError ?: XCWAudioCaptureError(24, @"Core Audio process tap did not expose a UID.");
+        }
+        [self stopGraph];
+        return NO;
+    }
+    // Create a private aggregate device with a unique UID whose tap list contains this tap.
+    NSString *aggregateUID = [NSString stringWithFormat:@"dev.simdeck.audio.%@", NSUUID.UUID.UUIDString];
+    NSDictionary *aggregateDescription = @{
+        XCWAudioDictionaryKey(kAudioAggregateDeviceNameKey): @"SimDeck Simulator Audio",
+        XCWAudioDictionaryKey(kAudioAggregateDeviceUIDKey): aggregateUID,
+        XCWAudioDictionaryKey(kAudioAggregateDeviceIsPrivateKey): @YES,
+        XCWAudioDictionaryKey(kAudioAggregateDeviceTapListKey): @[
+            @{ XCWAudioDictionaryKey(kAudioSubTapUIDKey): tapUID }
+        ],
+    };
+    status = AudioHardwareCreateAggregateDevice((__bridge CFDictionaryRef)aggregateDescription, &_aggregateDeviceID);
+    if (status != noErr || _aggregateDeviceID == kAudioObjectUnknown) {
+        if (error != NULL) {
+            *error = XCWAudioCaptureStatusError(25, @"Create Core Audio aggregate device", status);
+        }
+        [self stopGraph];
+        return NO;
+    }
+    // Set the tap list property on the new device as well.
+    CFArrayRef tapList = (__bridge CFArrayRef)@[ tapUID ];
+    AudioObjectPropertyAddress tapListAddress = {
+        .mSelector = kAudioAggregateDevicePropertyTapList,
+        .mScope = kAudioObjectPropertyScopeGlobal,
+        .mElement = kAudioObjectPropertyElementMain,
+    };
+    status = AudioObjectSetPropertyData(_aggregateDeviceID,
+                                        &tapListAddress,
+                                        0,
+                                        NULL,
+                                        sizeof(tapList),
+                                        &tapList);
+    if (status != noErr) {
+        if (error != NULL) {
+            *error = XCWAudioCaptureStatusError(26, @"Attach Core Audio tap to aggregate device", status);
+        }
+        [self stopGraph];
+        return NO;
+    }
+    // Cache the capture format: the device's input stream format, or else the tap's own format.
+    memset(&_streamDescription, 0, sizeof(_streamDescription));
+    if (!XCWAudioGetObjectStreamFormat(_aggregateDeviceID, kAudioDevicePropertyStreamFormat, kAudioObjectPropertyScopeInput, &_streamDescription) &&
+        !XCWAudioGetObjectStreamFormat(_tapID, kAudioTapPropertyFormat, kAudioObjectPropertyScopeGlobal, &_streamDescription)) {
+        if (error != NULL) {
+            *error = XCWAudioCaptureError(27, @"Core Audio tap did not expose a readable linear PCM format.");
+        }
+        [self stopGraph];
+        return NO;
+    }
+    // Register the IOProc with self as unretained client data.
+    status = AudioDeviceCreateIOProcID(_aggregateDeviceID,
+                                       XCWNativeAudioDeviceIOProc,
+                                       (__bridge void *)self,
+                                       &_ioProcID);
+    if (status != noErr || _ioProcID == NULL) {
+        if (error != NULL) {
+            *error = XCWAudioCaptureStatusError(28, @"Create Core Audio tap IOProc", status);
+        }
+        [self stopGraph];
+        return NO;
+    }
+    // Start the device with the registered IOProc.
+    status = AudioDeviceStart(_aggregateDeviceID, _ioProcID);
+    if (status != noErr) {
+        if (error != NULL) {
+            *error = XCWAudioCaptureStatusError(29, @"Start Core Audio tap device", status);
+        }
+        [self stopGraph];
+        return NO;
+    }
+    // Success: remember the active process set and reset the debounce state.
+    _processObjectIDs = [processObjectIDs copy];
+    [self clearPendingProcessObjectIDs];
+    return YES;
+}
+
+// Tears down IOProc, aggregate device and tap (whichever exist) and resets process set, format and encoder.
+- (void)stopGraph {
+    if (_aggregateDeviceID != kAudioObjectUnknown) {
+        if (_ioProcID != NULL) {
+            AudioDeviceStop(_aggregateDeviceID, _ioProcID);
+            AudioDeviceDestroyIOProcID(_aggregateDeviceID, _ioProcID);
+            _ioProcID = NULL;
+        }
+        AudioHardwareDestroyAggregateDevice(_aggregateDeviceID);
+        _aggregateDeviceID = kAudioObjectUnknown;
+    }
+    if (_tapID != kAudioObjectUnknown) {
+        AudioHardwareDestroyProcessTap(_tapID);
+        _tapID = kAudioObjectUnknown;
+    }
+    _processObjectIDs = @[];
+    [self clearPendingProcessObjectIDs];
+    memset(&_streamDescription, 0, sizeof(_streamDescription));
+    [_opusEncoder invalidate];
+}
+
+- (void)invalidate {
+    _invalidated = YES; // Checked at the top of -handleInputData:, which then returns without doing any work.
+    [self stopGraph]; // Then release the tap, aggregate device and IOProc.
+}
+
+// IOProc entry point: converts one captured buffer to 16-bit PCM, feeds the Opus encoder and
+// forwards every finished packet to the C callback. Packets produced before an encoder error are
+// still delivered; the error itself is only logged.
+- (void)handleInputData:(const AudioBufferList *)inputData
+              inputTime:(const AudioTimeStamp *)inputTime {
+    if (_invalidated || _callback == NULL || inputData == NULL) {
+        return;
+    }
+    AudioStreamBasicDescription streamDescription = _streamDescription;
+    uint32_t sampleRate = 0;
+    uint16_t channels = 0;
+    NSData *pcm = XCWPCM16InterleavedDataFromAudioBufferList(inputData, &streamDescription, &sampleRate, &channels);
+    if (pcm.length == 0 || sampleRate == 0 || channels == 0) {
+        return;
+    }
+
+    NSError *encodeError = nil;
+    NSArray<NSData *> *packets = [_opusEncoder encodePCM:pcm sampleRate:sampleRate channels:channels error:&encodeError];
+    if (encodeError != nil) {
+        // -encodePCM: still returns the packets completed before the failure, so fall through and deliver them.
+        NSLog(@"SimDeck audio capture failed to encode Opus packet: %@", encodeError.localizedDescription);
+    }
+
+    // NOTE(review): every packet from one buffer carries the same timestamp — confirm the consumer expects that.
+    const uint64_t timestampUS = XCWAudioTimestampUS(inputTime);
+    for (NSData *packet in packets) {
+        if (packet.length == 0) {
+            continue;
+        }
+        xcw_native_audio_sample sample = {
+            .timestamp_us = timestampUS,
+            .sample_rate = XCWOpusSampleRate,
+            .channels = _opusEncoder.channels,
+            .data = XCWSharedBytesFromData(packet),
+        };
+        _callback(&sample, _callbackUserData);
+    }
+}
+
+@end
+
+// Core Audio IOProc trampoline: hands the captured input buffer and its timestamp to the
+// XCWNativeAudioCapture passed as client data. Always returns noErr.
+static OSStatus XCWNativeAudioDeviceIOProc(AudioObjectID inDevice,
+                                           const AudioTimeStamp *inNow,
+                                           const AudioBufferList *inInputData,
+                                           const AudioTimeStamp *inInputTime,
+                                           AudioBufferList *outOutputData,
+                                           const AudioTimeStamp *inOutputTime,
+                                           void *inClientData) {
+    (void)inDevice, (void)inNow, (void)outOutputData, (void)inOutputTime;
+    @autoreleasepool {
+        // __bridge cast: borrows the capture object without transferring ownership.
+        XCWNativeAudioCapture *owner = (__bridge XCWNativeAudioCapture *)inClientData;
+        [owner handleInputData:inInputData inputTime:inInputTime];
+    }
+    return noErr;
+}
+
 static XCWNativeH264Encoder *XCWNativeH264EncoderFromHandle(void *handle) {
     return (__bridge XCWNativeH264Encoder *)handle;
 }
@@ -1367,6 +2188,46 @@ void xcw_native_h264_encoder_request_keyframe(void *handle) {
     }
 }
 
+void *xcw_native_audio_capture_create(const int32_t *process_ids, size_t process_count, xcw_native_audio_callback callback, void *user_data, char **error_message) {
+    // Builds and starts a capture; returns a retained handle, or NULL after reporting the start error.
+    @autoreleasepool {
+        NSError *startError = nil;
+        XCWNativeAudioCapture *audioCapture = [[XCWNativeAudioCapture alloc] initWithAudioCallback:callback
+                                                                                         userData:user_data];
+        if ([audioCapture startWithProcessIDs:process_ids count:process_count error:&startError]) {
+            return (__bridge_retained void *)audioCapture;
+        }
+        XCWSetErrorMessage(error_message, startError);
+        return NULL;
+    }
+}
+
+bool xcw_native_audio_capture_update_processes(void *handle, const int32_t *process_ids, size_t process_count, char **error_message) {
+    // Forwards a new process-ID list to the capture behind `handle`; false means an error was reported.
+    if (handle == NULL) {
+        XCWSetErrorMessage(error_message, XCWAudioCaptureError(30, @"Audio capture handle is null."));
+        return false;
+    }
+    @autoreleasepool {
+        NSError *updateError = nil;
+        BOOL updated = [(__bridge XCWNativeAudioCapture *)handle updateProcessIDs:process_ids count:process_count error:&updateError];
+        if (!updated) {
+            XCWSetErrorMessage(error_message, updateError);
+        }
+        return updated;
+    }
+}
+
+void xcw_native_audio_capture_destroy(void *handle) {
+    // Takes back the retain handed out by create, then invalidates the capture. NULL is a no-op.
+    if (handle != NULL) {
+        @autoreleasepool {
+            XCWNativeAudioCapture *released = CFBridgingRelease(handle);
+            [released invalidate];
+        }
+    }
+}
+
 void xcw_native_free_string(char *value) {
     if (value != NULL) {
         free(value);
diff --git a/packages/server/native_stubs.c b/packages/server/native_stubs.c
index 7b3ae92a..266975b6 100644
--- a/packages/server/native_stubs.c
+++ b/packages/server/native_stubs.c
@@ -30,8 +30,17 @@ typedef struct {
   xcw_native_shared_bytes data;
 } xcw_native_frame;
 
+typedef struct { /* Audio sample handed to an xcw_native_audio_callback. */
+  uint64_t timestamp_us; /* Presumably microseconds, per the name -- confirm against the native bridge. */
+  uint32_t sample_rate;
+  uint16_t channels;
+  xcw_native_shared_bytes data; /* Payload bytes of the sample. */
+} xcw_native_audio_sample;
+
 typedef void (*xcw_native_frame_callback)(const xcw_native_frame *frame,
                                           void *user_data);
+typedef void (*xcw_native_audio_callback)(const xcw_native_audio_sample *sample,
+                                          void *user_data);
 
 static char *xcw_strdup(const char *value) {
   if (value == NULL) {
@@ -585,6 +594,34 @@ bool xcw_native_h264_encoder_encode_rgba(void *handle, const uint8_t *rgba,
 
 void xcw_native_h264_encoder_request_keyframe(void *handle) { (void)handle; }
 
+void *xcw_native_audio_capture_create(const int32_t *process_ids,
+                                      uintptr_t process_count,
+                                      xcw_native_audio_callback callback,
+                                      void *user_data,
+                                      char **error_message) {
+  (void)process_ids; /* Stub: every input is ignored and creation always fails. */
+  (void)process_count;
+  (void)callback;
+  (void)user_data;
+  xcw_set_error(error_message,
+                "Audio capture is only available in the macOS native bridge.");
+  return NULL; /* The stub never produces a handle. */
+}
+
+bool xcw_native_audio_capture_update_processes(void *handle,
+                                               const int32_t *process_ids,
+                                               uintptr_t process_count,
+                                               char **error_message) {
+  (void)handle; /* Stub: inputs are ignored and the update always fails. */
+  (void)process_ids;
+  (void)process_count;
+  xcw_set_error(error_message,
+                "Audio capture is only available in the macOS native bridge.");
+  return false;
+}
+
+void xcw_native_audio_capture_destroy(void *handle) { (void)handle; } /* Stub: nothing to release. */
+
 void xcw_native_free_string(char *value) { free(value); }
 
 void xcw_native_free_bytes(xcw_native_owned_bytes bytes) { free(bytes.data); }
diff --git a/packages/server/src/android.rs b/packages/server/src/android.rs
index adc2c794..92100e1a 100644
--- a/packages/server/src/android.rs
+++ b/packages/server/src/android.rs
@@ -330,7 +330,6 @@ impl AndroidBridge {
             "-avd",
             &avd_name,
             window_mode,
-            "-no-audio",
             "-gpu",
             "swiftshader_indirect",
         ];
diff --git a/packages/server/src/native/ffi.rs b/packages/server/src/native/ffi.rs
index 8a8a47b5..043df61c 100644
--- a/packages/server/src/native/ffi.rs
+++ b/packages/server/src/native/ffi.rs
@@ -26,10 +26,22 @@ pub struct xcw_native_frame {
     pub data: xcw_native_shared_bytes,
 }
 
+#[repr(C)] // C layout: handed by pointer to `xcw_native_audio_callback`.
+pub struct xcw_native_audio_sample {
+    pub timestamp_us: u64,
+    pub sample_rate: u32,
+    pub channels: u16,
+    pub data: xcw_native_shared_bytes, // Payload bytes of the sample.
+}
+
 #[allow(non_camel_case_types)]
 pub type xcw_native_frame_callback =
     unsafe extern "C" fn(frame: *const xcw_native_frame, user_data: *mut c_void);
 
+#[allow(non_camel_case_types)] // Keeps the C-style name for the callback type.
+pub type xcw_native_audio_callback =
+    unsafe extern "C" fn(sample: *const xcw_native_audio_sample, user_data: *mut c_void);
+
 unsafe extern "C" {
     pub fn simdeck_camera_list_webcams_json(error_message: *mut *mut c_char) -> *mut c_char;
     pub fn simdeck_camera_start(
@@ -351,6 +363,21 @@ unsafe extern "C" {
     ) -> bool;
     pub fn xcw_native_h264_encoder_request_keyframe(handle: *mut c_void);
 
+    pub fn xcw_native_audio_capture_create(
+        process_ids: *const i32,
+        process_count: usize,
+        callback: Option<xcw_native_audio_callback>,
+        user_data: *mut c_void,
+        error_message: *mut *mut c_char,
+    ) -> *mut c_void;
+    pub fn xcw_native_audio_capture_update_processes(
+        handle: *mut c_void,
+        process_ids: *const i32,
+        process_count: usize,
+        error_message: *mut *mut c_char,
+    ) -> bool;
+    pub fn xcw_native_audio_capture_destroy(handle: *mut c_void);
+
     pub fn xcw_native_free_string(value: *mut c_char);
     pub fn xcw_native_free_bytes(bytes: xcw_native_owned_bytes);
     pub fn xcw_native_release_shared_bytes(bytes: xcw_native_shared_bytes);
diff --git a/packages/server/src/transport/webrtc.rs b/packages/server/src/transport/webrtc.rs
index 2391ceda..bf774738 100644
--- a/packages/server/src/transport/webrtc.rs
+++ b/packages/server/src/transport/webrtc.rs
@@ -11,17 +11,18 @@ use crate::native::ffi;
 use crate::transport::packet::{FramePacket, SharedFrame};
 use bytes::{BufMut, Bytes, BytesMut};
 use serde::{Deserialize, Serialize};
-use std::collections::{HashMap, VecDeque};
+use std::collections::{BTreeSet, HashMap, VecDeque};
 use std::ffi::{c_void, CStr};
+use std::process::Command;
 use std::sync::atomic::{AtomicUsize, Ordering};
 use std::sync::{Arc, Mutex, OnceLock, RwLock, Weak};
 use std::time::Duration;
-use tokio::sync::{broadcast, mpsc};
+use tokio::sync::{broadcast, mpsc, watch};
 use tokio::task;
 use tokio::time::{self, Instant};
 use tracing::{info, warn};
 use webrtc::api::interceptor_registry::register_default_interceptors;
-use webrtc::api::media_engine::{MediaEngine, MIME_TYPE_H264};
+use webrtc::api::media_engine::{MediaEngine, MIME_TYPE_H264, MIME_TYPE_OPUS};
 use webrtc::api::APIBuilder;
 use webrtc::data_channel::data_channel_init::RTCDataChannelInit;
 use webrtc::data_channel::data_channel_message::DataChannelMessage;
@@ -29,6 +30,7 @@ use webrtc::data_channel::data_channel_state::RTCDataChannelState;
 use webrtc::data_channel::RTCDataChannel;
 use webrtc::ice_transport::ice_server::RTCIceServer;
 use webrtc::interceptor::registry::Registry;
+use webrtc::media::Sample as WebRtcSample;
 use webrtc::peer_connection::configuration::RTCConfiguration;
 use webrtc::peer_connection::peer_connection_state::RTCPeerConnectionState;
 use webrtc::peer_connection::policy::ice_transport_policy::RTCIceTransportPolicy;
@@ -44,6 +46,7 @@ use webrtc::rtp_transceiver::rtp_codec::{
 };
 use webrtc::rtp_transceiver::RTCPFeedback;
 use webrtc::track::track_local::track_local_static_rtp::TrackLocalStaticRTP;
+use webrtc::track::track_local::track_local_static_sample::TrackLocalStaticSample;
 use webrtc::track::track_local::TrackLocal;
 use webrtc::track::track_local::TrackLocalWriter;
 
@@ -57,6 +60,7 @@ const WEBRTC_MAX_LOCAL_STREAM_FPS: u32 = 240;
 const WEBRTC_WRITE_TIMEOUT: Duration = Duration::from_millis(120);
 const WEBRTC_REALTIME_WRITE_TIMEOUT: Duration = Duration::from_millis(45);
 const WEBRTC_REALTIME_KEYFRAME_WRITE_TIMEOUT: Duration = Duration::from_millis(90);
+const WEBRTC_AUDIO_WRITE_TIMEOUT: Duration = Duration::from_millis(120);
 const WEBRTC_INITIAL_KEYFRAME_TIMEOUT: Duration = Duration::from_secs(5);
 const WEBRTC_FAST_ICE_GATHER_TIMEOUT: Duration = Duration::from_millis(250);
 const WEBRTC_FULL_ICE_GATHER_TIMEOUT: Duration = Duration::from_secs(3);
@@ -72,6 +76,14 @@ const ANDROID_WEBRTC_RGBA_VERSION: u8 = 1;
 const ANDROID_WEBRTC_RGBA_FORMAT_RGBA8888: u8 = 1;
 const ANDROID_WEBRTC_RGBA_BUFFERED_FRAME_LIMIT: usize = 2;
 const ANDROID_WEBRTC_FPS: u64 = 30;
+const WEBRTC_AUDIO_PROCESS_REFRESH_INTERVAL: Duration = Duration::from_secs(1); // Period of the process-discovery tick.
+const WEBRTC_AUDIO_SAMPLE_RATE: u32 = 48_000; // Hz; also used as the Opus codec clock rate.
+const WEBRTC_AUDIO_CHANNELS: u16 = 2;
+const WEBRTC_AUDIO_FRAME_DURATION: Duration = Duration::from_millis(20); // Duration stamped on every written sample.
+const WEBRTC_AUDIO_SILENCE_TIMEOUT: Duration = Duration::from_millis(18); // Filler is skipped if a packet was written more recently than this.
+const WEBRTC_OPUS_SILENCE_PACKET: &[u8] = &[
+    0x28, 0x0B, 0xE4, 0x89, 0x1A, 0x2C, 0x08, 0x8A, 0xAE, 0xF8, 0x3A, 0xEC,
+]; // Filler packet written on the silence tick when no packet was written recently.
 static WEBRTC_MEDIA_STREAMS: OnceLock<Mutex<HashMap<String, Vec<WebRtcMediaStreamToken>>>> =
     OnceLock::new();
 const MAX_WEBRTC_MEDIA_STREAMS_PER_UDID: usize = 16;
@@ -100,9 +112,19 @@ pub struct WebRtcAnswerPayload {
     pub sdp: String,
     #[serde(rename = "type")]
     pub kind: String,
+    #[serde(skip_serializing_if = "Option::is_none")]
+    pub audio: Option<WebRtcAudioMetadata>,
     pub video: WebRtcVideoMetadata,
 }
 
+#[derive(Debug, Serialize)]
+#[serde(rename_all = "camelCase")] // `sample_rate` is serialized as `sampleRate`.
+pub struct WebRtcAudioMetadata {
+    pub channels: u16,
+    pub codec: String, // The answer builders in this file set this to "opus".
+    pub sample_rate: u32,
+}
+
 #[derive(Debug, Serialize)]
 #[serde(rename_all = "camelCase")]
 pub struct WebRtcVideoMetadata {
@@ -206,6 +228,7 @@ pub async fn create_answer(
             "WebRTC preview requires H.264. Restart SimDeck with `--video-codec auto`, `hardware`, or `software`.",
         ));
     }
+    let wants_audio = sdp_has_media_type(&payload.sdp, "audio");
 
     let h264_fmtp_line = h264_sdp_fmtp_line(&codec, &payload.sdp);
     let mut media_engine = MediaEngine::default();
@@ -225,6 +248,9 @@ pub async fn create_answer(
             RTPCodecType::Video,
         )
         .map_err(|error| AppError::internal(format!("register WebRTC H.264 codec: {error}")))?;
+    if wants_audio {
+        register_opus_audio_codec(&mut media_engine)?;
+    }
     let mut registry = Registry::new();
     registry = register_default_interceptors(registry, &mut media_engine)
         .map_err(|error| AppError::internal(format!("register WebRTC interceptors: {error}")))?;
@@ -261,6 +287,21 @@ pub async fn create_answer(
         ),
     }
 
+    let audio_track = if wants_audio {
+        let track = Arc::new(TrackLocalStaticSample::new(
+            opus_audio_codec_capability(),
+            "simdeck-audio".to_owned(),
+            "simdeck".to_owned(),
+        ));
+        let audio_sender = peer_connection
+            .add_track(track.clone() as Arc<dyn TrackLocal + Send + Sync>)
+            .await
+            .map_err(|error| AppError::internal(format!("add WebRTC audio track: {error}")))?;
+        tokio::spawn(async move { while audio_sender.read_rtcp().await.is_ok() {} });
+        Some(track)
+    } else {
+        None
+    };
     let video_track = Arc::new(TrackLocalStaticRTP::new(
         RTCRtpCodecCapability {
             mime_type: MIME_TYPE_H264.to_owned(),
@@ -346,6 +387,7 @@ pub async fn create_answer(
             first_frame,
             peer_connection,
             video_track,
+            audio_track,
             cancellation_token,
             cancellation,
             stream_control_rx,
@@ -356,6 +398,11 @@ pub async fn create_answer(
     Ok(WebRtcAnswerPayload {
         sdp: local_description.sdp,
         kind: "answer".to_owned(),
+        audio: wants_audio.then(|| WebRtcAudioMetadata {
+            channels: WEBRTC_AUDIO_CHANNELS,
+            codec: "opus".to_owned(),
+            sample_rate: WEBRTC_AUDIO_SAMPLE_RATE,
+        }),
         video: WebRtcVideoMetadata {
             width: first_frame_width,
             height: first_frame_height,
@@ -368,6 +415,7 @@ async fn create_android_rgba_answer(
     udid: String,
     payload: WebRtcOfferPayload,
 ) -> Result<WebRtcAnswerPayload, AppError> {
+    let wants_audio = sdp_has_media_type(&payload.sdp, "audio");
     let source = AndroidWebRtcSource::start(
         state.android.clone(),
         state.metrics.clone(),
@@ -387,7 +435,22 @@ async fn create_android_rgba_answer(
         ice_transport_policy_label()
     );
 
-    let api = APIBuilder::new().build();
+    let api = if wants_audio {
+        let mut media_engine = MediaEngine::default();
+        register_opus_audio_codec(&mut media_engine)?;
+        let mut registry = Registry::new();
+        registry = register_default_interceptors(registry, &mut media_engine).map_err(|error| {
+            AppError::internal(format!(
+                "register Android RGBA WebRTC interceptors: {error}"
+            ))
+        })?;
+        APIBuilder::new()
+            .with_media_engine(media_engine)
+            .with_interceptor_registry(registry)
+            .build()
+    } else {
+        APIBuilder::new().build()
+    };
     let peer_connection = Arc::new(
         api.new_peer_connection(RTCConfiguration {
             ice_servers: ice_servers(),
@@ -417,6 +480,23 @@ async fn create_android_rgba_answer(
         )
         .await
         .map_err(|error| AppError::internal(format!("create RGBA WebRTC data channel: {error}")))?;
+    let audio_track = if wants_audio {
+        let track = Arc::new(TrackLocalStaticSample::new(
+            opus_audio_codec_capability(),
+            "simdeck-audio".to_owned(),
+            "simdeck".to_owned(),
+        ));
+        let audio_sender = peer_connection
+            .add_track(track.clone() as Arc<dyn TrackLocal + Send + Sync>)
+            .await
+            .map_err(|error| {
+                AppError::internal(format!("add Android RGBA WebRTC audio track: {error}"))
+            })?;
+        tokio::spawn(async move { while audio_sender.read_rtcp().await.is_ok() {} });
+        Some(track)
+    } else {
+        None
+    };
 
     let fast_gather =
         has_sdp_candidate_type(&payload.sdp, "host") && ice_transport_policy_label() == "all";
@@ -467,6 +547,7 @@ async fn create_android_rgba_answer(
             source,
             peer_connection,
             rgba_channel,
+            audio_track,
             cancellation_token,
             cancellation,
             stream_control_rx,
@@ -477,6 +558,11 @@ async fn create_android_rgba_answer(
     Ok(WebRtcAnswerPayload {
         sdp: local_description.sdp,
         kind: "answer".to_owned(),
+        audio: wants_audio.then(|| WebRtcAudioMetadata {
+            channels: WEBRTC_AUDIO_CHANNELS,
+            codec: "opus".to_owned(),
+            sample_rate: WEBRTC_AUDIO_SAMPLE_RATE,
+        }),
         video: WebRtcVideoMetadata {
             width: 0,
             height: 0,
@@ -582,6 +668,13 @@ fn summarize_sdp_candidate_types(sdp: &str) -> String {
     format!("host={host},srflx={srflx},prflx={prflx},relay={relay},other={other}")
 }
 
+/// Reports whether `sdp` has a media line (`m=<media_type> ...`), ignoring leading whitespace.
+fn sdp_has_media_type(sdp: &str, media_type: &str) -> bool {
+    let wanted = format!("m={media_type} ");
+    let is_match = |line: &str| line.trim_start().strip_prefix(wanted.as_str()).is_some();
+    sdp.lines().any(is_match)
+}
+
 fn redact_candidate_address(address: &str) -> String {
     if address.is_empty() {
         return String::new();
@@ -1259,6 +1352,29 @@ fn h264_rtcp_feedback() -> Vec<RTCPFeedback> {
     ]
 }
 
+fn opus_audio_codec_capability() -> RTCRtpCodecCapability {
+    RTCRtpCodecCapability {
+        mime_type: MIME_TYPE_OPUS.to_owned(),
+        clock_rate: WEBRTC_AUDIO_SAMPLE_RATE,
+        channels: WEBRTC_AUDIO_CHANNELS,
+        sdp_fmtp_line: "minptime=10;useinbandfec=1;stereo=1;sprop-stereo=1".to_owned(), // Opus format parameters for this capability.
+        rtcp_feedback: Vec::new(), // No RTCP feedback types are declared for audio.
+    }
+}
+
+/// Registers the shared Opus capability with `media_engine` as audio payload type 111.
+fn register_opus_audio_codec(media_engine: &mut MediaEngine) -> Result<(), AppError> {
+    let opus = RTCRtpCodecParameters {
+        capability: opus_audio_codec_capability(),
+        payload_type: 111,
+        ..Default::default()
+    };
+    match media_engine.register_codec(opus, RTPCodecType::Audio) {
+        Ok(()) => Ok(()),
+        Err(error) => Err(AppError::internal(format!("register WebRTC Opus codec: {error}"))),
+    }
+}
+
 fn rtcp_packet_requests_keyframe(packet: &(dyn RtcpPacket + Send + Sync)) -> bool {
     packet.as_any().is::<PictureLossIndication>() || packet.as_any().is::<FullIntraRequest>()
 }
@@ -1425,6 +1541,442 @@ fn ice_transport_policy() -> RTCIceTransportPolicy {
     }
 }
 
+#[derive(Clone)] // Clones share one native capture through the inner Arc.
+struct SimulatorAudioCapture {
+    inner: Arc<SimulatorAudioCaptureInner>,
+}
+
+struct SimulatorAudioCaptureInner {
+    handle: AtomicUsize, // Native capture handle stored as an address; 0 means none.
+    callback_user_data: AtomicUsize, // Raw Weak<Self> pointer given to the native callback; reclaimed in Drop.
+    sender: watch::Sender<Option<SharedEncodedAudioSample>>, // Latest-value channel: each packet replaces the previous one.
+}
+
+#[derive(Debug)]
+struct EncodedAudioSample {
+    sample_rate: u32,
+    channels: u16,
+    data: Bytes, // Encoded packet payload copied out of the native buffer.
+}
+
+type SharedEncodedAudioSample = Arc<EncodedAudioSample>;
+
+impl SimulatorAudioCapture { // Wrapper around the native audio-capture FFI calls.
+    fn start(
+        process_ids: &[i32],
+        sender: watch::Sender<Option<SharedEncodedAudioSample>>,
+    ) -> Result<Self, AppError> {
+        if process_ids.is_empty() { // The native capture is never created without target processes.
+            return Err(AppError::native(
+                "No simulator audio process IDs were available.",
+            ));
+        }
+        let inner = Arc::new(SimulatorAudioCaptureInner {
+            handle: AtomicUsize::new(0),
+            callback_user_data: AtomicUsize::new(0),
+            sender,
+        });
+        let user_data = Weak::into_raw(Arc::downgrade(&inner)) as *mut c_void; // Leaked Weak: the callback pointer alone never keeps `inner` alive.
+        let mut error = std::ptr::null_mut();
+        let handle = unsafe {
+            ffi::xcw_native_audio_capture_create(
+                process_ids.as_ptr(),
+                process_ids.len(),
+                Some(host_audio_capture_callback),
+                user_data,
+                &mut error,
+            )
+        };
+        if handle.is_null() {
+            unsafe {
+                let _ = Weak::from_raw(user_data as *const SimulatorAudioCaptureInner); // Reclaim the leaked Weak on failure.
+            }
+            return Err(unsafe { take_native_error(error) }
+                .unwrap_or_else(|| AppError::native("Unable to start simulator audio capture.")));
+        }
+        inner.handle.store(handle as usize, Ordering::Release); // Recorded so Drop can destroy the native capture.
+        inner
+            .callback_user_data
+            .store(user_data as usize, Ordering::Release);
+        Ok(Self { inner })
+    }
+
+    fn update_processes(&self, process_ids: &[i32]) -> Result<(), AppError> {
+        if process_ids.is_empty() { // An empty list is a no-op: the native call is skipped.
+            return Ok(());
+        }
+        let handle = self.inner.handle.load(Ordering::Acquire);
+        if handle == 0 {
+            return Err(AppError::native(
+                "Simulator audio capture handle was already closed.",
+            ));
+        }
+        let mut error = std::ptr::null_mut();
+        let ok = unsafe {
+            ffi::xcw_native_audio_capture_update_processes(
+                handle as *mut c_void,
+                process_ids.as_ptr(),
+                process_ids.len(),
+                &mut error,
+            )
+        };
+        if ok {
+            Ok(())
+        } else {
+            Err(unsafe { take_native_error(error) }.unwrap_or_else(|| { // Fall back to a generic message when no native error was provided.
+                AppError::native("Unable to update simulator audio capture processes.")
+            }))
+        }
+    }
+}
+
+impl Drop for SimulatorAudioCaptureInner {
+    fn drop(&mut self) {
+        let handle = self.handle.load(Ordering::Acquire);
+        let callback_user_data = self.callback_user_data.load(Ordering::Acquire);
+        unsafe {
+            if handle != 0 {
+                ffi::xcw_native_audio_capture_destroy(handle as *mut c_void); // Destroy the native capture before reclaiming the callback pointer.
+            }
+            if callback_user_data != 0 {
+                let _ = Weak::from_raw(callback_user_data as *const SimulatorAudioCaptureInner); // Balances the Weak::into_raw done in start().
+            }
+        }
+    }
+}
+
+unsafe extern "C" fn host_audio_capture_callback(
+    sample: *const ffi::xcw_native_audio_sample,
+    user_data: *mut c_void,
+) {
+    if sample.is_null() || user_data.is_null() {
+        return;
+    }
+
+    let weak = unsafe { Weak::from_raw(user_data as *const SimulatorAudioCaptureInner) }; // Temporarily re-materialize the Weak leaked in start().
+    if let Some(inner) = weak.upgrade() { // Samples are ignored once the capture has been dropped.
+        unsafe {
+            inner.handle_audio_sample(&*sample);
+        }
+    }
+    let _ = Weak::into_raw(weak); // Leak it again so later callbacks can reuse the same pointer.
+}
+
+impl SimulatorAudioCaptureInner {
+    /// Copies one native packet and publishes it as the newest value on the watch channel.
+    ///
+    /// Packets with a zero sample rate or channel count are released without being copied.
+    unsafe fn handle_audio_sample(&self, sample: &ffi::xcw_native_audio_sample) {
+        let (sample_rate, channels) = (sample.sample_rate, sample.channels);
+        if sample_rate == 0 || channels == 0 {
+            unsafe { ffi::xcw_native_release_shared_bytes(sample.data) };
+            return;
+        }
+        let payload = match unsafe { copy_native_shared_bytes(sample.data) } {
+            Some(bytes) if !bytes.is_empty() => bytes,
+            _ => return,
+        };
+        let packet = EncodedAudioSample {
+            sample_rate,
+            channels,
+            data: payload,
+        };
+        self.sender.send_replace(Some(Arc::new(packet)));
+    }
+}
+
+fn spawn_simulator_audio_stream(
+    state: AppState,
+    udid: String,
+    audio_track: Arc<TrackLocalStaticSample>,
+    mut cancellation: broadcast::Receiver<()>,
+) {
+    tokio::spawn(async move { // Writer task: forwards captured packets, or filler packets, to the audio track.
+        let (sample_tx, mut sample_rx) = watch::channel(None); // NOTE(review): watch keeps only the latest packet, so bursts may be overwritten — confirm intended.
+        let (audio_stop_tx, _) = broadcast::channel(1); // Signals the capture task once the writer loop ends.
+        let mut capture_cancellation = cancellation.resubscribe();
+        let mut capture_stop = audio_stop_tx.subscribe();
+        let capture_state = state.clone();
+        let capture_udid = udid.clone();
+        tokio::spawn(async move { // Capture task: keeps the native capture aligned with the discovered process list.
+            let mut capture: Option<SimulatorAudioCapture> = None;
+            let mut refresh = time::interval(WEBRTC_AUDIO_PROCESS_REFRESH_INTERVAL);
+            loop {
+                tokio::select! {
+                    _ = capture_cancellation.recv() => break,
+                    _ = capture_stop.recv() => break,
+                    _ = refresh.tick() => {
+                        let process_ids = match resolve_simulator_audio_process_ids(capture_state.clone(), &capture_udid).await {
+                            Ok(process_ids) => process_ids,
+                            Err(error) => {
+                                warn!("WebRTC audio process discovery failed for {capture_udid}: {error}");
+                                continue; // Keep any existing capture and retry on the next tick.
+                            }
+                        };
+                        if process_ids.is_empty() {
+                            capture = None; // No matching processes: drop the current capture, if any.
+                            continue;
+                        }
+                        if let Some(active_capture) = capture.as_ref().cloned() {
+                            let update_process_ids = process_ids.clone();
+                            let update_result = task::spawn_blocking(move || {
+                                active_capture.update_processes(&update_process_ids)
+                            }).await;
+                            let update_result = match update_result {
+                                Ok(result) => result,
+                                Err(error) => Err(AppError::internal(format!(
+                                    "Failed to join audio capture update task: {error}"
+                                ))),
+                            };
+                            if let Err(error) = update_result {
+                                warn!("WebRTC audio capture update failed for {capture_udid}: {error}");
+                                capture = None; // Discard it; a later tick starts a fresh capture.
+                            }
+                            continue;
+                        }
+                        let tx = sample_tx.clone();
+                        match task::spawn_blocking(move || SimulatorAudioCapture::start(&process_ids, tx)).await {
+                            Ok(Ok(new_capture)) => {
+                                capture = Some(new_capture);
+                            }
+                            Ok(Err(error)) => {
+                                warn!("WebRTC audio capture unavailable for {capture_udid}: {error}");
+                            }
+                            Err(error) => {
+                                warn!("WebRTC audio capture task failed for {capture_udid}: {error}");
+                            }
+                        }
+                    }
+                }
+            }
+        });
+        let mut silence = time::interval(WEBRTC_AUDIO_FRAME_DURATION);
+        silence.set_missed_tick_behavior(time::MissedTickBehavior::Delay);
+        let mut last_audio_write_at: Option<Instant> = None; // Time of the last successful write, captured or filler.
+        loop {
+            tokio::select! {
+                _ = cancellation.recv() => break,
+                _ = silence.tick() => {
+                    if last_audio_write_at.is_some_and(|instant| instant.elapsed() < WEBRTC_AUDIO_SILENCE_TIMEOUT) {
+                        continue; // A packet was written within the silence timeout; skip the filler.
+                    }
+                    match write_webrtc_audio_sample(&audio_track, Bytes::from_static(WEBRTC_OPUS_SILENCE_PACKET)).await {
+                        Ok(true) => {
+                            last_audio_write_at = Some(Instant::now());
+                        }
+                        Ok(false) => {} // Write timed out; nothing is recorded for this tick.
+                        Err(error) => {
+                            warn!("WebRTC audio write failed for {udid}: {error}");
+                            let _ = audio_stop_tx.send(());
+                            return;
+                        }
+                    }
+                }
+                sample = sample_rx.changed() => {
+                    if sample.is_err() { // `changed()` failed; leave the loop.
+                        break;
+                    }
+                    let Some(sample) = sample_rx.borrow_and_update().clone() else {
+                        continue;
+                    };
+                    if sample.sample_rate != WEBRTC_AUDIO_SAMPLE_RATE || sample.channels != WEBRTC_AUDIO_CHANNELS {
+                        warn!(
+                            "Ignoring unexpected WebRTC Opus audio packet format for {udid}: {} Hz, {} channels",
+                            sample.sample_rate,
+                            sample.channels
+                        );
+                        continue;
+                    }
+                    match write_webrtc_audio_sample(&audio_track, sample.data.clone()).await {
+                        Ok(true) => {
+                            last_audio_write_at = Some(Instant::now());
+                        }
+                        Ok(false) => {} // Write timed out; this packet is not retried.
+                        Err(error) => {
+                            warn!("WebRTC audio write failed for {udid}: {error}");
+                            let _ = audio_stop_tx.send(());
+                            return;
+                        }
+                    }
+                }
+            }
+        }
+        let _ = audio_stop_tx.send(()); // Stop the capture task on every normal exit path.
+    });
+}
+
+/// Writes one encoded packet to `audio_track`, stamped with `WEBRTC_AUDIO_FRAME_DURATION`.
+///
+/// Returns `Ok(true)` once written, `Ok(false)` if `WEBRTC_AUDIO_WRITE_TIMEOUT` elapsed first,
+/// and `Err` carrying the track error as text when the write itself failed.
+async fn write_webrtc_audio_sample(
+    audio_track: &TrackLocalStaticSample,
+    data: Bytes,
+) -> Result<bool, String> {
+    let frame = WebRtcSample {
+        data,
+        duration: WEBRTC_AUDIO_FRAME_DURATION,
+        ..Default::default()
+    };
+    let pending_write = audio_track.write_sample(&frame);
+    // A timer error only means the deadline passed; it is not treated as a failure.
+    let Ok(outcome) = time::timeout(WEBRTC_AUDIO_WRITE_TIMEOUT, pending_write).await else {
+        return Ok(false);
+    };
+    outcome.map(|()| true).map_err(|error| error.to_string())
+}
+
+#[derive(Clone, Debug, Eq, PartialEq)]
+struct HostAudioProcess { // One parsed row of the host `ps` listing.
+    pid: i32,
+    parent_pid: i32,
+    command: String, // Command line with whitespace runs collapsed to single spaces by the parser.
+}
+
+/// Runs `simulator_audio_process_ids_blocking` for `udid` via `task::spawn_blocking`.
+async fn resolve_simulator_audio_process_ids(
+    state: AppState,
+    udid: &str,
+) -> Result<Vec<i32>, AppError> {
+    let udid = udid.to_owned();
+    match task::spawn_blocking(move || simulator_audio_process_ids_blocking(&state, &udid)).await {
+        Ok(discovered) => discovered,
+        Err(error) => Err(AppError::internal(format!(
+            "Failed to join audio process discovery task: {error}"
+        ))),
+    }
+}
+
+/// Lists host processes and expands the roots that belong to `udid` into their full tree.
+fn simulator_audio_process_ids_blocking(
+    state: &AppState,
+    udid: &str,
+) -> Result<Vec<i32>, AppError> {
+    let processes = list_host_audio_processes()?;
+    if android::is_android_id(udid) {
+        let roots = android_audio_root_process_ids(udid, &processes)?;
+        return Ok(process_tree_process_ids(&processes, roots));
+    }
+    let bridge = state.registry.bridge().clone();
+    let Some(simulator) = bridge.simulator(udid)? else {
+        return Err(AppError::not_found(format!("Unknown simulator `{udid}`.")));
+    };
+    let data_path: Option<String> = match simulator.data_path.as_str() {
+        Some(value) if !value.trim().is_empty() => Some(value.to_owned()),
+        _ => None,
+    };
+    let roots = ios_simulator_audio_root_process_ids(udid, data_path.as_deref(), &processes);
+    Ok(process_tree_process_ids(&processes, roots))
+}
+
+/// Runs `ps -axo pid=,ppid=,command=` and keeps every row `parse_host_audio_process_line` accepts.
+fn list_host_audio_processes() -> Result<Vec<HostAudioProcess>, AppError> {
+    let mut ps = Command::new("ps");
+    ps.args(["-axo", "pid=,ppid=,command="]);
+    let output = match ps.output() {
+        Ok(output) if output.status.success() => output,
+        Ok(_) => return Err(AppError::native("Unable to list host processes.")),
+        Err(error) => return Err(AppError::native(format!("Unable to list host processes: {error}"))),
+    };
+    let listing = String::from_utf8_lossy(&output.stdout);
+    let processes = listing.lines().filter_map(parse_host_audio_process_line);
+    Ok(processes.collect())
+}
+
+/// Parses one `ps` row of the form `<pid> <ppid> <command...>`.
+///
+/// Returns `None` for blank rows, non-numeric ids, or a missing command. Whitespace runs inside
+/// the command are collapsed to single spaces.
+fn parse_host_audio_process_line(line: &str) -> Option<HostAudioProcess> {
+    let mut fields = line.split_whitespace();
+    let pid: i32 = fields.next()?.parse().ok()?;
+    let parent_pid: i32 = fields.next()?.parse().ok()?;
+    // Re-join the remaining words, which normalizes the spacing between arguments.
+    let mut command = String::new();
+    for word in fields {
+        if !command.is_empty() {
+            command.push(' ');
+        }
+        command.push_str(word);
+    }
+    (!command.is_empty()).then(|| HostAudioProcess { pid, parent_pid, command })
+}
+
+/// Picks processes whose command mentions the UDID, the data path, or its parent device directory.
+fn ios_simulator_audio_root_process_ids(
+    udid: &str,
+    data_path: Option<&str>,
+    processes: &[HostAudioProcess],
+) -> BTreeSet<i32> {
+    let device_path = match data_path.and_then(|path| path.strip_suffix("/data")) {
+        Some(path) if !path.is_empty() => Some(path),
+        _ => None,
+    };
+    let needles = [Some(udid), data_path, device_path];
+    let mentions_device = |command: &str| needles.iter().flatten().any(|needle| command.contains(*needle));
+    processes
+        .iter()
+        .filter(|process| !is_simulator_audio_probe_process(&process.command))
+        .filter(|process| mentions_device(process.command.as_str()))
+        .map(|process| process.pid)
+        .collect()
+}
+
+/// Selects processes started for exactly this AVD. The name is compared against whole arguments
+/// (`-avd <name>`, `@<name>`) so `Pixel_6` no longer also selects a running `Pixel_6_Pro`.
+fn android_audio_root_process_ids(
+    udid: &str,
+    processes: &[HostAudioProcess],
+) -> Result<BTreeSet<i32>, AppError> {
+    let avd_name = android::avd_from_id(udid)?;
+    let avd_at_arg = format!("@{avd_name}");
+    let avd_dir = format!(".android/avd/{avd_name}.avd");
+    let targets_avd = |command: &str| {
+        let args = command.split_whitespace().collect::<Vec<_>>();
+        args.windows(2).any(|pair| pair[0] == "-avd" && pair[1] == avd_name)
+            || args.iter().any(|arg| *arg == avd_at_arg)
+            || command.contains(&avd_dir)
+    };
+    let roots = processes.iter().filter(|process| {
+        !is_simulator_audio_probe_process(&process.command) && targets_avd(process.command.as_str())
+    });
+    Ok(roots.map(|process| process.pid).collect())
+}
+
+/// Expands `roots` to every descendant found in `processes`.
+///
+/// The result contains the roots themselves and is returned in ascending PID order.
+fn process_tree_process_ids(processes: &[HostAudioProcess], roots: BTreeSet<i32>) -> Vec<i32> {
+    // Index children under their parent so each expansion step is a single lookup.
+    let mut children_of: HashMap<i32, Vec<i32>> = HashMap::new();
+    for HostAudioProcess { pid, parent_pid, .. } in processes {
+        children_of.entry(*parent_pid).or_default().push(*pid);
+    }
+
+    let mut pending: Vec<i32> = roots.iter().copied().collect();
+    let mut reachable = roots;
+    while let Some(current) = pending.pop() {
+        let Some(children) = children_of.get(&current) else {
+            continue;
+        };
+        // `insert` is false for PIDs already seen, which also guards against parent cycles.
+        let unseen = children.iter().copied().filter(|child| reachable.insert(*child));
+        pending.extend(unseen);
+    }
+    reachable.into_iter().collect()
+}
+
+/// True for the discovery tooling itself: `simctl`, `ps`, or `xcrun` invoked with ` simctl `.
+fn is_simulator_audio_probe_process(command: &str) -> bool {
+    let first_word = command.split_whitespace().next().unwrap_or_default();
+    let executable = first_word.rsplit('/').next().unwrap_or_default();
+    match executable {
+        "simctl" | "ps" => true,
+        "xcrun" => command.contains(" simctl "),
+        _ => false,
+    }
+}
+
 #[derive(Clone)]
 pub(crate) struct AndroidWebRtcSource {
     inner: Arc<AndroidWebRtcSourceInner>,
@@ -1904,6 +2456,7 @@ struct WebRtcMediaStream {
     first_frame: SharedFrame,
     peer_connection: Arc<webrtc::peer_connection::RTCPeerConnection>,
     video_track: Arc<TrackLocalStaticRTP>,
+    audio_track: Option<Arc<TrackLocalStaticSample>>,
     cancellation_token: broadcast::Sender<()>,
     cancellation: broadcast::Receiver<()>,
     stream_control_rx: mpsc::UnboundedReceiver<WebRtcStreamCommand>,
@@ -1915,6 +2468,7 @@ struct WebRtcRgbaStream {
     udid: String,
     peer_connection: Arc<webrtc::peer_connection::RTCPeerConnection>,
     rgba_channel: Arc<RTCDataChannel>,
+    audio_track: Option<Arc<TrackLocalStaticSample>>,
     cancellation_token: broadcast::Sender<()>,
     cancellation: broadcast::Receiver<()>,
     stream_control_rx: mpsc::UnboundedReceiver<WebRtcStreamCommand>,
@@ -1928,6 +2482,7 @@ impl WebRtcRgbaStream {
             udid,
             peer_connection,
             rgba_channel,
+            audio_track,
             cancellation_token,
             mut cancellation,
             mut stream_control_rx,
@@ -1937,6 +2492,14 @@ impl WebRtcRgbaStream {
         let mut peer_disconnected_since: Option<time::Instant> = None;
         let mut sequence = 0u64;
         let _guard = WebRtcMetricsGuard::new(state.metrics.clone());
+        if let Some(audio_track) = audio_track {
+            spawn_simulator_audio_stream(
+                state.clone(),
+                udid.clone(),
+                audio_track,
+                cancellation_token.subscribe(),
+            );
+        }
         rgba_channel.on_open(Box::new({
             let udid = udid.clone();
             move || {
@@ -2048,6 +2611,7 @@ impl WebRtcMediaStream {
             first_frame,
             peer_connection,
             video_track,
+            audio_track,
             cancellation_token,
             mut cancellation,
             mut stream_control_rx,
@@ -2067,6 +2631,14 @@ impl WebRtcMediaStream {
         let mut waiting_for_keyframe = false;
         let mut peer_disconnected_since: Option<time::Instant> = None;
         let _guard = WebRtcMetricsGuard::new(state.metrics.clone());
+        if let Some(audio_track) = audio_track {
+            spawn_simulator_audio_stream(
+                state.clone(),
+                udid.clone(),
+                audio_track,
+                cancellation_token.subscribe(),
+            );
+        }
         let first_frame_duration = send_timing.duration_for(&first_frame, realtime_stream);
 
         match write_frame_sample_with_timeout(
@@ -2645,10 +3217,12 @@ mod tests {
     use super::{
         android_rgba_webrtc_frame_chunks, append_avcc_parameter_sets, append_length_prefixed_nalus,
         h264_annex_b_sample, h264_frame_has_idr, h264_frame_is_decoder_sync, h264_sdp_fmtp_line,
-        is_annex_b, is_h264_codec, rtcp_packet_requests_keyframe, rtp_packet_pacing,
-        WebRtcMetricsGuard, WebRtcSendTiming, ANDROID_WEBRTC_RGBA_CHUNK_BYTES,
-        ANDROID_WEBRTC_RGBA_CHUNK_HEADER_BYTES, ANDROID_WEBRTC_RGBA_CHUNK_MAGIC,
-        ANDROID_WEBRTC_RGBA_FORMAT_RGBA8888, ANDROID_WEBRTC_RGBA_VERSION, ANNEX_B_START_CODE,
+        is_annex_b, is_h264_codec, opus_audio_codec_capability, rtcp_packet_requests_keyframe,
+        rtp_packet_pacing, sdp_has_media_type, WebRtcMetricsGuard, WebRtcSendTiming,
+        ANDROID_WEBRTC_RGBA_CHUNK_BYTES, ANDROID_WEBRTC_RGBA_CHUNK_HEADER_BYTES,
+        ANDROID_WEBRTC_RGBA_CHUNK_MAGIC, ANDROID_WEBRTC_RGBA_FORMAT_RGBA8888,
+        ANDROID_WEBRTC_RGBA_VERSION, ANNEX_B_START_CODE, WEBRTC_AUDIO_CHANNELS,
+        WEBRTC_AUDIO_SAMPLE_RATE, WEBRTC_OPUS_SILENCE_PACKET,
     };
     use crate::android;
     use crate::metrics::counters::Metrics;
@@ -2705,6 +3279,110 @@ mod tests {
         assert!(!rtcp_packet_requests_keyframe(&SenderReport::default()));
     }
 
+    /// `sdp_has_media_type` reports "audio" only for offers that carry an `m=audio` section.
+    #[test]
+    fn detects_audio_m_lines_in_browser_offers() {
+        let offer_with_audio =
+            "v=0\r\nm=audio 9 UDP/TLS/RTP/SAVPF 0\r\nm=video 9 UDP/TLS/RTP/SAVPF 96\r\n";
+        let video_only_offer = "v=0\r\nm=video 9 UDP/TLS/RTP/SAVPF 96\r\n";
+
+        assert!(sdp_has_media_type(offer_with_audio, "audio"));
+        assert!(!sdp_has_media_type(video_only_offer, "audio"));
+    }
+
+    /// The advertised Opus capability uses the crate's audio sample rate and channel constants
+    /// and an fmtp line that enables stereo and in-band FEC.
+    #[test]
+    fn opus_audio_codec_uses_browser_native_wideband_settings() {
+        let capability = opus_audio_codec_capability();
+
+        assert_eq!(capability.mime_type, "audio/opus");
+        assert_eq!(capability.clock_rate, WEBRTC_AUDIO_SAMPLE_RATE);
+        assert_eq!(capability.channels, WEBRTC_AUDIO_CHANNELS);
+
+        let fmtp = &capability.sdp_fmtp_line;
+        assert!(fmtp.contains("stereo=1"));
+        assert!(fmtp.contains("useinbandfec=1"));
+    }
+
+    /// Pins the shape of `WEBRTC_OPUS_SILENCE_PACKET`: it must be 12 bytes long and must not be
+    /// the three-byte sequence `[0xF8, 0xFF, 0xFE]`.
+    // NOTE(review): the three-byte value is presumably a minimal Opus silence frame that this
+    // test contrasts with a "real" encoded frame — confirm against the constant's definition.
+    #[test]
+    fn opus_silence_packet_uses_real_low_bitrate_audio_frame() {
+        assert_eq!(WEBRTC_OPUS_SILENCE_PACKET.len(), 12);
+        assert_ne!(WEBRTC_OPUS_SILENCE_PACKET, &[0xF8, 0xFF, 0xFE]);
+    }
+
+    /// A process line is split into pid, parent pid and the remaining command text, and the
+    /// command keeps its embedded spaces.
+    #[test]
+    fn parses_host_audio_process_lines_with_commands_containing_spaces() {
+        let line = "  42     1 /tmp/My App.app/My App --flag value";
+        let expected = super::HostAudioProcess {
+            pid: 42,
+            parent_pid: 1,
+            command: "/tmp/My App.app/My App --flag value".to_owned(),
+        };
+
+        assert_eq!(super::parse_host_audio_process_line(line), Some(expected));
+    }
+
+    /// Root discovery for `UDID-1` followed by tree expansion yields the `launchd_sim` process
+    /// and its child; the `xcrun simctl` process (pid 12), whose command line also mentions the
+    /// UDID, is expected to be left out.
+    #[test]
+    fn ios_audio_process_discovery_includes_device_descendants() {
+        let host_process = |pid: i32, parent_pid: i32, command: &str| super::HostAudioProcess {
+            pid,
+            parent_pid,
+            command: command.to_owned(),
+        };
+        let processes = vec![
+            host_process(
+                10,
+                1,
+                "/Library/Developer/CoreSimulator/Profiles/Runtimes/iOS.simruntime/Contents/Resources/RuntimeRoot/usr/libexec/launchd_sim /Users/me/Library/Developer/CoreSimulator/Devices/UDID-1/data",
+            ),
+            host_process(11, 10, "/Applications/Fixture.app/Fixture"),
+            host_process(
+                12,
+                1,
+                "/usr/bin/xcrun simctl spawn UDID-1 launchctl print user/501",
+            ),
+        ];
+        let data_path = "/Users/me/Library/Developer/CoreSimulator/Devices/UDID-1/data";
+
+        let roots =
+            super::ios_simulator_audio_root_process_ids("UDID-1", Some(data_path), &processes);
+        let discovered = super::process_tree_process_ids(&processes, roots);
+
+        assert_eq!(discovered, vec![10, 11]);
+    }
+
+    /// Root discovery for `android:Pixel_8_API_36` followed by tree expansion yields that
+    /// emulator and its child process, but not the emulator started for the AVD named `Other`.
+    #[test]
+    fn android_audio_process_discovery_includes_emulator_descendants() {
+        let host_process = |pid: i32, parent_pid: i32, command: &str| super::HostAudioProcess {
+            pid,
+            parent_pid,
+            command: command.to_owned(),
+        };
+        let processes = vec![
+            host_process(
+                20,
+                1,
+                "/Users/me/Library/Android/sdk/emulator/emulator -avd Pixel_8_API_36 -no-window",
+            ),
+            host_process(21, 20, "qemu-system-aarch64 -some-child-arg"),
+            host_process(
+                22,
+                1,
+                "/Users/me/Library/Android/sdk/emulator/emulator -avd Other",
+            ),
+        ];
+
+        let roots =
+            super::android_audio_root_process_ids("android:Pixel_8_API_36", &processes).unwrap();
+        let discovered = super::process_tree_process_ids(&processes, roots);
+
+        assert_eq!(discovered, vec![20, 21]);
+    }
+
     #[test]
     fn realtime_h264_advertises_retransmission_feedback() {
         let feedback = super::h264_rtcp_feedback();
diff --git a/scripts/e2e-webrtc-reliability.mjs b/scripts/e2e-webrtc-reliability.mjs
index 6d161d1c..de3f500b 100644
--- a/scripts/e2e-webrtc-reliability.mjs
+++ b/scripts/e2e-webrtc-reliability.mjs
@@ -28,6 +28,9 @@ const maxInteractionLatencyMs = Number(
   process.env.SIMDECK_E2E_MAX_INTERACTION_LATENCY_MS ?? 750,
 );
 const interactionsEnabled = process.env.SIMDECK_E2E_INTERACTIONS !== "0";
+const audioEnabled = process.env.SIMDECK_E2E_ENABLE_AUDIO === "1";
+const capturePeerSnapshot =
+  process.env.SIMDECK_E2E_CAPTURE_PEER_SNAPSHOT === "1";
 const maxPeerDisconnectedMs = Number(
   process.env.SIMDECK_E2E_MAX_PEER_DISCONNECTED_MS ?? 1000,
 );
@@ -41,6 +44,7 @@ const minVideoHeight = Number(process.env.SIMDECK_E2E_MIN_VIDEO_HEIGHT ?? 0);
 const minDecodedFps = Number(process.env.SIMDECK_E2E_MIN_DECODED_FPS ?? 0);
 const minPresentedFps = Number(process.env.SIMDECK_E2E_MIN_PRESENTED_FPS ?? 0);
 const minReceivedFps = Number(process.env.SIMDECK_E2E_MIN_RECEIVED_FPS ?? 0);
+const minAudioPackets = Number(process.env.SIMDECK_E2E_MIN_AUDIO_PACKETS ?? 0);
 const visualSampleIntervalMs = Number(
   process.env.SIMDECK_E2E_VISUAL_SAMPLE_INTERVAL_MS ?? 5000,
 );
@@ -559,12 +563,20 @@ try {
   if (warmupMs > 0) {
     await sleep(warmupMs);
   }
+  let streamAudioEnabled = false;
+  if (audioEnabled) {
+    streamAudioEnabled = await enableStreamAudio(cdp);
+    await sleep(500);
+  }
 
   const initialMetrics = await fetchJson(endpoint("/api/metrics"));
   const initialStreams = findClientStreams(initialMetrics, clientId);
   const initialPage = latestByKind(initialStreams, "page") ?? {};
   const initialWebRtc = latestByKind(initialStreams, "webrtc") ?? {};
   const directStatsStart = await collectDirectWebRtcStats(cdp);
+  const peerConnectionStart = capturePeerSnapshot
+    ? await collectPeerConnectionSnapshot(cdp)
+    : [];
   let maxObservedFrameGapMs = 0;
   let maxObservedDecodeQueue = 0;
 
@@ -652,6 +664,9 @@ try {
   const finalPage = latestByKind(finalStreams, "page") ?? {};
   const finalWebRtc = latestByKind(finalStreams, "webrtc") ?? {};
   const directStatsEnd = await collectDirectWebRtcStats(cdp);
+  const peerConnectionEnd = capturePeerSnapshot
+    ? await collectPeerConnectionSnapshot(cdp)
+    : [];
   const failures = [];
 
   const renderedDelta =
@@ -673,6 +688,8 @@ try {
     directStatsEnd.packetsReceived - directStatsStart.packetsReceived;
   const directPresentedDelta =
     directStatsEnd.totalVideoFrames - directStatsStart.totalVideoFrames;
+  const directAudioPacketsDelta =
+    directStatsEnd.audioPacketsReceived - directStatsStart.audioPacketsReceived;
   const observedDurationSeconds = Math.max(
     0.001,
     (directStatsEnd.timestampMs - directStatsStart.timestampMs) / 1000,
@@ -738,6 +755,11 @@ try {
       `received packet fps ${receivedFps.toFixed(2)} did not meet minimum ${minReceivedFps}`,
     );
   }
+  if (minAudioPackets > 0 && directAudioPacketsDelta < minAudioPackets) {
+    failures.push(
+      `audio RTP packets ${directAudioPacketsDelta} did not meet minimum ${minAudioPackets}`,
+    );
+  }
   if (maxPeerDisconnectedObservedMs > maxPeerDisconnectedMs) {
     failures.push(
       `peer disconnected for ${maxPeerDisconnectedObservedMs}ms, exceeded ${maxPeerDisconnectedMs}ms`,
@@ -794,6 +816,7 @@ try {
     initialPage,
     initialWebRtc,
     observedDurationSeconds,
+    ...(capturePeerSnapshot ? { peerConnectionEnd, peerConnectionStart } : {}),
     decodedFps,
     presentedFps,
     receivedFps,
@@ -803,8 +826,11 @@ try {
     maxPeerDisconnectedObservedMs,
     maxInteractionLatencyMs,
     maxDecoderDrops,
+    minAudioPackets,
     warmupMs,
     interactionsEnabled,
+    audioEnabled,
+    streamAudioEnabled,
     visualSamplesEnabled,
     interactionLatencies,
     presentedInteractionLatencies,
@@ -823,6 +849,7 @@ try {
     renderedDelta,
     decodedDelta,
     receivedDelta,
+    directAudioPacketsDelta,
     droppedDelta,
     reconnectDelta,
     streams: finalStreams.map((stream) => ({
@@ -853,6 +880,27 @@ try {
   await rm(profileDir, { force: true, recursive: true });
 }
 
+/**
+ * Turns on the "Sound" checkbox in the page under test.
+ *
+ * The script passed to `evaluate` clicks the button whose title, aria-label or text is
+ * "Open menu", waits one animation frame, looks up the `<label>` whose trimmed text is
+ * "Sound", and clicks its checkbox when it is not already checked.
+ *
+ * NOTE(review): `evaluate` is presumably a CDP helper that runs the expression in the page
+ * and resolves with its value — confirm against its definition.
+ *
+ * @param cdp Handle forwarded unchanged to `evaluate`.
+ * @returns The script's result: `true` when the checkbox ends up checked, `false` when it is
+ *   still unchecked or the menu button, label or checkbox could not be found.
+ */
+async function enableStreamAudio(cdp) {
+  return evaluate(
+    cdp,
+    `
+    (async () => {
+      const menuButton = [...document.querySelectorAll("button")]
+        .find((button) => button.title === "Open menu" || button.getAttribute("aria-label") === "Open menu" || button.textContent?.trim() === "Open menu");
+      menuButton?.click();
+      await new Promise((resolve) => requestAnimationFrame(() => resolve()));
+      const soundLabel = [...document.querySelectorAll("label")]
+        .find((label) => label.textContent?.trim() === "Sound");
+      const input = soundLabel?.querySelector("input[type='checkbox']");
+      if (input && !input.checked) {
+        input.click();
+      }
+      return Boolean(input?.checked);
+    })()
+  `,
+  );
+}
+
 async function writeSummary(summary) {
   if (!outputJsonPath) {
     return;
@@ -973,6 +1021,12 @@ async function collectDirectWebRtcStats(cdp) {
     `
     (async () => {
       const totals = {
+        audioBytesReceived: 0,
+        audioConcealedSamples: 0,
+        audioJitter: 0,
+        audioPacketsLost: 0,
+        audioPacketsReceived: 0,
+        audioSilentConcealedSamples: 0,
         framesDecoded: 0,
       framesDropped: 0,
       jitter: 0,
@@ -986,6 +1040,14 @@ async function collectDirectWebRtcStats(cdp) {
       for (const pc of window.__simdeckPeerConnections || []) {
         const reports = await pc.getStats();
         for (const report of reports.values()) {
+          if (report.type === "inbound-rtp" && (report.kind === "audio" || report.mediaType === "audio")) {
+            totals.audioBytesReceived += report.bytesReceived || 0;
+            totals.audioConcealedSamples += report.concealedSamples || 0;
+            totals.audioJitter = Math.max(totals.audioJitter, report.jitter || 0);
+            totals.audioPacketsLost += report.packetsLost || 0;
+            totals.audioPacketsReceived += report.packetsReceived || 0;
+            totals.audioSilentConcealedSamples += report.silentConcealedSamples || 0;
+          }
           if (report.type === "inbound-rtp" && (report.kind === "video" || report.mediaType === "video")) {
             totals.framesDecoded += report.framesDecoded || 0;
             totals.framesDropped += report.framesDropped || 0;
@@ -1007,6 +1069,66 @@ async function collectDirectWebRtcStats(cdp) {
   );
 }
 
+/**
+ * Captures a diagnostic summary of every peer connection registered on
+ * `window.__simdeckPeerConnections` (an empty list when that global is missing).
+ *
+ * For each connection the script passed to `evaluate` reports:
+ * - `connectionState`, `iceConnectionState` and `signalingState`;
+ * - the type of the local and remote descriptions plus, per SDP section (split on `m=`
+ *   lines), only the `m=`, `a=mid:`, direction (`a=sendonly` / `a=recvonly` / `a=sendrecv` /
+ *   `a=inactive`), `a=rtpmap:`, `a=fmtp:`, `a=ssrc:` and `a=msid:` lines;
+ * - for each transceiver its `mid`, `direction`, `currentDirection` and a summary
+ *   (`enabled`, `id`, `kind`, `muted`, `readyState`) of the receiver and sender tracks, or
+ *   `null` where a track is absent.
+ *
+ * The regular expressions are double-escaped because they live inside a template string that
+ * is evaluated as source text in the page.
+ *
+ * @param cdp Handle forwarded unchanged to `evaluate`.
+ * @returns Whatever `evaluate` resolves with for the script; the script itself produces one
+ *   summary object per peer connection.
+ */
+async function collectPeerConnectionSnapshot(cdp) {
+  return evaluate(
+    cdp,
+    `
+    (() => {
+      const sectionSummaries = (sdp) => String(sdp || "")
+        .split(/\\r?\\nm=/)
+        .map((section, index) => {
+          const text = index === 0 ? section : "m=" + section;
+          const lines = text.split(/\\r?\\n/).filter(Boolean);
+          return lines.filter((line) =>
+            line.startsWith("m=") ||
+            line.startsWith("a=mid:") ||
+            line === "a=sendonly" ||
+            line === "a=recvonly" ||
+            line === "a=sendrecv" ||
+            line === "a=inactive" ||
+            line.startsWith("a=rtpmap:") ||
+            line.startsWith("a=fmtp:") ||
+            line.startsWith("a=ssrc:") ||
+            line.startsWith("a=msid:")
+          );
+        });
+      return (window.__simdeckPeerConnections || []).map((pc) => ({
+        connectionState: pc.connectionState,
+        iceConnectionState: pc.iceConnectionState,
+        localDescription: {
+          sections: sectionSummaries(pc.localDescription?.sdp || ""),
+          type: pc.localDescription?.type || "",
+        },
+        remoteDescription: {
+          sections: sectionSummaries(pc.remoteDescription?.sdp || ""),
+          type: pc.remoteDescription?.type || "",
+        },
+        signalingState: pc.signalingState,
+        transceivers: pc.getTransceivers().map((transceiver) => ({
+          currentDirection: transceiver.currentDirection || "",
+          direction: transceiver.direction,
+          mid: transceiver.mid,
+          receiverTrack: transceiver.receiver?.track ? {
+            enabled: transceiver.receiver.track.enabled,
+            id: transceiver.receiver.track.id,
+            kind: transceiver.receiver.track.kind,
+            muted: transceiver.receiver.track.muted,
+            readyState: transceiver.receiver.track.readyState,
+          } : null,
+          senderTrack: transceiver.sender?.track ? {
+            enabled: transceiver.sender.track.enabled,
+            id: transceiver.sender.track.id,
+            kind: transceiver.sender.track.kind,
+            muted: transceiver.sender.track.muted,
+            readyState: transceiver.sender.track.readyState,
+          } : null,
+        })),
+      }));
+    })()
+  `,
+  );
+}
+
 async function waitForDecodedFrameAfterInteraction(
   cdp,
   baselineFramesDecoded,