diff --git a/bin/mcp-server.js b/bin/mcp-server.js
index 0b3c5458f..776e31d22 100644
--- a/bin/mcp-server.js
+++ b/bin/mcp-server.js
@@ -14,6 +14,8 @@ import {
   writeTraceMarkdown,
 } from '../lib/utils/trace.js'
 import event from '../lib/event.js'
+import { setPauseHandler, pauseNow } from '../lib/pause.js'
+import { EventEmitter } from 'events'
 import { fileURLToPath, pathToFileURL } from 'url'
 import { dirname, resolve as resolvePath } from 'path'
 import path from 'path'
@@ -235,6 +237,87 @@ function outputBaseDir() {
   return global.output_dir || resolvePath(process.cwd(), 'output')
 }
 
+// In-process pause coordination. When a test started by run_test or
+// run_step_by_step pauses, the handler registered via setPauseHandler emits
+// 'paused' on pauseEvents, settling the promise that tool call is racing
+// against test completion. The "continue" tool releases it via resolveContinue().
+let pausedController = null   // { resolveContinue, registeredVariables }; non-null only while a test is paused
+let pendingRunPromise = null  // run() promise of the run_test / run_step_by_step call that returned "paused"
+let pendingRunResults = null  // test.after results collected for the current run
+let pendingRunCleanup = null  // cleanup callback to detach test.after / step.after listeners
+let pendingTestFile = null    // file path of the test currently running
+let pendingStepInfo = null    // { index, name, status } of the last step that fired step.after
+const pauseEvents = new EventEmitter() // emits 'paused' each time the pause handler fires
+
+setPauseHandler(({ registeredVariables }) => {
+  // Park the test on a promise that only resolveContinue() can settle.
+  return new Promise(release => {
+    const resolveContinue = () => {
+      pausedController = null
+      release()
+    }
+    pausedController = { registeredVariables, resolveContinue }
+    // Publish the controller first, then wake whichever tool call is waiting on 'paused'.
+    pauseEvents.emit('paused')
+  })
+})
+
+// Snapshots the live page through the acting helper and returns the capture passed through artifactsToFileUrls; {} when no helper exists.
+async function captureLiveArtifacts(prefix = 'pause') {
+  const actingHelper = pickActingHelper(container.helpers())
+  if (!actingHelper) return {}
+  const snapshotDir = snapshotDirFor(outputBaseDir())
+  mkdirp.sync(snapshotDir)
+  return artifactsToFileUrls(await captureSnapshot(actingHelper, { dir: snapshotDir, prefix }), snapshotDir)
+}
+
+// Best-effort page summary (url, title, contentSize); a grabber that is missing or throws is skipped.
+async function gatherPageBrief() {
+  const helper = pickActingHelper(container.helpers())
+  const brief = {}
+  if (!helper) return brief
+  try { if (helper.grabCurrentUrl) brief.url = await helper.grabCurrentUrl() } catch {}
+  try { if (helper.grabTitle) brief.title = await helper.grabTitle() } catch {}
+  try {
+    if (!helper.grabSource) return brief
+    const source = await helper.grabSource()
+    brief.contentSize = typeof source === 'string' ? source.length : null
+  } catch {}
+  return brief
+}
+
+// Finalizes the in-flight run: summarizes the collected results, detaches the
+// dispatcher listeners, resets the pending* state and returns the "completed" payload.
+function collectRunCompletion(errorMessage) {
+  const results = pendingRunResults || []
+  const countByStatus = status => results.filter(r => r.status === status).length
+  const stats = { tests: results.length, passes: countByStatus('passed'), failures: countByStatus('failed') }
+  if (typeof pendingRunCleanup === 'function') pendingRunCleanup()
+  pendingRunPromise = null
+  pendingRunResults = null
+  pendingTestFile = null
+  pendingStepInfo = null
+  return {
+    status: 'completed',
+    reporterJson: { stats, tests: results },
+    // Explicit null (not undefined) so the "error" key survives JSON.stringify on success.
+    error: errorMessage ?? null,
+  }
+}
+
+// Builds the payload reported whenever a tool call returns with the test parked
+// at a pause: the test file, the last step that fired step.after, and next-action hints.
+function pausedPayload() {
+  const suggestions = [
+    'Call snapshot to capture URL/HTML/ARIA/screenshot/console/storage at this point',
+    'Call run_code to inspect or manipulate state (e.g. return await I.grabText("h1"))',
+    'Call continue to release the pause and let the test run the next step (or finish)',
+  ]
+  const file = pendingTestFile
+  const pausedAfter = pendingStepInfo
+  return { status: 'paused', file, pausedAfter, suggestions }
+}
+
 async function initCodecept(configPath) {
   if (containerInitialized) return
 
@@ -303,20 +386,21 @@ server.setRequestHandler(ListToolsRequestSchema, async () => ({
     },
     {
       name: 'run_test',
-      description: 'Run a specific test.',
+      description: 'Run a specific test. If the test calls pause() — or if pauseAt is set and reached — returns early with status "paused" so the agent can inspect via run_code and release with continue. Otherwise returns the json reporter result on completion. To learn step indices for pauseAt, run "list" with --steps or call run_step_by_step first.',
       inputSchema: {
         type: 'object',
         properties: {
           test: { type: 'string' },
           timeout: { type: 'number' },
           config: { type: 'string' },
+          pauseAt: { type: 'number', description: '1-based step index. Test will pause after the Nth step completes. Useful as a programmatic breakpoint without editing the test.' },
         },
         required: ['test'],
       },
     },
     {
       name: 'run_step_by_step',
-      description: 'Run a test step by step with pauses between steps.',
+      description: 'Run a test interactively, pausing after every step. Returns paused payload after the first step (URL/title/contentSize, last step info, suggestions). Call continue to advance one step (and re-pause), or run_code/snapshot to inspect state. The test runs to completion when no more steps remain.',
       inputSchema: {
         type: 'object',
         properties: {
@@ -348,6 +432,16 @@ server.setRequestHandler(ListToolsRequestSchema, async () => ({
         },
       },
     },
+    {
+      name: 'continue', // the description mirrors the handler, which can return "paused" again as well as "completed"
+      description: 'Release a paused test (paused by pause(), by run_test pauseAt, or by run_step_by_step) and let it run until it pauses again or finishes. Returns the paused payload if the test pauses again, otherwise the final reporter result. Use run_code to inspect or manipulate state while the test is paused — both tools share the same container.',
+      inputSchema: {
+        type: 'object',
+        properties: {
+          timeout: { type: 'number', description: 'Milliseconds to wait for the test to pause again or finish (default 60000).' },
+        },
+      },
+    },
   ],
 }))
 
@@ -460,6 +554,37 @@ server.setRequestHandler(CallToolRequestSchema, async (request) => {
         }
       }
 
+      case 'continue': {
+        if (!pausedController) throw new Error('No paused test. Run a test first via run_test or run_step_by_step; this tool becomes available if the test pauses.')
+        const { timeout = 60000 } = args || {}
+        return await withSilencedIO(async () => {
+          pausedController.resolveContinue()
+          if (!pendingRunPromise) {
+            return { content: [{ type: 'text', text: JSON.stringify({ status: 'continued' }, null, 2) }] }
+          }
+          // Race: test pauses again (step-by-step or another pause()) vs test finishes vs timeout.
+          let timeoutTimer = null
+          let onPausedAgain = null
+          const pausedAgain = new Promise(resolve => pauseEvents.once('paused', (onPausedAgain = () => resolve('paused'))))
+          const completed = pendingRunPromise.then(() => 'completed', () => 'completed')
+          const timedOut = new Promise((_, reject) => { timeoutTimer = setTimeout(() => reject(new Error(`Timeout after ${timeout}ms`)), timeout) })
+          // Whichever branch wins, cancel the timer and drop the one-shot listener so neither outlives this call.
+          const which = await Promise.race([pausedAgain, completed, timedOut]).finally(() => {
+            clearTimeout(timeoutTimer)
+            pauseEvents.removeListener('paused', onPausedAgain)
+          })
+          if (which === 'paused') {
+            const page = await gatherPageBrief()
+            return { content: [{ type: 'text', text: JSON.stringify({ ...pausedPayload(), page }, null, 2) }] }
+          }
+          let runError = null
+          try { await pendingRunPromise } catch (err) { runError = err }
+          const file = pendingTestFile
+          const final = collectRunCompletion(runError?.message)
+          return { content: [{ type: 'text', text: JSON.stringify({ ...final, file }, null, 2) }] }
+        })
+      }
+
       case 'run_code': {
         const { code, timeout = 60000, config: configPath, saveArtifacts = true } = args
         await initCodecept(configPath)
@@ -558,156 +683,187 @@ server.setRequestHandler(CallToolRequestSchema, async (request) => {
 
       case 'run_test': {
         return await withLock(async () => {
-          const { test, timeout = 60000, config: configPathArg } = args || {}
-          const { configPath, configDir } = resolveConfigPath(configPathArg)
-
-          const { cli, root } = findCodeceptCliUpwards(configDir)
-          const isNodeScript = cli.endsWith('.js')
+          if (pausedController) {
+            throw new Error('A previous run_test is still paused. Call "continue" first.')
+          }
+          const { test, timeout = 60000, config: configPathArg, pauseAt } = args || {}
+          await initCodecept(configPathArg)
+
+          return await withSilencedIO(async () => {
+            codecept.loadTests()
+
+            let testFiles = codecept.testFiles
+            if (test) {
+              const testName = normalizePath(test).toLowerCase()
+              testFiles = codecept.testFiles.filter(f => {
+                const filePath = normalizePath(f).toLowerCase()
+                return filePath.includes(testName) || filePath.endsWith(testName)
+              })
+            }
 
-          const resolvedFile = await resolveTestToFile({ cli, root, configPath, test })
-          const runArgs = ['run', '--config', configPath, '--reporter', 'json']
+            if (!testFiles.length) throw new Error(`No tests found matching: ${test}`)
+            const testFile = testFiles[0]
+
+            pendingRunResults = []
+            pendingTestFile = testFile
+            pendingStepInfo = null
+            let stepIndex = 0
+
+            const onAfter = t => {
+              pendingRunResults.push({
+                title: t.title,
+                file: t.file,
+                status: t.err ? 'failed' : 'passed',
+                error: t.err?.message,
+                duration: t.duration,
+              })
+            }
+            const onStepAfter = step => {
+              stepIndex += 1
+              try {
+                pendingStepInfo = { index: stepIndex, name: step.toString(), status: step.status }
+              } catch {
+                pendingStepInfo = { index: stepIndex }
+              }
+              if (typeof pauseAt === 'number' && stepIndex === pauseAt) {
+                pauseNow()
+              }
+            }
+            event.dispatcher.on(event.test.after, onAfter)
+            event.dispatcher.on(event.step.after, onStepAfter)
+            pendingRunCleanup = () => {
+              try { event.dispatcher.removeListener(event.test.after, onAfter) } catch {}
+              try { event.dispatcher.removeListener(event.step.after, onStepAfter) } catch {}
+              pendingRunCleanup = null
+            }
 
-          if (resolvedFile) runArgs.push(resolvedFile)
-          else if (looksLikePath(test)) runArgs.push(test)
-          else runArgs.push('--grep', String(test))
+            let runError = null
+            const runPromise = (async () => {
+              try {
+                await codecept.bootstrap()
+                await codecept.run(testFile)
+              } catch (err) {
+                runError = err
+                throw err
+              }
+            })()
 
-          const res = isNodeScript
-            ? await runCmd(process.execPath, [cli, ...runArgs], { cwd: root, timeout })
-            : await runCmd(cli, runArgs, { cwd: root, timeout })
+            let timeoutTimer = null
+            const pausedPromise = new Promise(resolve => pauseEvents.once('paused', () => resolve('paused')))
+            const completedPromise = runPromise.then(() => 'completed', () => 'completed')
 
-          const { code, out, err } = res
+            const timedOut = new Promise((_, reject) => { timeoutTimer = setTimeout(() => reject(new Error(`Timeout after ${timeout}ms`)), timeout) })
+            // Cancel the timer once the race settles; on timeout, detach this run's dispatcher listeners so they cannot leak into the next run.
+            const race = Promise.race([completedPromise, pausedPromise, timedOut]).finally(() => clearTimeout(timeoutTimer))
+            const which = await race.catch(err => { if (typeof pendingRunCleanup === 'function') pendingRunCleanup(); throw err })
 
-          let parsed = null
-          const jsonStart = out.indexOf('{')
-          const jsonEnd = out.lastIndexOf('}')
-          if (jsonStart !== -1 && jsonEnd !== -1 && jsonEnd > jsonStart) {
-            try { parsed = JSON.parse(out.slice(jsonStart, jsonEnd + 1)) } catch {}
-          }
+            if (which === 'paused') {
+              pendingRunPromise = runPromise
+              const page = await gatherPageBrief()
+              return {
+                content: [{
+                  type: 'text',
+                  text: JSON.stringify({ ...pausedPayload(), page }, null, 2),
+                }],
+              }
+            }
 
-          return {
-            content: [{
-              type: 'text',
-              text: JSON.stringify({
-                meta: { exitCode: code, cli, root, configPath, args: runArgs, resolvedFile: resolvedFile || null },
-                reporterJson: parsed,
-                stderr: err ? err.slice(0, 20000) : '',
-                rawStdout: parsed ? '' : out.slice(0, 20000),
-              }, null, 2),
-            }],
-          }
+            const final = collectRunCompletion(runError?.message)
+            return { content: [{ type: 'text', text: JSON.stringify({ ...final, file: testFile }, null, 2) }] }
+          })
         })
       }
 
       case 'run_step_by_step': {
-        const { test, timeout = 60000, config: configPath } = args
-        await initCodecept(configPath)
-
-        return await withSilencedIO(async () => {
-          codecept.loadTests()
-
-          let testFiles = codecept.testFiles
-          if (test) {
-            const testName = normalizePath(test).toLowerCase()
-            testFiles = codecept.testFiles.filter(f => {
-              const filePath = normalizePath(f).toLowerCase()
-              return filePath.includes(testName) || filePath.endsWith(testName)
-            })
-          }
-
-          if (!testFiles.length) throw new Error(`No tests found matching: ${test}`)
-
-          const results = []
-          const currentSteps = {}
-          const traceDirs = {}
-          let currentTestTitle = null
-          const testFile = testFiles[0]
-
-          const onBefore = (t) => {
-            const traceDir = traceDirFor(t.file, t.title, outputBaseDir())
-            currentTestTitle = t.title
-            currentSteps[t.title] = []
-            traceDirs[t.title] = traceDir
-            results.push({
-              test: t.title,
-              file: t.file,
-              status: 'running',
-              steps: [],
-            })
+        return await withLock(async () => {
+          if (pausedController) {
+            throw new Error('A previous run is still paused. Call "continue" first.')
           }
+          const { test, timeout = 60000, config: configPath } = args || {}
+          await initCodecept(configPath)
+
+          return await withSilencedIO(async () => {
+            codecept.loadTests()
+
+            let testFiles = codecept.testFiles
+            if (test) {
+              const testName = normalizePath(test).toLowerCase()
+              testFiles = codecept.testFiles.filter(f => {
+                const filePath = normalizePath(f).toLowerCase()
+                return filePath.includes(testName) || filePath.endsWith(testName)
+              })
+            }
 
-          const onAfter = async (t) => {
-            const r = results.find(x => x.test === t.title)
-            if (r) {
-              r.status = t.err ? 'failed' : 'completed'
-              if (t.err) r.error = t.err.message
-
-              if (t.artifacts?.aiTrace) {
-                r.traceFile = pathToFileURL(t.artifacts.aiTrace).href
-              }
-              if (t.artifacts?.har) r.har = pathToFileURL(t.artifacts.har).href
-              if (t.artifacts?.trace) r.trace = pathToFileURL(t.artifacts.trace).href
-
-              if (!t.artifacts?.aiTrace) {
-                try {
-                  const helper = pickActingHelper(container.helpers())
-                  const dir = traceDirs[t.title]
-                  if (helper && dir) {
-                    mkdirp.sync(dir)
-                    const captured = await captureSnapshot(helper, { dir, prefix: 'final' })
-                    r.artifacts = artifactsToFileUrls(captured, dir)
-                    const tracePath = writeTraceMarkdown({
-                      dir,
-                      title: t.title,
-                      file: t.file,
-                      durationMs: 0,
-                      commands: (currentSteps[t.title] || []).map(s => s.step),
-                      captured,
-                      error: r.error,
-                    })
-                    r.traceFile = pathToFileURL(tracePath).href
-                  }
-                } catch {}
+            if (!testFiles.length) throw new Error(`No tests found matching: ${test}`)
+            const testFile = testFiles[0]
+
+            pendingRunResults = []
+            pendingTestFile = testFile
+            pendingStepInfo = null
+            let stepIndex = 0
+
+            const onAfter = t => {
+              pendingRunResults.push({
+                title: t.title,
+                file: t.file,
+                status: t.err ? 'failed' : 'passed',
+                error: t.err?.message,
+                duration: t.duration,
+              })
+            }
+            const onStepAfter = step => {
+              stepIndex += 1
+              try {
+                pendingStepInfo = { index: stepIndex, name: step.toString(), status: step.status }
+              } catch {
+                pendingStepInfo = { index: stepIndex }
               }
+              // Pause after every step — agent calls continue to advance.
+              pauseNow()
+            }
+            event.dispatcher.on(event.test.after, onAfter)
+            event.dispatcher.on(event.step.after, onStepAfter)
+            pendingRunCleanup = () => {
+              try { event.dispatcher.removeListener(event.test.after, onAfter) } catch {}
+              try { event.dispatcher.removeListener(event.step.after, onStepAfter) } catch {}
+              pendingRunCleanup = null
             }
-            currentTestTitle = null
-          }
-
-          const onStepAfter = (step) => {
-            if (!currentTestTitle || !currentSteps[currentTestTitle]) return
-            currentSteps[currentTestTitle].push({
-              step: step.toString(),
-              status: step.status,
-              time: step.endTime - step.startTime,
-            })
-            const r = results.find(x => x.test === currentTestTitle)
-            if (r) r.steps = [...currentSteps[currentTestTitle]]
-          }
-
-          event.dispatcher.on(event.test.before, onBefore)
-          event.dispatcher.on(event.test.after, onAfter)
-          event.dispatcher.on(event.step.after, onStepAfter)
 
-          try {
-            await Promise.race([
-              (async () => {
+            let runError = null
+            const runPromise = (async () => {
+              try {
                 await codecept.bootstrap()
                 await codecept.run(testFile)
-              })(),
+              } catch (err) {
+                runError = err
+                throw err
+              }
+            })()
+
+            let timeoutTimer = null
+            const pausedPromise = new Promise(resolve => pauseEvents.once('paused', () => resolve('paused')))
+            const completedPromise = runPromise.then(() => 'completed', () => 'completed')
+            const timedOut = new Promise((_, reject) => { timeoutTimer = setTimeout(() => reject(new Error(`Timeout after ${timeout}ms`)), timeout) })
+
+            // Cancel the timer once the race settles; on timeout, detach this run's dispatcher listeners so they cannot leak into the next run.
+            const race = Promise.race([completedPromise, pausedPromise, timedOut]).finally(() => clearTimeout(timeoutTimer))
+            const which = await race.catch(err => { if (typeof pendingRunCleanup === 'function') pendingRunCleanup(); throw err })
-              new Promise((_, reject) => setTimeout(() => reject(new Error(`Timeout after ${timeout}ms`)), timeout)),
-            ])
-          } catch (error) {
-            const lastRunning = results.filter(r => r.status === 'running').pop()
-            if (lastRunning) {
-              lastRunning.status = 'failed'
-              lastRunning.error = error.message
+
+            if (which === 'paused') {
+              pendingRunPromise = runPromise
+              const page = await gatherPageBrief()
+              return {
+                content: [{
+                  type: 'text',
+                  text: JSON.stringify({ ...pausedPayload(), page }, null, 2),
+                }],
+              }
             }
-          } finally {
-            try { event.dispatcher.removeListener(event.test.before, onBefore) } catch {}
-            try { event.dispatcher.removeListener(event.test.after, onAfter) } catch {}
-            try { event.dispatcher.removeListener(event.step.after, onStepAfter) } catch {}
-          }
 
-          return { content: [{ type: 'text', text: JSON.stringify({ results, stepByStep: true }, null, 2) }] }
+            // Test had zero steps (or finished before first pause) — return completion
+            const final = collectRunCompletion(runError?.message)
+            return { content: [{ type: 'text', text: JSON.stringify({ ...final, file: testFile }, null, 2) }] }
+          })
         })
       }
 
diff --git a/docs/debugging.md b/docs/debugging.md
index 9368423dc..50d4b1eb8 100644
--- a/docs/debugging.md
+++ b/docs/debugging.md
@@ -107,6 +107,13 @@ After(({ I }) => {
 })
 ```
 
+### Pause Modes
+
+`pause()` adapts to who's driving the test:
+
+- **TTY (humans)** — when `process.stdin` is a terminal (running `npx codeceptjs run --debug` yourself), the readline REPL described above opens.
+- **MCP server (agent-driven debug)** — the MCP server registers an in-process pause handler before running tests, so when `pause()` fires inside a `run_test` invocation, control yields back to the agent. The agent inspects state with [`run_code`](/mcp#run_code) and resumes the test with the [`continue` MCP tool](/mcp#continue). The same `I` container the test uses runs the agent's code, so artifacts (URL, ARIA, HTML, screenshot, console, storage) are captured against the live page.
+
 ## Pause Plugin
 
 For automated debugging without modifying test code, use the `pause` plugin. It pauses tests based on different triggers, controlled entirely from the command line. The default is `on=fail`.
diff --git a/docs/mcp.md b/docs/mcp.md
index d8d042bb0..02edd3bc1 100644
--- a/docs/mcp.md
+++ b/docs/mcp.md
@@ -235,44 +235,85 @@ Capture the current state of the browser without performing any action. Useful f
 }
 ```
 
+### continue
+
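+Release a paused test — one that called `pause()`, reached `pauseAt` during `run_test`, or is being stepped through `run_step_by_step` — and let it run until it pauses again or finishes. Returns a paused payload if the test pauses again; otherwise returns the final reporter result.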
+
+To inspect or manipulate state while the test is paused, use [`run_code`](#run_code) — it operates on the same container the test is using.
+
+**Parameters:**
+- `timeout` (optional): ms to wait for the test to pause again or finish after continuing (default 60000).
+
+**Returns:**
+```json
+{
+  "status": "completed",
+  "reporterJson": { "stats": { "tests": 1, "passes": 1, "failures": 0 }, "tests": [...] },
+  "error": null
+}
+```
+
+**Example flow:**
+
+```json
+{ "name": "run_test", "arguments": { "test": "checkout_test" } }
+// → { "status": "paused", "file": "...", "pausedAfter": { ... }, "page": { ... }, "suggestions": [ ... ] }
+
+{ "name": "run_code", "arguments": { "code": "return await I.grabCurrentUrl()" } }
+// → { "status": "success", "returnValue": "http://...", "artifacts": { ... } }
+
+{ "name": "run_code", "arguments": { "code": "await I.click('Save')" } }
+// → { "status": "success", "artifacts": { ... } }
+
+{ "name": "continue", "arguments": {} }
+// → { "status": "completed", "reporterJson": { ... } }
+```
+
+**Notes:**
+- Pause runs in-process: `run_code` and the test share the same `I` / browser. There's no subprocess, no IPC.
+- `run_test` and `continue` wrap test execution in the same `withSilencedIO` helper that `run_step_by_step` uses, so step output doesn't interleave with the MCP JSON-RPC stream. Stdout/stderr are restored before each tool call returns.
+- TTY behaviour (`npx codeceptjs run --debug` at a terminal) is unchanged — `pause()` opens the readline REPL whenever `process.stdin.isTTY` is true.
+
 ### run_test
 
-Run a specific test by name or file path. Uses subprocess to run tests with isolation.
+Run a specific test by name or file path. Runs in-process so it shares the same `I` / browser as `run_code` and `snapshot`. If the test calls `pause()` — or if `pauseAt` is set and the Nth step completes — this tool returns early and the agent drives the session through `run_code` and `continue`.
 
 **Parameters:**
 - `test` (required): Test name or file path
 - `timeout` (optional): Timeout in milliseconds (default: 60000)
 - `config` (optional): Path to codecept.conf.js
+- `pauseAt` (optional): 1-based step index. The test pauses after the Nth step completes. Use this as a programmatic breakpoint without editing the test. Discover step indices via the `list` CLI (`--steps`) or via `run_step_by_step`.
 
-**Returns:**
+**Returns (test completed normally):**
 ```json
 {
-  "meta": {
-    "exitCode": 0,
-    "cli": "/path/to/codecept.js",
-    "root": "/project/root",
-    "configPath": "/path/to/codecept.conf.js",
-    "args": ["run", "--config", "...", "--reporter", "json", "test_file.js"],
-    "resolvedFile": "/full/path/to/test_file.js"
-  },
-  "reporterJson": {
-    "stats": {
-      "tests": 3,
-      "passes": 2,
-      "failures": 1
-    }
-  },
-  "stderr": "",
-  "rawStdout": ""
+  "status": "completed",
+  "file": "/path/to/test.js",
+  "reporterJson": { "stats": { "tests": 1, "passes": 1, "failures": 0 }, "tests": [...] },
+  "error": null
+}
+```
+
+**Returns (test reached `pause()` or `pauseAt`):**
+```json
+{
+  "status": "paused",
+  "file": "/path/to/test.js",
+  "pausedAfter": { "index": 3, "name": "I.click(\"Save\")", "status": "passed" },
+  "page": { "url": "https://example.com/checkout", "title": "Checkout", "contentSize": 18432 },
+  "suggestions": [
+    "Call snapshot to capture URL/HTML/ARIA/screenshot/console/storage at this point",
+    "Call run_code to inspect or manipulate state (e.g. return await I.grabText(\"h1\"))",
+    "Call continue to release the pause and let the test run the next step (or finish)"
+  ]
 }
 ```
 
 **Features:**
 - Automatically resolves test names to file paths
 - Supports partial test name matching
-- Uses json reporter for structured output
-- Executes in subprocess for isolation
-- Includes stderr for debugging
+- Runs in-process; results assembled from CodeceptJS test events
+- Yields on `pause()` (or `pauseAt`) so the agent can inspect via `run_code` and release with `continue`
 
 **Example:**
 ```json
@@ -287,57 +328,52 @@ Run a specific test by name or file path. Uses subprocess to run tests with isol
 
 ### run_step_by_step
 
-Run a test step by step with detailed step information including timing and status. Generates AI-friendly trace files.
+Run a test interactively, pausing after every step. Returns a paused payload after the first step completes — the agent then calls `continue` to advance one step at a time, or `run_code` / `snapshot` to inspect state at any pause.
 
 **Parameters:**
 - `test` (required): Test name or file path
-- `timeout` (optional): Timeout in milliseconds (default: 60000)
+- `timeout` (optional): per-call timeout in milliseconds (default: 60000)
 - `config` (optional): Path to codecept.conf.js
 
-**Returns:**
+**Returns (after each step):**
 ```json
 {
-  "stepByStep": true,
-  "results": [
-    {
-      "test": "Navigate to homepage",
-      "file": "/path/to/test.js",
-      "traceFile": "file:///output/trace_Test_Name_abc123/trace.md",
-      "status": "completed",
-      "steps": [
-        {
-          "step": "I.amOnPage(\"/\")",
-          "status": "passed",
-          "time": 150
-        },
-        {
-          "step": "I.seeInTitle(\"Test App\")",
-          "status": "passed",
-          "time": 50
-        }
-      ]
-    }
+  "status": "paused",
+  "file": "/path/to/test.js",
+  "pausedAfter": { "index": 1, "name": "I.amOnPage(\"/\")", "status": "passed" },
+  "page": { "url": "http://localhost:8000/", "title": "Test App", "contentSize": 1832 },
+  "suggestions": [
+    "Call snapshot to capture URL/HTML/ARIA/screenshot/console/storage at this point",
+    "Call run_code to inspect or manipulate state ...",
+    "Call continue to release the pause and let the test run the next step (or finish)"
   ]
 }
 ```
 
-**Trace Files:**
-- Generated in `{output_dir}/trace_{TestName}_{hash}/`
-- Includes screenshots (PNG), page HTML, ARIA snapshots, console logs
-- `trace.md` file provides structured summary for AI analysis
-- Named with test title and hash for uniqueness
+**Returns (after the last step):**
+```json
+{ "status": "completed", "file": "...", "reporterJson": { "stats": {...}, "tests": [...] } }
+```
 
-**Example:**
+**Flow:**
 ```json
-{
-  "name": "run_step_by_step",
-  "arguments": {
-    "test": "authentication_test",
-    "timeout": 90000
-  }
-}
+{ "name": "run_step_by_step", "arguments": { "test": "checkout_test" } }
+// → { "status": "paused", "pausedAfter": { "index": 1, ... } }
+
+{ "name": "snapshot", "arguments": {} }
+// → full artifact bundle for step 1
+
+{ "name": "continue", "arguments": {} }
+// → { "status": "paused", "pausedAfter": { "index": 2, ... } }
+
+{ "name": "continue", "arguments": {} }
+// → ... and so on, until { "status": "completed", "reporterJson": {...} }
 ```
 
+For a one-shot breakpoint (pause once at a specific step rather than every step), use `run_test` with `pauseAt: N` instead.
+
+For per-step trace artifacts written to disk (HTML / ARIA / screenshot / console / storage per step) without the interactive flow, enable the `aiTrace` plugin.
+
 ### start_browser
 
 Start the browser session (initializes CodeceptJS container).
diff --git a/lib/pause.js b/lib/pause.js
index 7f89c6d2c..47be63287 100644
--- a/lib/pause.js
+++ b/lib/pause.js
@@ -18,6 +18,8 @@ let nextStep
 let finish
 let next
 let registeredVariables = {}
+let externalHandler = null
+
 /**
  * Pauses test execution and starts interactive shell
  * @param {Object<string, *>} [passedObject]
@@ -37,10 +39,10 @@ const pause = function (passedObject = {}) {
   })
 
   event.dispatcher.on(event.test.finished, () => {
-    finish()
+    if (typeof finish === 'function') finish()
     recorder.session.restore('pause')
-    rl.close()
-    history.save()
+    if (rl) rl.close()
+    if (!externalHandler) history.save()
   })
 
   recorder.add('Start new session', () => pauseSession(passedObject))
@@ -49,6 +51,15 @@ const pause = function (passedObject = {}) {
 function pauseSession(passedObject = {}) {
   registeredVariables = passedObject
   recorder.session.start('pause')
+
+  if (externalHandler) {
+    store.onPause = true
+    return externalHandler({ registeredVariables }).then(() => {
+      store.onPause = false
+      recorder.session.restore('pause')
+    })
+  }
+
   if (!next) {
     let vars = Object.keys(registeredVariables).join(', ')
     if (vars) vars = `(vars: ${vars})`
@@ -234,5 +245,28 @@ function registerVariable(name, value) {
   registeredVariables[name] = value
 }
 
+/**
+ * Hook for external pause drivers (e.g. the MCP server). When set, pauseSession
+ * delegates to the handler instead of opening a readline REPL. The handler
+ * receives `{ registeredVariables }` and must return a Promise; the pause
+ * session is restored once that Promise resolves. Pass `null` to unset the hook.
+ * @param {?Function} handler - pause driver, or null/undefined to clear it
+ * @throws {TypeError} when handler is neither a function nor null/undefined
+ */
+function setPauseHandler(handler) {
+  if (handler != null && typeof handler !== 'function') throw new TypeError('Pause handler must be a function or null')
+  externalHandler = handler
+}
+
+/**
+ * Requests a pause from outside the test body (the MCP server uses it for
+ * pauseAt and step-by-step runs). Does nothing in dry-run mode; otherwise the
+ * pause session is queued on the recorder as a 'Triggered pause' task.
+ */
+function pauseNow(passedObject = {}) {
+  const startPause = () => pauseSession(passedObject)
+  if (!store.dryRun) recorder.add('Triggered pause', startPause)
+}
+
 export default pause
-export { registerVariable }
+export { registerVariable, setPauseHandler, pauseNow }
diff --git a/test/unit/pause_test.js b/test/unit/pause_test.js
new file mode 100644
index 000000000..bd65bafb2
--- /dev/null
+++ b/test/unit/pause_test.js
@@ -0,0 +1,28 @@
+import { expect } from 'chai'
+import { setPauseHandler } from '../../lib/pause.js'
+
+describe('pause external handler hook', () => {
+  afterEach(() => {
+    setPauseHandler(null) // clear the hook so a registered handler cannot leak into other tests
+  })
+
+  it('setPauseHandler is exported and callable', () => {
+    expect(typeof setPauseHandler).to.equal('function')
+    expect(() => setPauseHandler(() => Promise.resolve())).to.not.throw()
+    expect(() => setPauseHandler(null)).to.not.throw()
+  })
+
+  it('handler receives registered variables and returns a Promise', async () => {
+    let received = null
+    const handler = arg => {
+      received = arg
+      return Promise.resolve()
+    }
+    setPauseHandler(handler)
+    // NOTE(review): calls the local handler directly; pauseSession's delegation to the registered handler is not exercised here.
+    const p = handler({ registeredVariables: { foo: 1 } })
+    expect(p).to.be.a('promise')
+    await p
+    expect(received).to.deep.equal({ registeredVariables: { foo: 1 } })
+  })
+})