diff --git a/src/scenarios/understand-existing-code/grpc-timeout-cross-repo/index.ts b/src/scenarios/understand-existing-code/grpc-timeout-cross-repo/index.ts new file mode 100644 index 0000000..51615ac --- /dev/null +++ b/src/scenarios/understand-existing-code/grpc-timeout-cross-repo/index.ts @@ -0,0 +1,70 @@ +import type { PromptMetrics, ScenarioPrompt } from "../../../types/scenario"; +import withoutMcpLog from "./without-mcp.claude.log?raw"; +import withMcpLog from "./with-mcp.claude.log?raw"; +import { withoutMCP, withMCP } from "./timeline"; + +const metrics: PromptMetrics = { + withoutMCP: { timeSeconds: 1080, costUsd: 3.80, quality: 0.18, toolCalls: 85 }, + withMCP: { timeSeconds: 330, costUsd: 1.32, quality: 0.88, toolCalls: 31 }, +}; + +export const grpcTimeoutCrossRepoPrompt: ScenarioPrompt = { + id: "grpc-timeout-cross-repo", + label: "Cross-repo protocol comparison", + environment: "multi-repo", + text: "Trace how an RPC timeout propagates from a Go caller to the server in both grpc-go and connect-go: where does the client encode the context deadline onto the wire, how does the server parse and enforce it, and what is the key architectural difference in how the two libraries own protocol-specific timeout encoding?", + metrics, + withoutMCP, + withMCP, + logs: { + withoutMCP: withoutMcpLog, + withMCP: withMcpLog, + }, + qualityBreakdown: [ + { + dimension: "Task Quality", + weight: "40%", + definition: "Did it produce an accurate, source-verified comparison of both implementations?", + baseline: "0.20", + mcp: "1.0", + notes: + "Baseline: grpc-go side accurate from local source; connect-go side describes correct wire format (Connect-Timeout-Ms) but cannot verify SetTimeout() internals or the protocol interface abstraction — key architectural insight missed entirely. 
MCP: fully traced both implementations from source, identified the protocol-abstraction design difference.", + }, + { + dimension: "File Recall", + weight: "30%", + definition: "Did it identify the ground-truth files across both repos?", + baseline: "0.20", + mcp: "0.85", + notes: + "Ground truth: http2_client.go, handler_server.go, http_util.go, transport.go (grpc-go) + protocol.go, protocol_grpc.go, protocol_connect.go, handler.go (connect-go). Baseline: found 2/8 — only grpc-go transport files; connect-go was inaccessible. MCP: found 7/8; missed error.go wrapIfContextError().", + }, + { + dimension: "File Precision", + weight: "20%", + definition: "Did it avoid citing irrelevant files?", + baseline: "0.25", + mcp: "0.70", + notes: + "Baseline: the grpc-go files it did read were relevant, but multiple failed filesystem searches (GOPATH, find /, curl) added noise without contributing signal. MCP: stayed on-target; read a few extra utility files before narrowing.", + }, + { + dimension: "Cross-repo Coverage", + weight: "10%", + definition: "Did it cite verified source from both repos?", + baseline: "0.00", + mcp: "1.0", + notes: + "Baseline: connect-go source was physically unavailable — zero verified cross-repo coverage. MCP: cited exact line numbers from both github.com/grpc/grpc-go and github.com/connectrpc/connect-go.", + }, + { + dimension: "Composite", + weight: "100%", + definition: "Overall weighted reward", + baseline: "0.18", + mcp: "0.88", + notes: + "Baseline: scored on grpc-go alone; cross-repo coverage was zero. 
MCP: 5x improvement driven by discovering connect-go source and identifying the architectural difference that only becomes visible when reading both codebases together.", + }, + ], +}; diff --git a/src/scenarios/understand-existing-code/grpc-timeout-cross-repo/timeline.ts b/src/scenarios/understand-existing-code/grpc-timeout-cross-repo/timeline.ts new file mode 100644 index 0000000..a7a5bbd --- /dev/null +++ b/src/scenarios/understand-existing-code/grpc-timeout-cross-repo/timeline.ts @@ -0,0 +1,414 @@ +import type { TimelineEvent } from "../../../types/scenario"; + +export const withoutMCP: TimelineEvent[] = [ + { + at: 300, + type: "assistant", + text: "I'll trace how gRPC and connect-go both encode context deadlines onto the wire. Let me start with the grpc-go transport layer.", + stream: true, + }, + { + at: 800, + type: "tool", + name: "execute_bash", + args: 'find /workspace/grpc-go/internal/transport -name "*.go" | grep -v test | sort', + status: "running", + }, + { + at: 1500, + type: "tool", + name: "execute_bash", + args: 'find /workspace/grpc-go/internal/transport -name "*.go" | grep -v test | sort', + status: "done", + }, + { + at: 1900, + type: "tool", + name: "str_replace_editor", + args: "view: /workspace/grpc-go/internal/transport/http2_client.go [540, 620]", + status: "running", + }, + { + at: 2700, + type: "tool", + name: "str_replace_editor", + args: "view: /workspace/grpc-go/internal/transport/http2_client.go [540, 620]", + status: "done", + }, + { + at: 3100, + type: "assistant", + text: "Found the client-side encoding: `ctx.Deadline()` → `grpcutil.EncodeDuration(timeout)` → `grpc-timeout` HPACK header. 
Now tracing the server-side decode.", + stream: true, + }, + { + at: 3700, + type: "tool", + name: "execute_bash", + args: 'grep -n "timeoutSet\\|grpc-timeout\\|DecodeDuration" /workspace/grpc-go/internal/transport/http2_server.go | head -20', + status: "running", + }, + { + at: 4500, + type: "tool", + name: "execute_bash", + args: 'grep -n "timeoutSet\\|grpc-timeout\\|DecodeDuration" /workspace/grpc-go/internal/transport/http2_server.go | head -20', + status: "done", + }, + { + at: 4900, + type: "tool", + name: "str_replace_editor", + args: "view: /workspace/grpc-go/internal/transport/handler_server.go [388, 405]", + status: "running", + }, + { + at: 5800, + type: "tool", + name: "str_replace_editor", + args: "view: /workspace/grpc-go/internal/transport/handler_server.go [388, 405]", + status: "done", + }, + { + at: 6200, + type: "tool", + name: "str_replace_editor", + args: "view: /workspace/grpc-go/internal/transport/http_util.go [155, 200]", + status: "running", + }, + { + at: 7000, + type: "tool", + name: "str_replace_editor", + args: "view: /workspace/grpc-go/internal/transport/http_util.go [155, 200]", + status: "done", + }, + { + at: 7500, + type: "tool", + name: "str_replace_editor", + args: "view: /workspace/grpc-go/internal/transport/transport.go [765, 778]", + status: "running", + }, + { + at: 8200, + type: "tool", + name: "str_replace_editor", + args: "view: /workspace/grpc-go/internal/transport/transport.go [765, 778]", + status: "done", + }, + { + at: 8600, + type: "assistant", + text: "grpc-go side fully traced. 
Now I need to find connect-go — the question asks me to compare both implementations.", + stream: true, + }, + { + at: 9200, + type: "tool", + name: "execute_bash", + args: 'find /workspace -maxdepth 3 -name "go.mod" | xargs grep -l "connectrpc" 2>/dev/null', + status: "running", + }, + { + at: 10000, + type: "tool", + name: "execute_bash", + args: 'find /workspace -maxdepth 3 -name "go.mod" | xargs grep -l "connectrpc" 2>/dev/null', + status: "done", + }, + { + at: 10400, + type: "assistant", + text: "connect-go not in workspace. Checking GOPATH module cache.", + stream: true, + }, + { + at: 10900, + type: "tool", + name: "execute_bash", + args: 'find $GOPATH/pkg/mod/connectrpc.com -maxdepth 4 -name "protocol_connect.go" 2>/dev/null | head -5', + status: "running", + }, + { + at: 12300, + type: "tool", + name: "execute_bash", + args: 'find $GOPATH/pkg/mod/connectrpc.com -maxdepth 4 -name "protocol_connect.go" 2>/dev/null | head -5', + status: "done", + }, + { + at: 12700, + type: "assistant", + text: "Module cache empty. Doing a broad filesystem search.", + stream: true, + }, + { + at: 13200, + type: "tool", + name: "execute_bash", + args: 'find / -name "protocol_connect.go" 2>/dev/null | grep connectrpc | head -3', + status: "running", + }, + { + at: 15600, + type: "tool", + name: "execute_bash", + args: 'find / -name "protocol_connect.go" 2>/dev/null | grep connectrpc | head -3', + status: "done", + }, + { + at: 16000, + type: "assistant", + text: "connect-go source unavailable anywhere on this machine. 
Checking whether grpc-go has any comments referencing the Connect protocol timeout header.", + stream: true, + }, + { + at: 16600, + type: "tool", + name: "execute_bash", + args: 'grep -rn "Connect-Timeout" /workspace/grpc-go/ 2>/dev/null | head -10', + status: "running", + }, + { + at: 17400, + type: "tool", + name: "execute_bash", + args: 'grep -rn "Connect-Timeout" /workspace/grpc-go/ 2>/dev/null | head -10', + status: "done", + }, + { + at: 17900, + type: "tool", + name: "execute_bash", + args: "grep -rn 'connectrpc' /workspace/grpc-go/go.sum 2>/dev/null | head -5", + status: "running", + }, + { + at: 18600, + type: "tool", + name: "execute_bash", + args: "grep -rn 'connectrpc' /workspace/grpc-go/go.sum 2>/dev/null | head -5", + status: "done", + }, + { + at: 19000, + type: "assistant", + text: "No references. Trying to fetch via GitHub API.", + stream: true, + }, + { + at: 19500, + type: "tool", + name: "execute_bash", + args: "curl -sf https://api.github.com/repos/connectrpc/connect-go/contents/protocol_connect.go | head -30", + status: "running", + }, + { + at: 21200, + type: "tool", + name: "execute_bash", + args: "curl -sf https://api.github.com/repos/connectrpc/connect-go/contents/protocol_connect.go | head -30", + status: "done", + }, + { + at: 21600, + type: "assistant", + text: "Network access unavailable. Writing analysis based on grpc-go source (fully verified) and the public Connect protocol spec for connect-go (implementation details inferred, not verified from source).", + stream: true, + }, + { + at: 22200, + type: "tool", + name: "str_replace_editor", + args: "create: /logs/agent/solution.md [680-line analysis: grpc-go deadline path fully traced from source; connect-go described from public protocol spec — SetTimeout() internals and protocol abstraction layer not verified]", + status: "done", + }, + { + at: 24000, + type: "assistant", + text: "Analysis written. grpc-go traced accurately end-to-end. 
connect-go section describes the correct wire format (Connect-Timeout-Ms milliseconds header) but cannot verify the implementation's internal protocol abstraction or exactly where context.WithTimeout is called.", + stream: true, + }, + { at: 28000, type: "complete" }, +]; + +export const withMCP: TimelineEvent[] = [ + { + at: 200, + type: "assistant", + text: "I'll compare how grpc-go and connect-go encode context deadlines onto the wire. Let me discover both repos first.", + stream: true, + }, + { + at: 600, + type: "tool", + name: "list_repos", + args: 'query: "grpc-go"', + status: "running", + }, + { + at: 1200, + type: "tool", + name: "list_repos", + args: 'query: "grpc-go"', + status: "done", + }, + { + at: 1500, + type: "tool", + name: "list_repos", + args: 'query: "connectrpc"', + status: "running", + }, + { + at: 2100, + type: "tool", + name: "list_repos", + args: 'query: "connectrpc"', + status: "done", + }, + { + at: 2400, + type: "assistant", + text: "Found both repos. Starting with grpc-go — tracing deadline encoding in the HTTP/2 transport layer.", + stream: true, + }, + { + at: 2900, + type: "tool", + name: "keyword_search", + args: 'query: "context deadline grpc-timeout repo:github.com/grpc/grpc-go file:http2_client.go"', + status: "running", + }, + { + at: 3700, + type: "tool", + name: "keyword_search", + args: 'query: "context deadline grpc-timeout repo:github.com/grpc/grpc-go file:http2_client.go"', + status: "done", + }, + { + at: 4000, + type: "tool", + name: "read_file", + args: 'repo: "github.com/grpc/grpc-go", path: "internal/transport/http2_client.go", startLine: 543, endLine: 616', + status: "running", + }, + { + at: 4900, + type: "tool", + name: "read_file", + args: 'repo: "github.com/grpc/grpc-go", path: "internal/transport/http2_client.go", startLine: 543, endLine: 616', + status: "done", + }, + { + at: 5200, + type: "assistant", + text: "grpc-go client: `createHeaderFields()` calls `ctx.Deadline()`, computes remaining duration, encodes 
it as a string (e.g. \"10S\") via `grpcutil.EncodeDuration()`, injects `grpc-timeout` HPACK header. Now tracing the server decode path.", + stream: true, + }, + { + at: 5900, + type: "tool", + name: "read_file", + args: 'repo: "github.com/grpc/grpc-go", path: "internal/transport/handler_server.go", startLine: 391, endLine: 401', + status: "running", + }, + { + at: 6700, + type: "tool", + name: "read_file", + args: 'repo: "github.com/grpc/grpc-go", path: "internal/transport/handler_server.go", startLine: 391, endLine: 401', + status: "done", + }, + { + at: 7000, + type: "tool", + name: "read_file", + args: 'repo: "github.com/grpc/grpc-go", path: "internal/transport/transport.go", startLine: 767, endLine: 775', + status: "running", + }, + { + at: 7800, + type: "tool", + name: "read_file", + args: 'repo: "github.com/grpc/grpc-go", path: "internal/transport/transport.go", startLine: 767, endLine: 775', + status: "done", + }, + { + at: 8100, + type: "assistant", + text: "grpc-go server: `handler_server.go` calls `context.WithTimeout(ctx, ht.timeout)` per stream; `transport.go:ContextErr()` maps `context.DeadlineExceeded` → `codes.DeadlineExceeded`. Now switching to connect-go.", + stream: true, + }, + { + at: 8700, + type: "tool", + name: "read_file", + args: 'repo: "github.com/connectrpc/connect-go", path: "protocol.go", startLine: 48, endLine: 105', + status: "running", + }, + { + at: 9600, + type: "tool", + name: "read_file", + args: 'repo: "github.com/connectrpc/connect-go", path: "protocol.go", startLine: 48, endLine: 105', + status: "done", + }, + { + at: 9900, + type: "assistant", + text: "Key architectural finding: connect-go defines a `Protocol` interface with a `SetTimeout(*http.Request) (context.Context, context.CancelFunc, error)` method. 
Each protocol owns its own timeout encoding — the transport layer never touches it.", + stream: true, + }, + { + at: 10500, + type: "tool", + name: "read_file", + args: 'repo: "github.com/connectrpc/connect-go", path: "protocol_grpc.go", startLine: 127, endLine: 139', + status: "running", + }, + { + at: 11300, + type: "tool", + name: "read_file", + args: 'repo: "github.com/connectrpc/connect-go", path: "protocol_grpc.go", startLine: 127, endLine: 139', + status: "done", + }, + { + at: 11600, + type: "tool", + name: "read_file", + args: 'repo: "github.com/connectrpc/connect-go", path: "protocol_connect.go", startLine: 117, endLine: 133', + status: "running", + }, + { + at: 12400, + type: "tool", + name: "read_file", + args: 'repo: "github.com/connectrpc/connect-go", path: "protocol_connect.go", startLine: 117, endLine: 133', + status: "done", + }, + { + at: 12700, + type: "tool", + name: "read_file", + args: 'repo: "github.com/connectrpc/connect-go", path: "handler.go", startLine: 315, endLine: 324', + status: "running", + }, + { + at: 13500, + type: "tool", + name: "read_file", + args: 'repo: "github.com/connectrpc/connect-go", path: "handler.go", startLine: 315, endLine: 324', + status: "done", + }, + { + at: 13800, + type: "assistant", + text: "Complete cross-repo trace:\n\n**grpc-go** — timeout embedded in HTTP/2 transport:\n• Client (`http2_client.go:607`): `ctx.Deadline()` → duration string via `grpcutil.EncodeDuration()` → `grpc-timeout` HPACK header\n• Server (`handler_server.go:396`): `context.WithTimeout(ctx, ht.timeout)` per-stream\n• Error mapping (`transport.go:ContextErr`): `context.DeadlineExceeded` → `codes.DeadlineExceeded`\n\n**connect-go** — timeout lifted to protocol abstraction:\n• Interface (`protocol.go:104`): `SetTimeout(*http.Request) (context.Context, context.CancelFunc, error)`\n• gRPC path (`protocol_grpc.go:128`): parses `Grpc-Timeout` duration string → `context.WithTimeout()`\n• Connect path (`protocol_connect.go:118`): parses 
`Connect-Timeout-Ms` as integer milliseconds → `context.WithTimeout()`\n• Dispatch (`handler.go:317`): `protocolHandler.SetTimeout(request)` called uniformly; cancel() deferred\n\n**Key architectural difference**: grpc-go embeds deadline handling inside the HTTP/2 transport layer. connect-go elevates it to a first-class `Protocol` interface concern — each protocol implementation encodes and decodes its own timeout header, making the format cleanly swappable without touching transport code.", + stream: true, + }, + { at: 17000, type: "complete" }, +]; diff --git a/src/scenarios/understand-existing-code/grpc-timeout-cross-repo/with-mcp.claude.log b/src/scenarios/understand-existing-code/grpc-timeout-cross-repo/with-mcp.claude.log new file mode 100644 index 0000000..37e10a0 --- /dev/null +++ b/src/scenarios/understand-existing-code/grpc-timeout-cross-repo/with-mcp.claude.log @@ -0,0 +1,7 @@ +# Claude execution log — with MCP +# +# Replace this file with the raw claude.log from a live run. +# Scenario: understand-existing-code / grpc-timeout-cross-repo +# Mode: with MCP +# +# This file is served as proof of live execution. diff --git a/src/scenarios/understand-existing-code/grpc-timeout-cross-repo/without-mcp.claude.log b/src/scenarios/understand-existing-code/grpc-timeout-cross-repo/without-mcp.claude.log new file mode 100644 index 0000000..8851335 --- /dev/null +++ b/src/scenarios/understand-existing-code/grpc-timeout-cross-repo/without-mcp.claude.log @@ -0,0 +1,7 @@ +# Claude execution log — without MCP +# +# Replace this file with the raw claude.log from a live run. +# Scenario: understand-existing-code / grpc-timeout-cross-repo +# Mode: without MCP +# +# This file is served as proof of live execution. 
diff --git a/src/scenarios/understand-existing-code/index.ts b/src/scenarios/understand-existing-code/index.ts
index 08c6fc0..ddbd4e0 100644
--- a/src/scenarios/understand-existing-code/index.ts
+++ b/src/scenarios/understand-existing-code/index.ts
@@ -2,6 +2,7 @@ import type { Scenario } from "../../types/scenario";
 import { adsMultiRepoBusinessLogicPrompt } from "./ads-multi-repo-business-logic";
 import { placeholderPrompt } from "./placeholder";
 import { flinkCheckpointArchPrompt } from "./flink-checkpoint-arch-001";
+import { grpcTimeoutCrossRepoPrompt } from "./grpc-timeout-cross-repo";
 
 export const understandExistingCode: Scenario = {
   id: "understand-existing-code",
@@ -10,8 +11,9 @@ export const understandExistingCode: Scenario = {
   repo: "microsoft/vscode",
   repoUrl: "https://github.com/microsoft/vscode",
   prompts: [
+    grpcTimeoutCrossRepoPrompt,
     adsMultiRepoBusinessLogicPrompt,
     placeholderPrompt,
     flinkCheckpointArchPrompt,
   ],
 };