From 61a61e63620e7e3ece7e9787502d5ab913a072e2 Mon Sep 17 00:00:00 2001 From: Akash Kumar Date: Tue, 12 May 2026 00:00:15 +0530 Subject: [PATCH 01/10] fastapi-sqlalchemy-pg-catalog: minimal repro sample for keploy/integrations#193 FastAPI + SQLAlchemy 2.x + psycopg2 + Postgres 13 sample built to exercise the v3 dispatcher's simple-query ClassCatalog branch. The sample's init.sql pre-creates the `project` table so SQLAlchemy's Base.metadata.create_all skips CREATE TABLE at record time -- the shape the dispatcher bug requires. Used by the keploy/integrations Woodpecker lane sqlalchemy-pg-catalog-postgres to assert that recorded `type: query` mocks with `class: CATALOG` are consulted by the simple-query path, not just the extended-query path. Signed-off-by: Akash Kumar --- fastapi-sqlalchemy-pg-catalog/README.md | 87 +++++++++++++++++++ fastapi-sqlalchemy-pg-catalog/app/Dockerfile | 12 +++ fastapi-sqlalchemy-pg-catalog/app/main.py | 70 +++++++++++++++ .../app/requirements.txt | 4 + .../docker-compose.yml | 34 ++++++++ fastapi-sqlalchemy-pg-catalog/flow.sh | 23 +++++ fastapi-sqlalchemy-pg-catalog/init.sql | 15 ++++ 7 files changed, 245 insertions(+) create mode 100644 fastapi-sqlalchemy-pg-catalog/README.md create mode 100644 fastapi-sqlalchemy-pg-catalog/app/Dockerfile create mode 100644 fastapi-sqlalchemy-pg-catalog/app/main.py create mode 100644 fastapi-sqlalchemy-pg-catalog/app/requirements.txt create mode 100644 fastapi-sqlalchemy-pg-catalog/docker-compose.yml create mode 100755 fastapi-sqlalchemy-pg-catalog/flow.sh create mode 100644 fastapi-sqlalchemy-pg-catalog/init.sql diff --git a/fastapi-sqlalchemy-pg-catalog/README.md b/fastapi-sqlalchemy-pg-catalog/README.md new file mode 100644 index 0000000..94eb6da --- /dev/null +++ b/fastapi-sqlalchemy-pg-catalog/README.md @@ -0,0 +1,87 @@ +# fastapi-sqlalchemy-pg-catalog + +Minimal FastAPI + SQLAlchemy 2.x + psycopg2 + Postgres 13 sample that +reproduces the Postgres v3 dispatcher's simple-query `ClassCatalog` +asymmetry (keploy/integrations#193). + +## What the bug looks like + +At app boot, SQLAlchemy's `Base.metadata.create_all(engine)` issues a +`pg_catalog.pg_class` probe per declared table to decide whether to +skip `CREATE TABLE`. With psycopg2 + parameter-less SQL the probe +goes through the **simple-query** protocol path. + +In `pkg/postgres/v3/replayer/dispatcher/dispatcher.go`: + +* The **extended-query** path (`runEngineForPortal`, `case + match.ClassCatalog`) consults the recorded transactional mock first + and only falls back to the synthetic `Engines.Catalog.Execute` on + miss. +* The **simple-query** path (`dispatchBySQLHash`, `case + match.ClassCatalog`) goes straight to the synthetic engine — even + though a recorded `type: query` mock with `class: CATALOG` and the + correct rows is sitting in `mocks.yaml`. + +With no `type: catalog` snapshot present, the synthetic engine +answers `rows: 0, cc: "SELECT 0"`. SQLAlchemy reads zero rows as +"table missing", issues `CREATE TABLE project ...`, and the +transactional engine misses (because the recording never captured a +CREATE TABLE — at record time the table already existed). The app +worker dies with `psycopg2.DatabaseError: keploy-pg-v3: no recorded +invocation matched`, every HTTP testcase that follows fails with +connection-reset. + +## Reproducing locally + +```bash +cd fastapi-sqlalchemy-pg-catalog +docker compose build + +# Baseline (no keploy) — should pass +docker compose up -d +bash flow.sh +docker compose down -v + +# Record +( bash flow.sh > flow-record.log 2>&1 ) & +sudo -E keploy record \ + -c "docker compose -f docker-compose.yml up" \ + --container-name pg-catalog-repro-app \ + --cmd-type docker-compose \ + --record-timer 60s + +# Replay (pre-fix: FAILS with "no recorded invocation matched" on CREATE TABLE) +sudo -E keploy test \ + -c "docker compose -f docker-compose.yml up" \ + --container-name pg-catalog-repro-app \ + --cmd-type docker-compose \ + --apiTimeout 120 --delay 15 --disableMockUpload +``` + +## Layout + +| File | Purpose | +|-----------------------------|---------------------------------------------------------------------------| +| `app/main.py` | FastAPI app with one declarative `Project` model + lifespan create_all | +| `app/Dockerfile` | Python 3.12-slim + requirements | +| `app/requirements.txt` | fastapi, uvicorn, sqlalchemy 2.0.36, psycopg2-binary 2.9.10 | +| `docker-compose.yml` | postgres:13.22-alpine + app, app published at host port 8123 | +| `init.sql` | Pre-creates the `project` table so record-time create_all is a no-op | +| `flow.sh` | Drives `GET /health` and `GET /projects` against the app | + +## Compose env knobs + +Set these to isolate concurrent runs (the CI lane drives a 3-cell +matrix on one Docker daemon and overrides each): + +| Env var | Default | Purpose | +|------------------|-------------------------|------------------------------------------| +| `APP_CONTAINER` | `pg-catalog-repro-app` | App container name (keploy `--container-name`) | +| `DB_CONTAINER` | `pg-catalog-repro-db` | Postgres container name | +| `APP_HOST_PORT` | `8123` | Host-side port mapped to app's 8000 | +| `COMPOSE_NET` | `reprnet` | Docker network name | + +## Used by + +* `keploy/integrations` Woodpecker lane + `.woodpecker/sqlalchemy-pg-catalog-postgres.yml` diff --git a/fastapi-sqlalchemy-pg-catalog/app/Dockerfile b/fastapi-sqlalchemy-pg-catalog/app/Dockerfile new file mode 100644 index 0000000..f76e7a9 --- /dev/null +++ b/fastapi-sqlalchemy-pg-catalog/app/Dockerfile @@ -0,0 +1,12 @@ +FROM python:3.12-slim + +WORKDIR /app + +COPY requirements.txt . +RUN pip install --no-cache-dir -r requirements.txt + +COPY main.py . + +EXPOSE 8000 + +CMD ["uvicorn", "main:app", "--host", "0.0.0.0", "--port", "8000", "--log-level", "info"] diff --git a/fastapi-sqlalchemy-pg-catalog/app/main.py b/fastapi-sqlalchemy-pg-catalog/app/main.py new file mode 100644 index 0000000..84fdf81 --- /dev/null +++ b/fastapi-sqlalchemy-pg-catalog/app/main.py @@ -0,0 +1,70 @@ +""" +Minimal FastAPI + SQLAlchemy + psycopg2 app that exercises the Postgres +v3 dispatcher's simple-query ClassCatalog branch via SQLAlchemy's +``Base.metadata.create_all`` table-existence probe. + +Boot sequence: + 1. SQLAlchemy creates an engine over psycopg2 (simple-query for + parameter-less SQL). + 2. ``Base.metadata.create_all(engine)`` issues one + ``SELECT pg_catalog.pg_class.relname ...`` probe per declared table + to decide whether each ``CREATE TABLE`` should be skipped. + 3. FastAPI starts serving requests. + +The probe is what hits the dispatcher's ``case match.ClassCatalog`` +branch in ``pkg/postgres/v3/replayer/dispatcher/dispatcher.go`` +(simple-query path, ``dispatchBySQLHash``). +""" + +import logging +import os +import sys +from contextlib import asynccontextmanager + +from fastapi import FastAPI +from sqlalchemy import Column, Integer, String, create_engine, select +from sqlalchemy.orm import Session, declarative_base + +logging.basicConfig( + level=logging.INFO, + stream=sys.stdout, + format="%(asctime)s %(levelname)s %(name)s %(message)s", +) +log = logging.getLogger("repro") + +DATABASE_URL = os.environ["DATABASE_URL"] + +Base = declarative_base() + + +class Project(Base): + __tablename__ = "project" + + id = Column(Integer, primary_key=True) + name = Column(String(100), nullable=False) + + +engine = create_engine(DATABASE_URL, echo=True, future=True) + + +@asynccontextmanager +async def lifespan(_: FastAPI): + log.info("startup: running Base.metadata.create_all (pg_class probe expected)") + Base.metadata.create_all(engine) + log.info("startup: create_all complete") + yield + + +app = FastAPI(lifespan=lifespan) + + +@app.get("/health") +def health(): + return {"ok": True} + + +@app.get("/projects") +def list_projects(): + with Session(engine) as s: + rows = s.execute(select(Project)).scalars().all() + return [{"id": r.id, "name": r.name} for r in rows] diff --git a/fastapi-sqlalchemy-pg-catalog/app/requirements.txt b/fastapi-sqlalchemy-pg-catalog/app/requirements.txt new file mode 100644 index 0000000..5877279 --- /dev/null +++ b/fastapi-sqlalchemy-pg-catalog/app/requirements.txt @@ -0,0 +1,4 @@ +fastapi==0.115.0 +uvicorn==0.30.6 +sqlalchemy==2.0.36 +psycopg2-binary==2.9.10 diff --git a/fastapi-sqlalchemy-pg-catalog/docker-compose.yml b/fastapi-sqlalchemy-pg-catalog/docker-compose.yml new file mode 100644 index 0000000..9274231 --- /dev/null +++ b/fastapi-sqlalchemy-pg-catalog/docker-compose.yml @@ -0,0 +1,34 @@ +services: + postgres: + image: postgres:13.22-alpine + container_name: ${DB_CONTAINER:-pg-catalog-repro-db} + environment: + POSTGRES_PASSWORD: postgres + POSTGRES_DB: testdb + volumes: + - ./init.sql:/docker-entrypoint-initdb.d/init.sql:ro + networks: + - reprnet + healthcheck: + test: ["CMD-SHELL", "pg_isready -U postgres -d testdb"] + interval: 2s + timeout: 2s + retries: 30 + + app: + build: ./app + container_name: ${APP_CONTAINER:-pg-catalog-repro-app} + environment: + DATABASE_URL: postgresql+psycopg2://postgres:postgres@postgres:5432/testdb + depends_on: + postgres: + condition: service_healthy + ports: + - "${APP_HOST_PORT:-8123}:8000" + networks: + - reprnet + +networks: + reprnet: + name: ${COMPOSE_NET:-reprnet} + driver: bridge diff --git a/fastapi-sqlalchemy-pg-catalog/flow.sh b/fastapi-sqlalchemy-pg-catalog/flow.sh new file mode 100755 index 0000000..ffc8517 --- /dev/null +++ b/fastapi-sqlalchemy-pg-catalog/flow.sh @@ -0,0 +1,23 @@ +#!/usr/bin/env bash +# Drives traffic during keploy record. Hits both endpoints. +set -uo pipefail + +APP_HOST_PORT="${APP_HOST_PORT:-8123}" +APP_URL="${APP_URL:-http://localhost:${APP_HOST_PORT}}" + +echo "[flow] waiting for app at $APP_URL ..." +for i in $(seq 1 60); do + if curl -sf "$APP_URL/health" > /dev/null 2>&1; then + echo "[flow] app ready after ${i}s" + break + fi + sleep 1 +done + +echo "[flow] GET /health" +curl -sS "$APP_URL/health" && echo + +echo "[flow] GET /projects" +curl -sS "$APP_URL/projects" && echo + +echo "[flow] done" diff --git a/fastapi-sqlalchemy-pg-catalog/init.sql b/fastapi-sqlalchemy-pg-catalog/init.sql new file mode 100644 index 0000000..75b5bc1 --- /dev/null +++ b/fastapi-sqlalchemy-pg-catalog/init.sql @@ -0,0 +1,15 @@ +-- Pre-create the `project` table so SQLAlchemy's create_all() sees it +-- exists at record time and skips CREATE TABLE. This is what the bug +-- (keploy/integrations#193) requires: at record time the pg_class +-- probe answers "table exists", so CREATE TABLE is never sent and +-- never recorded. At replay time, if the simple-query dispatcher path +-- skips the recorded mock, the synthetic catalog engine returns zero +-- rows, SQLAlchemy concludes "table missing", and issues an +-- unrecorded CREATE TABLE -- which then misses the transactional +-- engine, raises a DatabaseError, and kills app boot. +CREATE TABLE IF NOT EXISTS project ( + id SERIAL PRIMARY KEY, + name VARCHAR(100) NOT NULL +); + +INSERT INTO project (name) VALUES ('seed') ON CONFLICT DO NOTHING; From 4becdde258afba0e3a31c2bdf9b2c845e34ab979 Mon Sep 17 00:00:00 2001 From: Akash Kumar Date: Tue, 12 May 2026 00:22:24 +0530 Subject: [PATCH 02/10] fastapi-sqlalchemy-pg-catalog: tighten flow.sh + init.sql per Copilot review flow.sh: - set -Eeuo pipefail so curl/other failures actually fail the script. - Track readiness explicitly; exit 1 with a clear message if the app never reaches /health within READY_TIMEOUT_S (default 60s) instead of silently falling through and proceeding against a dead app. - curl -fsS for the readiness probe and the endpoint calls so HTTP failures propagate as exit codes (the previous `curl -sS ... && echo` shape silenced non-2xx responses). init.sql: - Replace `INSERT ... ON CONFLICT DO NOTHING` with `INSERT ... SELECT WHERE NOT EXISTS`. The model has no UNIQUE constraint on name, so ON CONFLICT had nothing to fire on; this form is genuinely idempotent on re-runs against an existing volume. Refs Copilot review on keploy/samples-python#102. Signed-off-by: Akash Kumar --- fastapi-sqlalchemy-pg-catalog/flow.sh | 22 ++++++++++++++++------ fastapi-sqlalchemy-pg-catalog/init.sql | 8 +++++++- 2 files changed, 23 insertions(+), 7 deletions(-) diff --git a/fastapi-sqlalchemy-pg-catalog/flow.sh b/fastapi-sqlalchemy-pg-catalog/flow.sh index ffc8517..f0b5e73 100755 --- a/fastapi-sqlalchemy-pg-catalog/flow.sh +++ b/fastapi-sqlalchemy-pg-catalog/flow.sh @@ -1,23 +1,33 @@ #!/usr/bin/env bash # Drives traffic during keploy record. Hits both endpoints. -set -uo pipefail +set -Eeuo pipefail APP_HOST_PORT="${APP_HOST_PORT:-8123}" APP_URL="${APP_URL:-http://localhost:${APP_HOST_PORT}}" +READY_TIMEOUT_S="${READY_TIMEOUT_S:-60}" -echo "[flow] waiting for app at $APP_URL ..." -for i in $(seq 1 60); do - if curl -sf "$APP_URL/health" > /dev/null 2>&1; then +echo "[flow] waiting for app at $APP_URL (ceiling ${READY_TIMEOUT_S}s) ..." +ready=0 +for i in $(seq 1 "$READY_TIMEOUT_S"); do + if curl -fsS --max-time 1 "$APP_URL/health" > /dev/null 2>&1; then echo "[flow] app ready after ${i}s" + ready=1 break fi sleep 1 done +if [ "$ready" -ne 1 ]; then + echo "[flow] ERROR: app never became ready at $APP_URL/health within ${READY_TIMEOUT_S}s" >&2 + exit 1 +fi + echo "[flow] GET /health" -curl -sS "$APP_URL/health" && echo +curl -fsS "$APP_URL/health" +echo echo "[flow] GET /projects" -curl -sS "$APP_URL/projects" && echo +curl -fsS "$APP_URL/projects" +echo echo "[flow] done" diff --git a/fastapi-sqlalchemy-pg-catalog/init.sql b/fastapi-sqlalchemy-pg-catalog/init.sql index 75b5bc1..7d0dacf 100644 --- a/fastapi-sqlalchemy-pg-catalog/init.sql +++ b/fastapi-sqlalchemy-pg-catalog/init.sql @@ -12,4 +12,10 @@ CREATE TABLE IF NOT EXISTS project ( name VARCHAR(100) NOT NULL ); -INSERT INTO project (name) VALUES ('seed') ON CONFLICT DO NOTHING; +-- Idempotent seed. `ON CONFLICT DO NOTHING` would only help with a +-- UNIQUE/EXCLUSION constraint on name, which the SQLAlchemy model +-- doesn't declare; use NOT EXISTS so re-running this script against +-- an existing volume doesn't duplicate the row. +INSERT INTO project (name) +SELECT 'seed' +WHERE NOT EXISTS (SELECT 1 FROM project WHERE name = 'seed'); From ec3174ef3e85028e2fd9bace8d503084ad2454e4 Mon Sep 17 00:00:00 2001 From: Akash Kumar Date: Tue, 12 May 2026 11:02:46 +0530 Subject: [PATCH 03/10] fastapi-sqlalchemy-pg-catalog: parameterize README + SQL_ECHO env knob MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Addresses two Copilot review nits on keploy/samples-python#102: * README.md: the local-repro snippet hardcoded `--container-name pg-catalog-repro-app`, but the compose file uses `${APP_CONTAINER:-pg-catalog-repro-app}` so a user who overrides APP_CONTAINER (e.g. to isolate concurrent runs) would have keploy point at a non-existent container. Both keploy invocations now thread `${APP_CONTAINER:-pg-catalog-repro-app}` so they pick up the same env override compose sees. Also switched the deprecated camelCase `--apiTimeout`/`--disableMockUpload` to the kebab forms the v3 CLI actually registers (`--api-timeout`; mock-upload flag dropped — not registered on v3-dev `test`). * main.py: SQL echo was unconditional. It is INTENTIONALLY on by default for this sample (the whole point is to surface SQLAlchemy's pg_class probe + create_all behaviour in the app log so a reader can correlate it with the keploy agent log), but the noise is real for unrelated investigations. Gated behind SQL_ECHO env var with a comment explaining why it's on by default. Signed-off-by: Akash Kumar --- fastapi-sqlalchemy-pg-catalog/README.md | 10 +++++++--- fastapi-sqlalchemy-pg-catalog/app/main.py | 11 ++++++++++- 2 files changed, 17 insertions(+), 4 deletions(-) diff --git a/fastapi-sqlalchemy-pg-catalog/README.md b/fastapi-sqlalchemy-pg-catalog/README.md index 94eb6da..3185a15 100644 --- a/fastapi-sqlalchemy-pg-catalog/README.md +++ b/fastapi-sqlalchemy-pg-catalog/README.md @@ -43,19 +43,23 @@ bash flow.sh docker compose down -v # Record +# Both keploy invocations below pass `--container-name "${APP_CONTAINER:-pg-catalog-repro-app}"` +# so they track whatever the compose file is rendering for the app +# service. If you've overridden APP_CONTAINER (e.g. to isolate +# concurrent runs), the same export reaches both keploy and compose. ( bash flow.sh > flow-record.log 2>&1 ) & sudo -E keploy record \ -c "docker compose -f docker-compose.yml up" \ - --container-name pg-catalog-repro-app \ + --container-name "${APP_CONTAINER:-pg-catalog-repro-app}" \ --cmd-type docker-compose \ --record-timer 60s # Replay (pre-fix: FAILS with "no recorded invocation matched" on CREATE TABLE) sudo -E keploy test \ -c "docker compose -f docker-compose.yml up" \ - --container-name pg-catalog-repro-app \ + --container-name "${APP_CONTAINER:-pg-catalog-repro-app}" \ --cmd-type docker-compose \ - --apiTimeout 120 --delay 15 --disableMockUpload + --api-timeout 120 --delay 15 ``` ## Layout diff --git a/fastapi-sqlalchemy-pg-catalog/app/main.py b/fastapi-sqlalchemy-pg-catalog/app/main.py index 84fdf81..67fc158 100644 --- a/fastapi-sqlalchemy-pg-catalog/app/main.py +++ b/fastapi-sqlalchemy-pg-catalog/app/main.py @@ -33,6 +33,15 @@ log = logging.getLogger("repro") DATABASE_URL = os.environ["DATABASE_URL"] +# SQL echo is INTENTIONALLY on by default — this is a sample for +# demonstrating the dispatcher's simple-Query catalog path, and seeing +# the actual SQLAlchemy queries (pg_catalog.version, pg_class probe, +# CREATE TABLE on miss) in the app log is the load-bearing observation +# that lets a reader correlate the keploy agent log with what the app +# is doing. The trade-off: SQLAlchemy logs every statement at INFO, +# which is verbose in normal operation. Override SQL_ECHO=0 to quiet +# it down for unrelated investigations. +SQL_ECHO = os.environ.get("SQL_ECHO", "1") != "0" Base = declarative_base() @@ -44,7 +53,7 @@ class Project(Base): name = Column(String(100), nullable=False) -engine = create_engine(DATABASE_URL, echo=True, future=True) +engine = create_engine(DATABASE_URL, echo=SQL_ECHO, future=True) @asynccontextmanager From 3203a0a31e396ae03f6d868260665aaec4a4c63c Mon Sep 17 00:00:00 2001 From: Akash Kumar Date: Tue, 12 May 2026 11:27:03 +0530 Subject: [PATCH 04/10] fastapi-sqlalchemy-pg-catalog/app: clearer DATABASE_URL error + async-safe create_all Two nits from the Copilot review: * DATABASE_URL: `os.environ["DATABASE_URL"]` raised a bare KeyError at import time, which surfaces as a noisy traceback in container logs with no hint about what's missing. Switched to os.getenv with an explicit RuntimeError that names the env var and gives a sample URL format. * lifespan: `Base.metadata.create_all(engine)` is synchronous psycopg2 I/O running inside an async lifespan, blocking uvicorn's event loop until pg_class probe + any CREATE TABLE round-trips complete. Switched to `await asyncio.to_thread(...)` so the loop stays responsive. For this minimal repro the difference is small, but the pattern is the right FastAPI shape for any startup that touches a sync DB driver. Signed-off-by: Akash Kumar --- fastapi-sqlalchemy-pg-catalog/app/main.py | 16 ++++++++++++++-- 1 file changed, 14 insertions(+), 2 deletions(-) diff --git a/fastapi-sqlalchemy-pg-catalog/app/main.py b/fastapi-sqlalchemy-pg-catalog/app/main.py index 67fc158..866a7ef 100644 --- a/fastapi-sqlalchemy-pg-catalog/app/main.py +++ b/fastapi-sqlalchemy-pg-catalog/app/main.py @@ -16,6 +16,7 @@ (simple-query path, ``dispatchBySQLHash``). """ +import asyncio import logging import os import sys @@ -32,7 +33,12 @@ ) log = logging.getLogger("repro") -DATABASE_URL = os.environ["DATABASE_URL"] +DATABASE_URL = os.getenv("DATABASE_URL") +if not DATABASE_URL: + raise RuntimeError( + "DATABASE_URL is required (e.g. postgresql+psycopg2://user:pass@host:5432/db). " + "Set it in docker-compose env or in the host shell before launching uvicorn." + ) # SQL echo is INTENTIONALLY on by default — this is a sample for # demonstrating the dispatcher's simple-Query catalog path, and seeing # the actual SQLAlchemy queries (pg_catalog.version, pg_class probe, @@ -59,7 +65,13 @@ class Project(Base): @asynccontextmanager async def lifespan(_: FastAPI): log.info("startup: running Base.metadata.create_all (pg_class probe expected)") - Base.metadata.create_all(engine) + # create_all does synchronous psycopg2 I/O. Offload to a thread so + # uvicorn's event loop stays responsive (otherwise any other + # async work scheduled on startup would block until the pg_class + # probe + any CREATE TABLE round-trips complete). For this minimal + # repro the difference is small, but the pattern is the right + # FastAPI shape for any startup that touches a sync DB driver. + await asyncio.to_thread(Base.metadata.create_all, engine) log.info("startup: create_all complete") yield From 4316715e3be02b46f32293a062257f1d1cccc21c Mon Sep 17 00:00:00 2001 From: Akash Kumar Date: Tue, 12 May 2026 11:59:43 +0530 Subject: [PATCH 05/10] fastapi-sqlalchemy-pg-catalog: clarify simple-Query semantics on parameterized SQL Copilot review noted (correctly) that the README + main.py docstring described the pg_class probe as "parameter-less SQL", which conflicts with the recorded mock carrying 7 bind values. psycopg2 in fact uses the simple-Query protocol even when the source SQL is parameterized: it substitutes %(param)s placeholders client-side and emits the resulting inlined SQL as a single Q packet (no Bind/Execute frames). The bind values exist at the application layer; the wire shape is simple-Query. Both docs now say so explicitly so a reader doesn't see the recorded bind-value list and conclude the wire path must be extended-Query. Signed-off-by: Akash Kumar --- fastapi-sqlalchemy-pg-catalog/README.md | 10 ++++++++-- fastapi-sqlalchemy-pg-catalog/app/main.py | 12 +++++++++--- 2 files changed, 17 insertions(+), 5 deletions(-) diff --git a/fastapi-sqlalchemy-pg-catalog/README.md b/fastapi-sqlalchemy-pg-catalog/README.md index 3185a15..29e334c 100644 --- a/fastapi-sqlalchemy-pg-catalog/README.md +++ b/fastapi-sqlalchemy-pg-catalog/README.md @@ -8,8 +8,14 @@ asymmetry (keploy/integrations#193). At app boot, SQLAlchemy's `Base.metadata.create_all(engine)` issues a `pg_catalog.pg_class` probe per declared table to decide whether to -skip `CREATE TABLE`. With psycopg2 + parameter-less SQL the probe -goes through the **simple-query** protocol path. +skip `CREATE TABLE`. psycopg2 sends the probe over the +**simple-query** protocol (`Q` packet) even though the source SQL is +parameterized — it substitutes the `%(param)s` placeholders +client-side and emits the resulting inlined SQL as a single +statement, with no `Bind`/`Execute` frames. So the wire shape is +simple-Query carrying inlined bind values; the recorded mock keeps +the parameter list for matching, but the dispatcher's classifier +sees a simple-Query CATALOG request. In `pkg/postgres/v3/replayer/dispatcher/dispatcher.go`: diff --git a/fastapi-sqlalchemy-pg-catalog/app/main.py b/fastapi-sqlalchemy-pg-catalog/app/main.py index 866a7ef..e40f2e2 100644 --- a/fastapi-sqlalchemy-pg-catalog/app/main.py +++ b/fastapi-sqlalchemy-pg-catalog/app/main.py @@ -4,11 +4,17 @@ ``Base.metadata.create_all`` table-existence probe. Boot sequence: - 1. SQLAlchemy creates an engine over psycopg2 (simple-query for - parameter-less SQL). + 1. SQLAlchemy creates an engine over psycopg2. psycopg2 sends queries + via the simple-Query protocol (``Q`` packet) even when the source + SQL is parameterized: it does client-side ``%(param)s`` substitution + and emits the resulting string as a single inlined statement + (no ``Bind``/``Execute`` frames). 2. ``Base.metadata.create_all(engine)`` issues one ``SELECT pg_catalog.pg_class.relname ...`` probe per declared table - to decide whether each ``CREATE TABLE`` should be skipped. + to decide whether each ``CREATE TABLE`` should be skipped. The + probe SQL has 7 parameters (table name, relkind chars, namespace); + psycopg2 inlines them before the wire write, so the dispatcher sees + a simple-Query statement that classifies as ``ClassCatalog``. 3. FastAPI starts serving requests. The probe is what hits the dispatcher's ``case match.ClassCatalog`` From 226009f9cbba2e3a561f3a0096d4d0bd19f5aa85 Mon Sep 17 00:00:00 2001 From: Akash Kumar Date: Tue, 12 May 2026 12:35:33 +0530 Subject: [PATCH 06/10] fastapi-sqlalchemy-pg-catalog/app: drop redundant future=True on create_engine SQLAlchemy 2.x defaults to the future-2.0 behaviour; passing `future=True` is redundant and can trip a deprecation warning in some 2.x point releases. Dropped to keep the sample free of incidental warning noise that would distract from the dispatcher-bug repro. Refs Copilot review on keploy/samples-python#102. Signed-off-by: Akash Kumar --- fastapi-sqlalchemy-pg-catalog/app/main.py | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/fastapi-sqlalchemy-pg-catalog/app/main.py b/fastapi-sqlalchemy-pg-catalog/app/main.py index e40f2e2..b34e024 100644 --- a/fastapi-sqlalchemy-pg-catalog/app/main.py +++ b/fastapi-sqlalchemy-pg-catalog/app/main.py @@ -65,7 +65,10 @@ class Project(Base): name = Column(String(100), nullable=False) -engine = create_engine(DATABASE_URL, echo=SQL_ECHO, future=True) +# SQLAlchemy 2.x defaults to the future-2.0 behaviour, so no +# `future=True` is needed (and passing it can trip a deprecation +# warning depending on the installed minor version). +engine = create_engine(DATABASE_URL, echo=SQL_ECHO) @asynccontextmanager From 6e97dd3bb0404d3b2277dab2e71c67266521b065 Mon Sep 17 00:00:00 2001 From: Akash Kumar Date: Tue, 12 May 2026 12:41:30 +0530 Subject: [PATCH 07/10] fastapi-sqlalchemy-pg-catalog/app: dispose engine pool on lifespan shutdown Add try/finally around the lifespan yield so engine.dispose() runs at shutdown. Releases pooled psycopg2 connections cleanly across repeated start/stop cycles (local repro loops, CI lane reruns), which otherwise leak half-open postgres connections. Refs Copilot review on keploy/samples-python#102. Signed-off-by: Akash Kumar --- fastapi-sqlalchemy-pg-catalog/app/main.py | 9 ++++++++- 1 file changed, 8 insertions(+), 1 deletion(-) diff --git a/fastapi-sqlalchemy-pg-catalog/app/main.py b/fastapi-sqlalchemy-pg-catalog/app/main.py index b34e024..597a7bc 100644 --- a/fastapi-sqlalchemy-pg-catalog/app/main.py +++ b/fastapi-sqlalchemy-pg-catalog/app/main.py @@ -82,7 +82,14 @@ async def lifespan(_: FastAPI): # FastAPI shape for any startup that touches a sync DB driver. await asyncio.to_thread(Base.metadata.create_all, engine) log.info("startup: create_all complete") - yield + try: + yield + finally: + # Release pooled connections on shutdown so repeated + # start/stop cycles (local repro loops, CI lanes) don't leak + # half-open connections to postgres. + engine.dispose() + log.info("shutdown: engine pool disposed") app = FastAPI(lifespan=lifespan) From bd9cfc047c2e37db476e74b4166766e840e39af1 Mon Sep 17 00:00:00 2001 From: Akash Kumar Date: Tue, 12 May 2026 12:48:49 +0530 Subject: [PATCH 08/10] fastapi-sqlalchemy-pg-catalog/app: wrap create_all in the lifespan try/finally Copilot noted that the previous try/finally only covered the yield, so a failure in create_all (the *exact* failure mode this repro demonstrates: pre-fix keploy makes create_all raise psycopg2.DatabaseError) wouldn't reach the finally and the engine pool would leak. Moved the startup logging + create_all call inside the try block; finally now runs regardless of where the failure occurs. Signed-off-by: Akash Kumar --- fastapi-sqlalchemy-pg-catalog/app/main.py | 25 +++++++++++++++-------- 1 file changed, 16 insertions(+), 9 deletions(-) diff --git a/fastapi-sqlalchemy-pg-catalog/app/main.py b/fastapi-sqlalchemy-pg-catalog/app/main.py index 597a7bc..40d1b0e 100644 --- a/fastapi-sqlalchemy-pg-catalog/app/main.py +++ b/fastapi-sqlalchemy-pg-catalog/app/main.py @@ -73,16 +73,23 @@ class Project(Base): @asynccontextmanager async def lifespan(_: FastAPI): - log.info("startup: running Base.metadata.create_all (pg_class probe expected)") - # create_all does synchronous psycopg2 I/O. Offload to a thread so - # uvicorn's event loop stays responsive (otherwise any other - # async work scheduled on startup would block until the pg_class - # probe + any CREATE TABLE round-trips complete). For this minimal - # repro the difference is small, but the pattern is the right - # FastAPI shape for any startup that touches a sync DB driver. - await asyncio.to_thread(Base.metadata.create_all, engine) - log.info("startup: create_all complete") + # Wrap the startup work AND the yield in try/finally so + # engine.dispose() runs even when create_all raises — which is + # the exact failure mode this repro is built around (pre-fix + # keploy makes create_all issue an unrecorded CREATE TABLE that + # raises psycopg2.DatabaseError mid-startup; without the wrap, + # the connection pool would leak on every replay attempt). try: + log.info("startup: running Base.metadata.create_all (pg_class probe expected)") + # create_all does synchronous psycopg2 I/O. Offload to a thread + # so uvicorn's event loop stays responsive (otherwise any other + # async work scheduled on startup would block until the pg_class + # probe + any CREATE TABLE round-trips complete). For this + # minimal repro the difference is small, but the pattern is the + # right FastAPI shape for any startup that touches a sync DB + # driver. + await asyncio.to_thread(Base.metadata.create_all, engine) + log.info("startup: create_all complete") yield finally: # Release pooled connections on shutdown so repeated From 7ae4d92cf1d2fcc1dfd098f466941b22c3ab2efb Mon Sep 17 00:00:00 2001 From: Akash Kumar Date: Tue, 12 May 2026 12:56:08 +0530 Subject: [PATCH 09/10] fastapi-sqlalchemy-pg-catalog/init.sql: clarify when NOT EXISTS actually matters Copilot noted /docker-entrypoint-initdb.d scripts only run on first database initialization, so the previous comment's framing ('re-run against an existing volume') was misleading. Reworded to call out the actual scenario: this is a single-shot insert on a clean volume, and NOT EXISTS is defensive coverage for stale-volume reuse. Signed-off-by: Akash Kumar --- fastapi-sqlalchemy-pg-catalog/init.sql | 15 +++++++++++---- 1 file changed, 11 insertions(+), 4 deletions(-) diff --git a/fastapi-sqlalchemy-pg-catalog/init.sql b/fastapi-sqlalchemy-pg-catalog/init.sql index 7d0dacf..4349e82 100644 --- a/fastapi-sqlalchemy-pg-catalog/init.sql +++ b/fastapi-sqlalchemy-pg-catalog/init.sql @@ -12,10 +12,17 @@ CREATE TABLE IF NOT EXISTS project ( name VARCHAR(100) NOT NULL ); --- Idempotent seed. `ON CONFLICT DO NOTHING` would only help with a --- UNIQUE/EXCLUSION constraint on name, which the SQLAlchemy model --- doesn't declare; use NOT EXISTS so re-running this script against --- an existing volume doesn't duplicate the row. +-- Seed the table. +-- +-- Postgres only runs scripts under /docker-entrypoint-initdb.d on +-- *first* database initialization (empty data dir), so on a clean +-- container this is a single-shot insert and `ON CONFLICT` / +-- `NOT EXISTS` wouldn't normally matter. The `NOT EXISTS` guard is +-- defensive belt-and-suspenders for the degenerate case where the +-- compose stack reuses a stale Postgres data volume that already +-- carries the seed row — it keeps the script idempotent without +-- requiring a UNIQUE constraint on project.name (which the +-- SQLAlchemy model doesn't declare). INSERT INTO project (name) SELECT 'seed' WHERE NOT EXISTS (SELECT 1 FROM project WHERE name = 'seed'); From 3ab1ea330ec9341ec20c7fcc0daef3fc0413d9b0 Mon Sep 17 00:00:00 2001 From: Akash Kumar Date: Tue, 12 May 2026 13:01:01 +0530 Subject: [PATCH 10/10] =?UTF-8?q?fastapi-sqlalchemy-pg-catalog/init.sql:?= =?UTF-8?q?=20drop=20NOT=20EXISTS=20=E2=80=94=20script=20only=20runs=20on?= =?UTF-8?q?=20first=20init?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Copilot's repeated note: /docker-entrypoint-initdb.d scripts run only on first database init. On a stale data volume the script doesn't run at all, so the NOT EXISTS guard couldn't help anyway. Simplified to a plain INSERT and updated the comment to tell readers to recreate the volume (docker compose down -v) if they want a deterministic repro. Signed-off-by: Akash Kumar --- fastapi-sqlalchemy-pg-catalog/init.sql | 20 ++++++-------------- 1 file changed, 6 insertions(+), 14 deletions(-) diff --git a/fastapi-sqlalchemy-pg-catalog/init.sql b/fastapi-sqlalchemy-pg-catalog/init.sql index 4349e82..21bee4f 100644 --- a/fastapi-sqlalchemy-pg-catalog/init.sql +++ b/fastapi-sqlalchemy-pg-catalog/init.sql @@ -12,17 +12,9 @@ CREATE TABLE IF NOT EXISTS project ( name VARCHAR(100) NOT NULL ); --- Seed the table. --- --- Postgres only runs scripts under /docker-entrypoint-initdb.d on --- *first* database initialization (empty data dir), so on a clean --- container this is a single-shot insert and `ON CONFLICT` / --- `NOT EXISTS` wouldn't normally matter. The `NOT EXISTS` guard is --- defensive belt-and-suspenders for the degenerate case where the --- compose stack reuses a stale Postgres data volume that already --- carries the seed row — it keeps the script idempotent without --- requiring a UNIQUE constraint on project.name (which the --- SQLAlchemy model doesn't declare). -INSERT INTO project (name) -SELECT 'seed' -WHERE NOT EXISTS (SELECT 1 FROM project WHERE name = 'seed'); +-- Seed the table. The Postgres entrypoint runs scripts under +-- /docker-entrypoint-initdb.d only on first init (empty data dir), +-- so this is single-shot on a clean container. If you reuse a stale +-- data volume, this script doesn't run at all — re-create the +-- volume (`docker compose down -v`) for a deterministic repro. +INSERT INTO project (name) VALUES ('seed');