From 5c5ffe34f1c4fbb94e36c89f6999e39fca92e647 Mon Sep 17 00:00:00 2001
From: Burak Yigit Kaya <byk@sentry.io>
Date: Thu, 2 Apr 2026 00:51:45 +0000
Subject: [PATCH 1/9] docs: add tracemetrics dataset guidance and validate
 aggregate format

Add comprehensive documentation for the tracemetrics dataset:
- Agent guidance with query format, parameter table, and examples
- Common dashboard widget mistakes section
- AGENTS.md lore entry for tracemetrics gotchas

Add validation in validateAggregateNames() that rejects span-style
aggregates (e.g., count(), p50(span.duration)) when --dataset
tracemetrics is used, with a helpful error message showing the
correct comma-separated format: aggregation(value,name,type,unit).

Key lessons encoded:
- tracemetrics uses aggregation(value,metric_name,metric_type,unit)
  NOT the MRI format d:custom/name@unit used by the metrics dataset
- The unit must match what the SDK emits (none if unspecified)
- --group-by always requires --limit
- --sort must reference a field present in --query
- Span attributes cannot be used as aggregate fields
---
 AGENTS.md                                     |  2 +
 docs/src/content/docs/agent-guidance.md       | 61 +++++++++++++++++++
 plugins/sentry-cli/skills/sentry-cli/SKILL.md | 61 +++++++++++++++++++
 src/types/dashboard.ts                        | 40 ++++++++++++
 4 files changed, 164 insertions(+)

diff --git a/AGENTS.md b/AGENTS.md
index 6eb0c3855..da70c7553 100644
--- a/AGENTS.md
+++ b/AGENTS.md
@@ -952,6 +952,8 @@ mock.module("./some-module", () => ({
 <!-- lore:019d3e8a-a4bb-7271-98cf-4cf418f2f581 -->
 * **CLI telemetry command tags use sentry. prefix with dots not bare names**: The \`buildCommand\` wrapper sets the \`command\` telemetry tag using the full Stricli command prefix joined with dots: \`sentry.issue.explain\`, \`sentry.issue.list\`, \`sentry.api\`, etc. — NOT bare names like \`issue.explain\`. When querying Sentry Discover or building dashboard widgets, always use the \`sentry.\` prefix. Verify actual tag values with a Discover query (\`field:command, count()\`, grouped by \`command\`) before assuming the format.
 
+* **Dashboard tracemetrics dataset uses comma-separated aggregate format, not MRI**: SDK v10+ custom metrics (`Sentry.metrics.distribution()`, `.gauge()`, `.count()`) emit `trace_metric` envelope items. Dashboard widgets for these MUST use `--dataset tracemetrics` with aggregate format `aggregation(value,metric_name,metric_type,unit)` — e.g., `p50(value,completion.duration_ms,distribution,none)`. Using `--dataset metrics` with MRI format (`d:custom/name@unit`) produces "Internal Error" or "could not be resolved". The `unit` parameter must match the SDK emission exactly: `none` if no unit specified, `byte` for memory metrics, `second` for uptime. `tracemetrics` only supports `line`, `area`, `bar`, `big_number`, `categorical_bar` display types — no `table` or `stacked_area`. Widgets with `--group-by` always require `--limit`. Sort expressions must reference aggregates present in `--query`.
+
 <!-- lore:019d0846-17bd-7ff3-a6d7-09b59b69a8fe -->
 * **Use toMatchObject not toEqual when testing resolution results with optional fields**: When \`resolveProjectBySlug()\` or \`resolveOrgProjectTarget()\` adds optional fields (like \`projectData\`) to the return type, tests using \`expect(result).toEqual({ org, project })\` fail because \`toEqual\` requires exact match. Use \`toMatchObject({ org, project })\` instead — it checks the specified subset without failing on extra properties. This affects tests across \`event/view\`, \`log/view\`, \`trace/view\`, and \`trace/list\` test files.
 
diff --git a/docs/src/content/docs/agent-guidance.md b/docs/src/content/docs/agent-guidance.md
index 6d84b169a..393b317cd 100644
--- a/docs/src/content/docs/agent-guidance.md
+++ b/docs/src/content/docs/agent-guidance.md
@@ -130,6 +130,58 @@ Use **common** types for general dashboards. Use **specialized** only when speci
 
 Available datasets: `spans` (default, covers most use cases), `discover`, `issue`, `error-events`, `transaction-like`, `metrics`, `logs`, `tracemetrics`, `preprod-app-size`.
 
+### Choosing the right dataset
+
+- **`spans`** — use for span-based queries: `span.duration`, `span.op`, `transaction`, span attributes, `cache.hit`, etc. This is the default and covers most use cases.
+- **`tracemetrics`** — use for custom metrics emitted via `Sentry.metrics.distribution()`, `Sentry.metrics.gauge()`, `Sentry.metrics.count()`, or SDK integrations like `nodeRuntimeMetricsIntegration`. The query format is different from spans (see below).
+- **`metrics`** — the legacy MRI-based metrics dataset (`d:custom/name@unit`). Deprecated in favor of `tracemetrics` for SDK v10+ trace-bound metrics. Avoid unless you know the org has standalone custom metrics enabled.
+
+### tracemetrics query format
+
+Custom metrics emitted via the Sentry SDK (e.g., `Sentry.metrics.distribution("completion.duration_ms", value)`) use the `tracemetrics` dataset with a **comma-separated aggregate format**, not the MRI format used by the `metrics` dataset.
+
+**Format:** `aggregation(value,metric_name,metric_type,unit)`
+
+| Parameter | Description | Values |
+|---|---|---|
+| `aggregation` | Aggregate function | `avg`, `sum`, `count`, `p50`, `p75`, `p90`, `p95`, `p99`, `min`, `max` |
+| `value` | Literal string | Always `value` |
+| `metric_name` | The metric name as emitted by the SDK | e.g., `completion.duration_ms`, `node.runtime.cpu.utilization` |
+| `metric_type` | The Sentry metric type | `distribution`, `gauge`, `counter`, `set` |
+| `unit` | The unit passed to the SDK (or `none` if omitted) | `none`, `byte`, `second`, `millisecond`, `ratio`, etc. |
+
+**CLI shorthand:** `--query aggregation:value,metric_name,metric_type,unit`
+
+**Examples:**
+
+```bash
+# Distribution metric (no unit specified in SDK → "none")
+sentry dashboard widget add <dashboard> "Completion Latency" \
+  --display line --dataset tracemetrics \
+  --query "p50(value,completion.duration_ms,distribution,none)" \
+  --query "p90(value,completion.duration_ms,distribution,none)"
+
+# Gauge metric with byte unit (from nodeRuntimeMetricsIntegration)
+sentry dashboard widget add <dashboard> "Memory Usage" \
+  --display line --dataset tracemetrics \
+  --query "avg(value,node.runtime.mem.rss,gauge,byte)" \
+  --query "avg(value,node.runtime.mem.heap_used,gauge,byte)"
+
+# Gauge metric with no unit
+sentry dashboard widget add <dashboard> "CPU Utilization" \
+  --display line --dataset tracemetrics \
+  --query "avg(value,node.runtime.cpu.utilization,gauge,none)"
+```
+
+**How to determine the correct parameters:**
+1. **metric_name**: The first argument to `Sentry.metrics.distribution()`, `.gauge()`, or `.count()`.
+2. **metric_type**: `distribution` for `.distribution()`, `gauge` for `.gauge()`, `counter` for `.count()`.
+3. **unit**: The `unit` option passed to the SDK call. If no `unit` option is specified, use `none`. Check the SDK source for integrations — e.g., `nodeRuntimeMetricsIntegration` uses `byte` for memory metrics, `second` for CPU/uptime, and no unit (`none`) for utilization ratios.
+
+**Important:** Do NOT use MRI format (`d:custom/name@unit`) with the `tracemetrics` dataset — that format is for the legacy `metrics` dataset only. The `tracemetrics` dataset uses the comma-separated format above.
+
+**Display type restrictions:** `tracemetrics` supports `area`, `bar`, `big_number`, `categorical_bar`, `line` only. No `table`, `top_n`, or `stacked_area`.
+
 Run `sentry dashboard widget --help` for the full list including aggregate functions.
 
 **Row-filling examples:**
@@ -195,3 +247,12 @@ When querying the Events API (directly or via `sentry api`), valid dataset value
 - **Not using `--web`**: View commands support `-w`/`--web` to open the resource in the browser — useful for sharing links.
 - **Fetching API schemas instead of using the CLI**: Prefer `sentry schema` to browse the API and `sentry api` to make requests — the CLI handles authentication and endpoint resolution, so there's rarely a need to download OpenAPI specs separately.
 - **Using `sentry api` when CLI commands suffice**: `sentry issue list --json` already includes `shortId`, `title`, `priority`, `level`, `status`, `permalink`, and other fields at the top level. Some fields like `count`, `userCount`, `firstSeen`, and `lastSeen` may be null depending on the issue. Use `--fields` to select specific fields and `--help` to see all available fields. Only fall back to `sentry api` for data the CLI doesn't expose.
+
+### Dashboard Widget Mistakes
+
+- **Using `metrics` dataset for SDK v10+ custom metrics**: SDK v10+ emits trace-bound metrics via `trace_metric` envelope items. Use `--dataset tracemetrics` with the comma-separated format `aggregation(value,metric_name,metric_type,unit)`, NOT `--dataset metrics` with MRI format `d:custom/name@unit`.
+- **Wrong MRI unit in metrics queries**: If you do use the `metrics` dataset, the `@unit` suffix must exactly match what the SDK emits. If no `unit` option is passed to `Sentry.metrics.distribution()`, the unit is `none`. Check the SDK source — e.g., `nodeRuntimeMetricsIntegration` uses `byte` for memory, `second` for uptime, and no unit for utilization.
+- **Missing `--limit` with `--group-by`**: Widgets that use `--group-by` MUST include `--limit`. The Sentry API rejects grouped widgets without a limit. Always include `--limit 5` (or another value) when using `--group-by`.
+- **`--sort` referencing a field not in `--query`**: The `--sort` field must be one of the aggregate expressions in `--query`. If you sort by `-count` but only query `p50:span.duration`, the API returns 400. Either add `count` to `--query` or sort by a queried field.
+- **Span attributes are not queryable as metrics**: You cannot use `avg:dsn.files_collected` on span attributes via the `events-stats` endpoint. Span attributes are key-value metadata on spans — use them in `--where` filters or `--group-by` columns, not as aggregate fields. Only `span.duration` and a few built-in measurements support aggregation.
+- **Stale `--sort` after changing `--query`**: When editing a widget to change the query (e.g., p75→p50), also update `--sort` if it references the old aggregate. The API silently accepts the sort but the dashboard shows errors.
diff --git a/plugins/sentry-cli/skills/sentry-cli/SKILL.md b/plugins/sentry-cli/skills/sentry-cli/SKILL.md
index 7cb991827..d775f804c 100644
--- a/plugins/sentry-cli/skills/sentry-cli/SKILL.md
+++ b/plugins/sentry-cli/skills/sentry-cli/SKILL.md
@@ -140,6 +140,58 @@ Use **common** types for general dashboards. Use **specialized** only when speci
 
 Available datasets: `spans` (default, covers most use cases), `discover`, `issue`, `error-events`, `transaction-like`, `metrics`, `logs`, `tracemetrics`, `preprod-app-size`.
 
+#### Choosing the right dataset
+
+- **`spans`** — use for span-based queries: `span.duration`, `span.op`, `transaction`, span attributes, `cache.hit`, etc. This is the default and covers most use cases.
+- **`tracemetrics`** — use for custom metrics emitted via `Sentry.metrics.distribution()`, `Sentry.metrics.gauge()`, `Sentry.metrics.count()`, or SDK integrations like `nodeRuntimeMetricsIntegration`. The query format is different from spans (see below).
+- **`metrics`** — the legacy MRI-based metrics dataset (`d:custom/name@unit`). Deprecated in favor of `tracemetrics` for SDK v10+ trace-bound metrics. Avoid unless you know the org has standalone custom metrics enabled.
+
+#### tracemetrics query format
+
+Custom metrics emitted via the Sentry SDK (e.g., `Sentry.metrics.distribution("completion.duration_ms", value)`) use the `tracemetrics` dataset with a **comma-separated aggregate format**, not the MRI format used by the `metrics` dataset.
+
+**Format:** `aggregation(value,metric_name,metric_type,unit)`
+
+| Parameter | Description | Values |
+|---|---|---|
+| `aggregation` | Aggregate function | `avg`, `sum`, `count`, `p50`, `p75`, `p90`, `p95`, `p99`, `min`, `max` |
+| `value` | Literal string | Always `value` |
+| `metric_name` | The metric name as emitted by the SDK | e.g., `completion.duration_ms`, `node.runtime.cpu.utilization` |
+| `metric_type` | The Sentry metric type | `distribution`, `gauge`, `counter`, `set` |
+| `unit` | The unit passed to the SDK (or `none` if omitted) | `none`, `byte`, `second`, `millisecond`, `ratio`, etc. |
+
+**CLI shorthand:** `--query aggregation:value,metric_name,metric_type,unit`
+
+**Examples:**
+
+```bash
+# Distribution metric (no unit specified in SDK → "none")
+sentry dashboard widget add <dashboard> "Completion Latency" \
+  --display line --dataset tracemetrics \
+  --query "p50(value,completion.duration_ms,distribution,none)" \
+  --query "p90(value,completion.duration_ms,distribution,none)"
+
+# Gauge metric with byte unit (from nodeRuntimeMetricsIntegration)
+sentry dashboard widget add <dashboard> "Memory Usage" \
+  --display line --dataset tracemetrics \
+  --query "avg(value,node.runtime.mem.rss,gauge,byte)" \
+  --query "avg(value,node.runtime.mem.heap_used,gauge,byte)"
+
+# Gauge metric with no unit
+sentry dashboard widget add <dashboard> "CPU Utilization" \
+  --display line --dataset tracemetrics \
+  --query "avg(value,node.runtime.cpu.utilization,gauge,none)"
+```
+
+**How to determine the correct parameters:**
+1. **metric_name**: The first argument to `Sentry.metrics.distribution()`, `.gauge()`, or `.count()`.
+2. **metric_type**: `distribution` for `.distribution()`, `gauge` for `.gauge()`, `counter` for `.count()`.
+3. **unit**: The `unit` option passed to the SDK call. If no `unit` option is specified, use `none`. Check the SDK source for integrations — e.g., `nodeRuntimeMetricsIntegration` uses `byte` for memory metrics, `second` for CPU/uptime, and no unit (`none`) for utilization ratios.
+
+**Important:** Do NOT use MRI format (`d:custom/name@unit`) with the `tracemetrics` dataset — that format is for the legacy `metrics` dataset only. The `tracemetrics` dataset uses the comma-separated format above.
+
+**Display type restrictions:** `tracemetrics` supports `area`, `bar`, `big_number`, `categorical_bar`, `line` only. No `table`, `top_n`, or `stacked_area`.
+
 Run `sentry dashboard widget --help` for the full list including aggregate functions.
 
 **Row-filling examples:**
@@ -206,6 +258,15 @@ When querying the Events API (directly or via `sentry api`), valid dataset value
 - **Fetching API schemas instead of using the CLI**: Prefer `sentry schema` to browse the API and `sentry api` to make requests — the CLI handles authentication and endpoint resolution, so there's rarely a need to download OpenAPI specs separately.
 - **Using `sentry api` when CLI commands suffice**: `sentry issue list --json` already includes `shortId`, `title`, `priority`, `level`, `status`, `permalink`, and other fields at the top level. Some fields like `count`, `userCount`, `firstSeen`, and `lastSeen` may be null depending on the issue. Use `--fields` to select specific fields and `--help` to see all available fields. Only fall back to `sentry api` for data the CLI doesn't expose.
 
+#### Dashboard Widget Mistakes
+
+- **Using `metrics` dataset for SDK v10+ custom metrics**: SDK v10+ emits trace-bound metrics via `trace_metric` envelope items. Use `--dataset tracemetrics` with the comma-separated format `aggregation(value,metric_name,metric_type,unit)`, NOT `--dataset metrics` with MRI format `d:custom/name@unit`.
+- **Wrong MRI unit in metrics queries**: If you do use the `metrics` dataset, the `@unit` suffix must exactly match what the SDK emits. If no `unit` option is passed to `Sentry.metrics.distribution()`, the unit is `none`. Check the SDK source — e.g., `nodeRuntimeMetricsIntegration` uses `byte` for memory, `second` for uptime, and no unit for utilization.
+- **Missing `--limit` with `--group-by`**: Widgets that use `--group-by` MUST include `--limit`. The Sentry API rejects grouped widgets without a limit. Always include `--limit 5` (or another value) when using `--group-by`.
+- **`--sort` referencing a field not in `--query`**: The `--sort` field must be one of the aggregate expressions in `--query`. If you sort by `-count` but only query `p50:span.duration`, the API returns 400. Either add `count` to `--query` or sort by a queried field.
+- **Span attributes are not queryable as metrics**: You cannot use `avg:dsn.files_collected` on span attributes via the `events-stats` endpoint. Span attributes are key-value metadata on spans — use them in `--where` filters or `--group-by` columns, not as aggregate fields. Only `span.duration` and a few built-in measurements support aggregation.
+- **Stale `--sort` after changing `--query`**: When editing a widget to change the query (e.g., p75→p50), also update `--sort` if it references the old aggregate. The API silently accepts the sort but the dashboard shows errors.
+
 ## Prerequisites
 
 The CLI must be installed and authenticated before use.
diff --git a/src/types/dashboard.ts b/src/types/dashboard.ts
index 48eda1daa..051cf0a39 100644
--- a/src/types/dashboard.ts
+++ b/src/types/dashboard.ts
@@ -478,10 +478,29 @@ function extractFunctionName(aggregate: string): string {
   return parenIdx > 0 ? aggregate.slice(0, parenIdx) : aggregate;
 }
 
+/**
+ * True when `aggregate` is `fn(value,metric_name,metric_type,unit)`, e.g.
+ * `p50(value,completion.duration_ms,distribution,none)`: a non-empty function
+ * name, a closing `)`, and exactly four non-empty args, the first `value`.
+ */
+function isTracemetricsAggregate(aggregate: string): boolean {
+  const parenIdx = aggregate.indexOf("(");
+  if (parenIdx <= 0 || !aggregate.endsWith(")")) {
+    return false;
+  }
+  const args = aggregate.slice(parenIdx + 1, -1).split(",");
+  return args.length === 4 && args[0] === "value" && !args.includes("");
+}
+
 /**
  * Validate that all aggregate function names in a list are known.
  * Throws a ValidationError listing valid functions if any are invalid.
  *
+ * For the `tracemetrics` dataset, aggregates must use the comma-separated
+ * format: `aggregation(value,metric_name,metric_type,unit)`. Standard
+ * span-style aggregates like `count()` or `p50(span.duration)` are
+ * invalid for tracemetrics.
+ *
  * @param aggregates - Parsed aggregate strings (e.g. ["count()", "p95(span.duration)"])
  * @param dataset - Widget dataset, determines which function list to validate against
  */
@@ -489,6 +508,27 @@ export function validateAggregateNames(
   aggregates: string[],
   dataset?: string
 ): void {
+  // tracemetrics uses a different aggregate format — validate structure, not function names
+  if (dataset === "tracemetrics") {
+    for (const agg of aggregates) {
+      if (!isTracemetricsAggregate(agg)) {
+        throw new ValidationError(
+          `Invalid tracemetrics aggregate "${agg}".\n\n` +
+            "tracemetrics queries must use the format: aggregation(value,metric_name,metric_type,unit)\n" +
+            "Example: p50(value,completion.duration_ms,distribution,none)\n\n" +
+            "Parameters:\n" +
+            "  - aggregation: avg, sum, count, p50, p75, p90, p95, p99, min, max\n" +
+            `  - value: literal string "value"\n` +
+            "  - metric_name: the name passed to Sentry.metrics.distribution/gauge/count\n" +
+            "  - metric_type: distribution, gauge, counter, set\n" +
+            "  - unit: none, byte, second, millisecond, etc. (must match SDK emission)",
+          "query"
+        );
+      }
+    }
+    return;
+  }
+
   const validFunctions: readonly string[] =
     dataset === "discover" || dataset === "error-events"
       ? DISCOVER_AGGREGATE_FUNCTIONS

From b5ad3ad1a80c44d3b148dffc58f0c2842da79524 Mon Sep 17 00:00:00 2001
From: Burak Yigit Kaya <byk@sentry.io>
Date: Thu, 2 Apr 2026 08:52:18 +0000
Subject: [PATCH 2/9] refactor(dashboard): remove deprecated metrics dataset,
 move dataset guidance to widget help

Remove the legacy 'metrics' dataset (MRI-based, d:custom/name@unit)
from WIDGET_TYPES, DATASET_SUPPORTED_DISPLAY_TYPES, widget help text,
agent guidance, and tests. The 'tracemetrics' dataset with its
comma-separated format is the correct choice for SDK v10+ custom
metrics.

Move dataset descriptions and tracemetrics query format docs into
the widget command's fullDescription so both humans and agents see
it via 'sentry dashboard widget --help'.
---
 AGENTS.md                                     |  2 +-
 docs/src/content/docs/agent-guidance.md       | 68 ++-----------------
 plugins/sentry-cli/skills/sentry-cli/SKILL.md | 68 ++-----------------
 src/commands/dashboard/widget/index.ts        | 23 ++++++-
 src/types/dashboard.ts                        | 13 +---
 test/types/dashboard.test.ts                  |  2 -
 6 files changed, 37 insertions(+), 139 deletions(-)

diff --git a/AGENTS.md b/AGENTS.md
index da70c7553..df3159b83 100644
--- a/AGENTS.md
+++ b/AGENTS.md
@@ -952,7 +952,7 @@ mock.module("./some-module", () => ({
 <!-- lore:019d3e8a-a4bb-7271-98cf-4cf418f2f581 -->
 * **CLI telemetry command tags use sentry. prefix with dots not bare names**: The \`buildCommand\` wrapper sets the \`command\` telemetry tag using the full Stricli command prefix joined with dots: \`sentry.issue.explain\`, \`sentry.issue.list\`, \`sentry.api\`, etc. — NOT bare names like \`issue.explain\`. When querying Sentry Discover or building dashboard widgets, always use the \`sentry.\` prefix. Verify actual tag values with a Discover query (\`field:command, count()\`, grouped by \`command\`) before assuming the format.
 
-* **Dashboard tracemetrics dataset uses comma-separated aggregate format, not MRI**: SDK v10+ custom metrics (`Sentry.metrics.distribution()`, `.gauge()`, `.count()`) emit `trace_metric` envelope items. Dashboard widgets for these MUST use `--dataset tracemetrics` with aggregate format `aggregation(value,metric_name,metric_type,unit)` — e.g., `p50(value,completion.duration_ms,distribution,none)`. Using `--dataset metrics` with MRI format (`d:custom/name@unit`) produces "Internal Error" or "could not be resolved". The `unit` parameter must match the SDK emission exactly: `none` if no unit specified, `byte` for memory metrics, `second` for uptime. `tracemetrics` only supports `line`, `area`, `bar`, `big_number`, `categorical_bar` display types — no `table` or `stacked_area`. Widgets with `--group-by` always require `--limit`. Sort expressions must reference aggregates present in `--query`.
+* **Dashboard tracemetrics dataset uses comma-separated aggregate format**: SDK v10+ custom metrics (`Sentry.metrics.distribution()`, `.gauge()`, `.count()`) emit `trace_metric` envelope items. Dashboard widgets for these MUST use `--dataset tracemetrics` with aggregate format `aggregation(value,metric_name,metric_type,unit)` — e.g., `p50(value,completion.duration_ms,distribution,none)`. The `unit` parameter must match the SDK emission exactly: `none` if no unit specified, `byte` for memory metrics, `second` for uptime. `tracemetrics` only supports `line`, `area`, `bar`, `big_number`, `categorical_bar` display types — no `table` or `stacked_area`. Widgets with `--group-by` always require `--limit`. Sort expressions must reference aggregates present in `--query`.
 
 <!-- lore:019d0846-17bd-7ff3-a6d7-09b59b69a8fe -->
 * **Use toMatchObject not toEqual when testing resolution results with optional fields**: When \`resolveProjectBySlug()\` or \`resolveOrgProjectTarget()\` adds optional fields (like \`projectData\`) to the return type, tests using \`expect(result).toEqual({ org, project })\` fail because \`toEqual\` requires exact match. Use \`toMatchObject({ org, project })\` instead — it checks the specified subset without failing on extra properties. This affects tests across \`event/view\`, \`log/view\`, \`trace/view\`, and \`trace/list\` test files.
diff --git a/docs/src/content/docs/agent-guidance.md b/docs/src/content/docs/agent-guidance.md
index 393b317cd..2cb9ff937 100644
--- a/docs/src/content/docs/agent-guidance.md
+++ b/docs/src/content/docs/agent-guidance.md
@@ -128,61 +128,7 @@ Display types with default sizes:
 
 Use **common** types for general dashboards. Use **specialized** only when specifically requested. Avoid **internal** types unless the user explicitly asks.
 
-Available datasets: `spans` (default, covers most use cases), `discover`, `issue`, `error-events`, `transaction-like`, `metrics`, `logs`, `tracemetrics`, `preprod-app-size`.
-
-### Choosing the right dataset
-
-- **`spans`** — use for span-based queries: `span.duration`, `span.op`, `transaction`, span attributes, `cache.hit`, etc. This is the default and covers most use cases.
-- **`tracemetrics`** — use for custom metrics emitted via `Sentry.metrics.distribution()`, `Sentry.metrics.gauge()`, `Sentry.metrics.count()`, or SDK integrations like `nodeRuntimeMetricsIntegration`. The query format is different from spans (see below).
-- **`metrics`** — the legacy MRI-based metrics dataset (`d:custom/name@unit`). Deprecated in favor of `tracemetrics` for SDK v10+ trace-bound metrics. Avoid unless you know the org has standalone custom metrics enabled.
-
-### tracemetrics query format
-
-Custom metrics emitted via the Sentry SDK (e.g., `Sentry.metrics.distribution("completion.duration_ms", value)`) use the `tracemetrics` dataset with a **comma-separated aggregate format**, not the MRI format used by the `metrics` dataset.
-
-**Format:** `aggregation(value,metric_name,metric_type,unit)`
-
-| Parameter | Description | Values |
-|---|---|---|
-| `aggregation` | Aggregate function | `avg`, `sum`, `count`, `p50`, `p75`, `p90`, `p95`, `p99`, `min`, `max` |
-| `value` | Literal string | Always `value` |
-| `metric_name` | The metric name as emitted by the SDK | e.g., `completion.duration_ms`, `node.runtime.cpu.utilization` |
-| `metric_type` | The Sentry metric type | `distribution`, `gauge`, `counter`, `set` |
-| `unit` | The unit passed to the SDK (or `none` if omitted) | `none`, `byte`, `second`, `millisecond`, `ratio`, etc. |
-
-**CLI shorthand:** `--query aggregation:value,metric_name,metric_type,unit`
-
-**Examples:**
-
-```bash
-# Distribution metric (no unit specified in SDK → "none")
-sentry dashboard widget add <dashboard> "Completion Latency" \
-  --display line --dataset tracemetrics \
-  --query "p50(value,completion.duration_ms,distribution,none)" \
-  --query "p90(value,completion.duration_ms,distribution,none)"
-
-# Gauge metric with byte unit (from nodeRuntimeMetricsIntegration)
-sentry dashboard widget add <dashboard> "Memory Usage" \
-  --display line --dataset tracemetrics \
-  --query "avg(value,node.runtime.mem.rss,gauge,byte)" \
-  --query "avg(value,node.runtime.mem.heap_used,gauge,byte)"
-
-# Gauge metric with no unit
-sentry dashboard widget add <dashboard> "CPU Utilization" \
-  --display line --dataset tracemetrics \
-  --query "avg(value,node.runtime.cpu.utilization,gauge,none)"
-```
-
-**How to determine the correct parameters:**
-1. **metric_name**: The first argument to `Sentry.metrics.distribution()`, `.gauge()`, or `.count()`.
-2. **metric_type**: `distribution` for `.distribution()`, `gauge` for `.gauge()`, `counter` for `.count()`.
-3. **unit**: The `unit` option passed to the SDK call. If no `unit` option is specified, use `none`. Check the SDK source for integrations — e.g., `nodeRuntimeMetricsIntegration` uses `byte` for memory metrics, `second` for CPU/uptime, and no unit (`none`) for utilization ratios.
-
-**Important:** Do NOT use MRI format (`d:custom/name@unit`) with the `tracemetrics` dataset — that format is for the legacy `metrics` dataset only. The `tracemetrics` dataset uses the comma-separated format above.
-
-**Display type restrictions:** `tracemetrics` supports `area`, `bar`, `big_number`, `categorical_bar`, `line` only. No `table`, `top_n`, or `stacked_area`.
-
-Run `sentry dashboard widget --help` for the full list including aggregate functions.
+Available datasets: `spans` (default), `tracemetrics`, `discover`, `issue`, `error-events`, `logs`. Run `sentry dashboard widget --help` for dataset descriptions, query formats, and examples.
 
 **Row-filling examples:**
 
@@ -250,9 +196,9 @@ When querying the Events API (directly or via `sentry api`), valid dataset value
 
 ### Dashboard Widget Mistakes
 
-- **Using `metrics` dataset for SDK v10+ custom metrics**: SDK v10+ emits trace-bound metrics via `trace_metric` envelope items. Use `--dataset tracemetrics` with the comma-separated format `aggregation(value,metric_name,metric_type,unit)`, NOT `--dataset metrics` with MRI format `d:custom/name@unit`.
-- **Wrong MRI unit in metrics queries**: If you do use the `metrics` dataset, the `@unit` suffix must exactly match what the SDK emits. If no `unit` option is passed to `Sentry.metrics.distribution()`, the unit is `none`. Check the SDK source — e.g., `nodeRuntimeMetricsIntegration` uses `byte` for memory, `second` for uptime, and no unit for utilization.
-- **Missing `--limit` with `--group-by`**: Widgets that use `--group-by` MUST include `--limit`. The Sentry API rejects grouped widgets without a limit. Always include `--limit 5` (or another value) when using `--group-by`.
-- **`--sort` referencing a field not in `--query`**: The `--sort` field must be one of the aggregate expressions in `--query`. If you sort by `-count` but only query `p50:span.duration`, the API returns 400. Either add `count` to `--query` or sort by a queried field.
-- **Span attributes are not queryable as metrics**: You cannot use `avg:dsn.files_collected` on span attributes via the `events-stats` endpoint. Span attributes are key-value metadata on spans — use them in `--where` filters or `--group-by` columns, not as aggregate fields. Only `span.duration` and a few built-in measurements support aggregation.
-- **Stale `--sort` after changing `--query`**: When editing a widget to change the query (e.g., p75→p50), also update `--sort` if it references the old aggregate. The API silently accepts the sort but the dashboard shows errors.
+- **Wrong dataset for custom metrics**: Use `--dataset tracemetrics` for custom metrics (`Sentry.metrics.distribution/gauge/count`). The query format is `aggregation(value,metric_name,metric_type,unit)` — see `sentry dashboard widget --help` for details.
+- **Wrong unit in tracemetrics queries**: The `unit` parameter must match the SDK emission. If no `unit` option is passed to `Sentry.metrics.*()`, use `none`. Check the SDK source for integrations — e.g., `nodeRuntimeMetricsIntegration` uses `byte` for memory, `second` for uptime, `none` for utilization ratios.
+- **Missing `--limit` with `--group-by`**: The Sentry API rejects grouped widgets without a limit. Always include `--limit` when using `--group-by`.
+- **`--sort` referencing a field not in `--query`**: The sort field must be one of the aggregate expressions in `--query`. If you sort by `-count` but only query `p50:span.duration`, the API returns 400.
+- **Span attributes are not aggregatable**: You cannot use `avg:dsn.files_collected` on span attributes. Span attributes are key-value metadata — use them in `--where` filters or `--group-by` columns, not as aggregate fields. Only `span.duration` and built-in measurements support aggregation.
+- **Stale `--sort` after changing `--query`**: When editing a widget to change the query (e.g., p75→p50), also update `--sort` if it references the old aggregate.
diff --git a/plugins/sentry-cli/skills/sentry-cli/SKILL.md b/plugins/sentry-cli/skills/sentry-cli/SKILL.md
index d775f804c..50e67d651 100644
--- a/plugins/sentry-cli/skills/sentry-cli/SKILL.md
+++ b/plugins/sentry-cli/skills/sentry-cli/SKILL.md
@@ -138,61 +138,7 @@ Display types with default sizes:
 
 Use **common** types for general dashboards. Use **specialized** only when specifically requested. Avoid **internal** types unless the user explicitly asks.
 
-Available datasets: `spans` (default, covers most use cases), `discover`, `issue`, `error-events`, `transaction-like`, `metrics`, `logs`, `tracemetrics`, `preprod-app-size`.
-
-#### Choosing the right dataset
-
-- **`spans`** — use for span-based queries: `span.duration`, `span.op`, `transaction`, span attributes, `cache.hit`, etc. This is the default and covers most use cases.
-- **`tracemetrics`** — use for custom metrics emitted via `Sentry.metrics.distribution()`, `Sentry.metrics.gauge()`, `Sentry.metrics.count()`, or SDK integrations like `nodeRuntimeMetricsIntegration`. The query format is different from spans (see below).
-- **`metrics`** — the legacy MRI-based metrics dataset (`d:custom/name@unit`). Deprecated in favor of `tracemetrics` for SDK v10+ trace-bound metrics. Avoid unless you know the org has standalone custom metrics enabled.
-
-#### tracemetrics query format
-
-Custom metrics emitted via the Sentry SDK (e.g., `Sentry.metrics.distribution("completion.duration_ms", value)`) use the `tracemetrics` dataset with a **comma-separated aggregate format**, not the MRI format used by the `metrics` dataset.
-
-**Format:** `aggregation(value,metric_name,metric_type,unit)`
-
-| Parameter | Description | Values |
-|---|---|---|
-| `aggregation` | Aggregate function | `avg`, `sum`, `count`, `p50`, `p75`, `p90`, `p95`, `p99`, `min`, `max` |
-| `value` | Literal string | Always `value` |
-| `metric_name` | The metric name as emitted by the SDK | e.g., `completion.duration_ms`, `node.runtime.cpu.utilization` |
-| `metric_type` | The Sentry metric type | `distribution`, `gauge`, `counter`, `set` |
-| `unit` | The unit passed to the SDK (or `none` if omitted) | `none`, `byte`, `second`, `millisecond`, `ratio`, etc. |
-
-**CLI shorthand:** `--query aggregation:value,metric_name,metric_type,unit`
-
-**Examples:**
-
-```bash
-# Distribution metric (no unit specified in SDK → "none")
-sentry dashboard widget add <dashboard> "Completion Latency" \
-  --display line --dataset tracemetrics \
-  --query "p50(value,completion.duration_ms,distribution,none)" \
-  --query "p90(value,completion.duration_ms,distribution,none)"
-
-# Gauge metric with byte unit (from nodeRuntimeMetricsIntegration)
-sentry dashboard widget add <dashboard> "Memory Usage" \
-  --display line --dataset tracemetrics \
-  --query "avg(value,node.runtime.mem.rss,gauge,byte)" \
-  --query "avg(value,node.runtime.mem.heap_used,gauge,byte)"
-
-# Gauge metric with no unit
-sentry dashboard widget add <dashboard> "CPU Utilization" \
-  --display line --dataset tracemetrics \
-  --query "avg(value,node.runtime.cpu.utilization,gauge,none)"
-```
-
-**How to determine the correct parameters:**
-1. **metric_name**: The first argument to `Sentry.metrics.distribution()`, `.gauge()`, or `.count()`.
-2. **metric_type**: `distribution` for `.distribution()`, `gauge` for `.gauge()`, `counter` for `.count()`.
-3. **unit**: The `unit` option passed to the SDK call. If no `unit` option is specified, use `none`. Check the SDK source for integrations — e.g., `nodeRuntimeMetricsIntegration` uses `byte` for memory metrics, `second` for CPU/uptime, and no unit (`none`) for utilization ratios.
-
-**Important:** Do NOT use MRI format (`d:custom/name@unit`) with the `tracemetrics` dataset — that format is for the legacy `metrics` dataset only. The `tracemetrics` dataset uses the comma-separated format above.
-
-**Display type restrictions:** `tracemetrics` supports `area`, `bar`, `big_number`, `categorical_bar`, `line` only. No `table`, `top_n`, or `stacked_area`.
-
-Run `sentry dashboard widget --help` for the full list including aggregate functions.
+Available datasets: `spans` (default), `tracemetrics`, `discover`, `issue`, `error-events`, `logs`. Run `sentry dashboard widget --help` for dataset descriptions, query formats, and examples.
 
 **Row-filling examples:**
 
@@ -260,12 +206,12 @@ When querying the Events API (directly or via `sentry api`), valid dataset value
 
 #### Dashboard Widget Mistakes
 
-- **Using `metrics` dataset for SDK v10+ custom metrics**: SDK v10+ emits trace-bound metrics via `trace_metric` envelope items. Use `--dataset tracemetrics` with the comma-separated format `aggregation(value,metric_name,metric_type,unit)`, NOT `--dataset metrics` with MRI format `d:custom/name@unit`.
-- **Wrong MRI unit in metrics queries**: If you do use the `metrics` dataset, the `@unit` suffix must exactly match what the SDK emits. If no `unit` option is passed to `Sentry.metrics.distribution()`, the unit is `none`. Check the SDK source — e.g., `nodeRuntimeMetricsIntegration` uses `byte` for memory, `second` for uptime, and no unit for utilization.
-- **Missing `--limit` with `--group-by`**: Widgets that use `--group-by` MUST include `--limit`. The Sentry API rejects grouped widgets without a limit. Always include `--limit 5` (or another value) when using `--group-by`.
-- **`--sort` referencing a field not in `--query`**: The `--sort` field must be one of the aggregate expressions in `--query`. If you sort by `-count` but only query `p50:span.duration`, the API returns 400. Either add `count` to `--query` or sort by a queried field.
-- **Span attributes are not queryable as metrics**: You cannot use `avg:dsn.files_collected` on span attributes via the `events-stats` endpoint. Span attributes are key-value metadata on spans — use them in `--where` filters or `--group-by` columns, not as aggregate fields. Only `span.duration` and a few built-in measurements support aggregation.
-- **Stale `--sort` after changing `--query`**: When editing a widget to change the query (e.g., p75→p50), also update `--sort` if it references the old aggregate. The API silently accepts the sort but the dashboard shows errors.
+- **Wrong dataset for custom metrics**: Use `--dataset tracemetrics` for custom metrics (`Sentry.metrics.distribution/gauge/count`). The query format is `aggregation(value,metric_name,metric_type,unit)` — see `sentry dashboard widget --help` for details.
+- **Wrong unit in tracemetrics queries**: The `unit` parameter must match the SDK emission. If no `unit` option is passed to `Sentry.metrics.*()`, use `none`. Check the SDK source for integrations — e.g., `nodeRuntimeMetricsIntegration` uses `byte` for memory, `second` for uptime, `none` for utilization ratios.
+- **Missing `--limit` with `--group-by`**: The Sentry API rejects grouped widgets without a limit. Always include `--limit` when using `--group-by`.
+- **`--sort` referencing a field not in `--query`**: The sort field must be one of the aggregate expressions in `--query`. If you sort by `-count` but only query `p50:span.duration`, the API returns 400.
+- **Span attributes are not aggregatable**: You cannot use `avg:dsn.files_collected` on span attributes. Span attributes are key-value metadata — use them in `--where` filters or `--group-by` columns, not as aggregate fields. Only `span.duration` and built-in measurements support aggregation.
+- **Stale `--sort` after changing `--query`**: When editing a widget to change the query (e.g., p75→p50), also update `--sort` if it references the old aggregate.
 
 ## Prerequisites
 
diff --git a/src/commands/dashboard/widget/index.ts b/src/commands/dashboard/widget/index.ts
index facdc39b4..f5efb6263 100644
--- a/src/commands/dashboard/widget/index.ts
+++ b/src/commands/dashboard/widget/index.ts
@@ -19,8 +19,17 @@ export const widgetRoute = buildRouteMap({
       "  specialized: stacked_area (3×2), top_n (3×2), categorical_bar (3×2), text (3×2)\n" +
       "  internal:    details (3×2), wheel (3×2), rage_and_dead_clicks (3×2),\n" +
       "               server_tree (3×2), agents_traces_table (3×2)\n\n" +
-      "Datasets: spans (default), discover, issue, error-events, transaction-like,\n" +
-      "          metrics, logs, tracemetrics, preprod-app-size\n\n" +
+      "Datasets:\n" +
+      "  spans (default)    Span-based queries: span.duration, span.op, transaction,\n" +
+      "                     span attributes, cache.hit, etc. Covers most use cases.\n" +
+      "  tracemetrics       Custom metrics from Sentry.metrics.distribution/gauge/count.\n" +
+      "                     Query format: aggregation(value,metric_name,metric_type,unit)\n" +
+      "                     Example: p50(value,completion.duration_ms,distribution,none)\n" +
+      "                     Supported displays: line, area, bar, big_number, categorical_bar\n" +
+      "  discover           Legacy discover queries (adds failure_rate, apdex, etc.)\n" +
+      "  issue              Issue-based queries\n" +
+      "  error-events       Error event queries\n" +
+      "  logs               Log queries\n\n" +
       "Aggregates (spans): count, count_unique, sum, avg, percentile, p50, p75,\n" +
       "  p90, p95, p99, p100, eps, epm, any, min, max\n" +
       "Aggregates (discover adds): failure_count, failure_rate, apdex,\n" +
@@ -28,6 +37,11 @@ export const widgetRoute = buildRouteMap({
       "  last_seen, latest_event, var, stddev, cov, corr, performance_score,\n" +
       "  opportunity_score, count_scores\n" +
       "Aliases: spm → epm, sps → eps, tpm → epm, tps → eps\n\n" +
+      "tracemetrics query format:\n" +
+      "  aggregation(value,metric_name,metric_type,unit)\n" +
+      "    - metric_name: name passed to Sentry.metrics.distribution/gauge/count\n" +
+      "    - metric_type: distribution, gauge, counter, set\n" +
+      "    - unit: none (if unspecified), byte, second, millisecond, ratio, etc.\n\n" +
       "Row-filling examples:\n" +
       "  # 3 KPIs (2+2+2 = 6)\n" +
       '  sentry dashboard widget add <d> "Error Count" --display big_number --query count\n' +
@@ -40,6 +54,11 @@ export const widgetRoute = buildRouteMap({
       '  sentry dashboard widget add <d> "Top Endpoints" --display table \\\n' +
       "    --query count --query p95:span.duration --group-by transaction \\\n" +
       "    --sort -count --limit 10\n\n" +
+      "  # Custom metrics (tracemetrics dataset)\n" +
+      '  sentry dashboard widget add <d> "Latency" --display line \\\n' +
+      "    --dataset tracemetrics \\\n" +
+      '    --query "p50(value,completion.duration_ms,distribution,none)" \\\n' +
+      '    --query "p90(value,completion.duration_ms,distribution,none)"\n\n' +
       "Commands:\n" +
       "  add    Add a widget to a dashboard\n" +
       "  edit   Edit a widget in a dashboard\n" +
diff --git a/src/types/dashboard.ts b/src/types/dashboard.ts
index 051cf0a39..71707080d 100644
--- a/src/types/dashboard.ts
+++ b/src/types/dashboard.ts
@@ -26,7 +26,6 @@ import { logger } from "../lib/logger.js";
 export const WIDGET_TYPES = [
   "discover",
   "issue",
-  "metrics",
   "error-events",
   "transaction-like",
   "spans",
@@ -360,16 +359,6 @@ export const DATASET_SUPPORTED_DISPLAY_TYPES = {
     "table",
     "top_n",
   ],
-  metrics: [
-    "area",
-    "bar",
-    "big_number",
-    "categorical_bar",
-    "line",
-    "stacked_area",
-    "table",
-    "top_n",
-  ],
   logs: [
     "area",
     "bar",
@@ -1006,7 +995,7 @@ export type WidgetDataResult =
 /**
  * Maps widget types to API dataset parameter values.
  *
- * Widget types that don't map to a dataset (issue, metrics, etc.)
+ * Widget types that don't map to a dataset (issue, tracemetrics, etc.)
  * return null and are rendered as "unsupported".
  */
 const WIDGET_TYPE_TO_DATASET: Record<string, string> = {
diff --git a/test/types/dashboard.test.ts b/test/types/dashboard.test.ts
index ced8dc131..2297d15fb 100644
--- a/test/types/dashboard.test.ts
+++ b/test/types/dashboard.test.ts
@@ -53,7 +53,6 @@ describe("WIDGET_TYPES", () => {
     const expected: WidgetType[] = [
       "discover",
       "issue",
-      "metrics",
       "error-events",
       "transaction-like",
       "spans",
@@ -770,7 +769,6 @@ describe("mapWidgetTypeToDataset", () => {
 
   test("returns null for unsupported widget types", () => {
     expect(mapWidgetTypeToDataset("issue")).toBeNull();
-    expect(mapWidgetTypeToDataset("metrics")).toBeNull();
     expect(mapWidgetTypeToDataset("tracemetrics")).toBeNull();
     expect(mapWidgetTypeToDataset("preprod-app-size")).toBeNull();
   });

From 640233122c327f103ac574226ef0bdfe79a8fc17 Mon Sep 17 00:00:00 2001
From: Burak Yigit Kaya <byk@sentry.io>
Date: Thu, 2 Apr 2026 09:02:24 +0000
Subject: [PATCH 3/9] feat(dashboard): validate --group-by requires --limit and
 --sort references --query

Add client-side validation that catches two common API errors early:

1. validateGroupByRequiresLimit: Widgets with --group-by must include
   --limit. The Sentry API rejects grouped widgets without a limit.
   In the add path this only fires for an explicit --group-by, not for
   auto-defaulted columns (e.g., the issue dataset auto-defaults columns
   to ['issue']).

2. validateSortReferencesAggregate: The --sort expression must reference
   an aggregate present in --query. Prevents 400 errors from the API
   when sort references a field not in the widget's aggregates.

Both validations run in the add path (buildWidgetFromFlags) and edit
path (validateQueryConstraints), with the edit path only checking when
the user actively changes query/group-by/sort flags.
---
 src/commands/dashboard/resolve.ts          | 53 ++++++++++++++++
 src/commands/dashboard/widget/edit.ts      | 71 ++++++++++++++++------
 test/commands/dashboard/widget/add.test.ts |  1 +
 3 files changed, 107 insertions(+), 18 deletions(-)

diff --git a/src/commands/dashboard/resolve.ts b/src/commands/dashboard/resolve.ts
index 1ad13f229..59b593d2d 100644
--- a/src/commands/dashboard/resolve.ts
+++ b/src/commands/dashboard/resolve.ts
@@ -324,6 +324,50 @@ export function resolveWidgetIndex(
  * @param opts - Widget configuration from parsed flags
  * @returns Validated widget with computed query fields
  */
+/**
+ * Validate that a sort expression references an aggregate present in the query.
+ * The Sentry API returns 400 when the sort field isn't in the widget's aggregates.
+ *
+ * @param orderby - Parsed sort expression (e.g., "-count()", "p90(span.duration)")
+ * @param aggregates - Parsed aggregate expressions from the query
+ */
+export function validateSortReferencesAggregate(
+  orderby: string,
+  aggregates: string[]
+): void {
+  // Strip leading "-" for descending sorts
+  const sortAgg = orderby.startsWith("-") ? orderby.slice(1) : orderby;
+  if (!aggregates.includes(sortAgg)) {
+    throw new ValidationError(
+      `Sort expression "${orderby}" references "${sortAgg}" which is not in the query.\n\n` +
+        "The --sort field must be one of the aggregate expressions in --query.\n" +
+        `Current aggregates: ${aggregates.join(", ")}\n\n` +
+        `Either add "${sortAgg}" to --query or sort by an existing aggregate.`,
+      "sort"
+    );
+  }
+}
+
+/**
+ * Validate that grouped widgets (those with columns/group-by) include a limit.
+ * The Sentry API rejects grouped widgets without a limit.
+ *
+ * @param columns - Group-by columns
+ * @param limit - Widget limit (undefined if not set)
+ */
+export function validateGroupByRequiresLimit(
+  columns: string[],
+  limit: number | undefined
+): void {
+  if (columns.length > 0 && limit === undefined) {
+    throw new ValidationError(
+      "Widgets with --group-by require --limit. " +
+        "Add --limit <n> to specify the maximum number of groups to display.",
+      "limit"
+    );
+  }
+}
+
 export function buildWidgetFromFlags(opts: {
   title: string;
   display: string;
@@ -350,6 +394,15 @@ export function buildWidgetFromFlags(opts: {
     orderby = `-${aggregates[0]}`;
   }
 
+  // Only validate when user explicitly passes --group-by, not for auto-defaulted columns
+  // (e.g., issue dataset auto-defaults columns to ["issue"] for table display)
+  if (opts.groupBy) {
+    validateGroupByRequiresLimit(columns, opts.limit);
+  }
+  if (orderby) {
+    validateSortReferencesAggregate(orderby, aggregates);
+  }
+
   const raw = {
     title: opts.title,
     displayType: opts.display,
diff --git a/src/commands/dashboard/widget/edit.ts b/src/commands/dashboard/widget/edit.ts
index 342190791..1975fc987 100644
--- a/src/commands/dashboard/widget/edit.ts
+++ b/src/commands/dashboard/widget/edit.ts
@@ -32,6 +32,8 @@ import {
   resolveDashboardId,
   resolveOrgFromTarget,
   resolveWidgetIndex,
+  validateGroupByRequiresLimit,
+  validateSortReferencesAggregate,
   validateWidgetEnums,
   type WidgetQueryFlags,
 } from "../resolve.js";
@@ -101,15 +103,15 @@ function mergeLayout(
   };
 }
 
-/** Build the replacement widget object by merging flags over existing */
-function buildReplacement(
+/**
+ * Validate enum and aggregate constraints on the effective (merged) widget state.
+ * Extracted from buildReplacement to stay under Biome's complexity limit.
+ */
+function validateEnumsAndAggregates(
   flags: EditFlags,
-  existing: DashboardWidget
-): DashboardWidget {
-  const mergedQueries = mergeQueries(flags, existing.queries?.[0]);
-
-  // Validate aggregates when query or dataset changes — prevents broken widgets
-  // (e.g. switching --dataset from discover to spans with discover-only aggregates)
+  existing: DashboardWidget,
+  mergedQueries: DashboardWidgetQuery[] | undefined
+): void {
   const newDataset = flags.dataset ?? existing.widgetType;
   const aggregatesToValidate =
     mergedQueries?.[0]?.aggregates ?? existing.queries?.[0]?.aggregates;
@@ -117,22 +119,55 @@ function buildReplacement(
     validateAggregateNames(aggregatesToValidate, newDataset);
   }
 
-  const limit = flags.limit !== undefined ? flags.limit : existing.limit;
-
-  const effectiveDisplay = flags.display ?? existing.displayType;
-  const effectiveDataset = flags.dataset ?? existing.widgetType;
-
-  // Re-validate after merging with existing values. validateWidgetEnums only
-  // checks the cross-constraint when both args are provided, so it misses
-  // e.g. `--dataset preprod-app-size` on a widget that's already `table`.
-  // validateWidgetEnums itself skips untracked display types (text, wheel, etc.).
   if (flags.display || flags.dataset) {
+    const effectiveDisplay = flags.display ?? existing.displayType;
+    const effectiveDataset = flags.dataset ?? existing.widgetType;
     validateWidgetEnums(effectiveDisplay, effectiveDataset);
   }
+}
+
+/**
+ * Validate group-by+limit and sort constraints on the effective (merged) widget state.
+ * Only runs when the user changes query, group-by, or sort — not when preserving
+ * existing widget state which may predate these validations.
+ */
+function validateQueryConstraints(
+  flags: EditFlags,
+  existing: DashboardWidget,
+  mergedQueries: DashboardWidgetQuery[] | undefined,
+  limit: number | null | undefined
+): void {
+  if (flags["group-by"] || flags.query) {
+    const columns =
+      mergedQueries?.[0]?.columns ?? existing.queries?.[0]?.columns ?? [];
+    validateGroupByRequiresLimit(columns, limit ?? undefined);
+  }
+
+  if (flags.sort || flags.query) {
+    const orderby =
+      mergedQueries?.[0]?.orderby ?? existing.queries?.[0]?.orderby;
+    const aggregates =
+      mergedQueries?.[0]?.aggregates ?? existing.queries?.[0]?.aggregates ?? [];
+    if (orderby && aggregates.length > 0) {
+      validateSortReferencesAggregate(orderby, aggregates);
+    }
+  }
+}
+
+/** Build the replacement widget object by merging flags over existing */
+function buildReplacement(
+  flags: EditFlags,
+  existing: DashboardWidget
+): DashboardWidget {
+  const mergedQueries = mergeQueries(flags, existing.queries?.[0]);
+  const limit = flags.limit !== undefined ? flags.limit : existing.limit;
+
+  validateEnumsAndAggregates(flags, existing, mergedQueries);
+  validateQueryConstraints(flags, existing, mergedQueries, limit);
 
   const raw: Record<string, unknown> = {
     title: flags["new-title"] ?? existing.title,
-    displayType: effectiveDisplay,
+    displayType: flags.display ?? existing.displayType,
     queries: mergedQueries ?? existing.queries,
     layout: mergeLayout(flags, existing),
   };
diff --git a/test/commands/dashboard/widget/add.test.ts b/test/commands/dashboard/widget/add.test.ts
index d4f3027ce..9023835bf 100644
--- a/test/commands/dashboard/widget/add.test.ts
+++ b/test/commands/dashboard/widget/add.test.ts
@@ -286,6 +286,7 @@ describe("dashboard widget add", () => {
         display: "table",
         dataset: "issue",
         "group-by": ["project"],
+        limit: 5,
       },
       "123",
       "Issues by Project"

From 3fc74876ecf905739dafc294bbaf3bea2ea5e42e Mon Sep 17 00:00:00 2001
From: Burak Yigit Kaya <byk@sentry.io>
Date: Thu, 2 Apr 2026 09:10:17 +0000
Subject: [PATCH 4/9] feat(dashboard): warn on likely span attribute misuse in
 aggregate fields
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Add warnUnknownAggregateFields() that logs a warning when the argument of
an aggregate doesn't match the known aggregatable span fields
(span.duration, span.self_time, and a fixed set of http.*/cache.* size
measurements). Span attributes like dsn.files_collected or resolve.method
are key-value metadata that cannot be aggregated — they should be used in
--where or --group-by. The check runs in the add path
(buildWidgetFromFlags) and is skipped for non-spans datasets.

Uses a warning (not error) because measurements are project-specific
and the known-good list may not be exhaustive.
---
 docs/src/content/docs/agent-guidance.md |  6 +--
 src/commands/dashboard/resolve.ts       | 54 +++++++++++++++++++++++++
 2 files changed, 57 insertions(+), 3 deletions(-)

diff --git a/docs/src/content/docs/agent-guidance.md b/docs/src/content/docs/agent-guidance.md
index 2cb9ff937..2e7d1af78 100644
--- a/docs/src/content/docs/agent-guidance.md
+++ b/docs/src/content/docs/agent-guidance.md
@@ -198,7 +198,7 @@ When querying the Events API (directly or via `sentry api`), valid dataset value
 
 - **Wrong dataset for custom metrics**: Use `--dataset tracemetrics` for custom metrics (`Sentry.metrics.distribution/gauge/count`). The query format is `aggregation(value,metric_name,metric_type,unit)` — see `sentry dashboard widget --help` for details.
 - **Wrong unit in tracemetrics queries**: The `unit` parameter must match the SDK emission. If no `unit` option is passed to `Sentry.metrics.*()`, use `none`. Check the SDK source for integrations — e.g., `nodeRuntimeMetricsIntegration` uses `byte` for memory, `second` for uptime, `none` for utilization ratios.
-- **Missing `--limit` with `--group-by`**: The Sentry API rejects grouped widgets without a limit. Always include `--limit` when using `--group-by`.
-- **`--sort` referencing a field not in `--query`**: The sort field must be one of the aggregate expressions in `--query`. If you sort by `-count` but only query `p50:span.duration`, the API returns 400.
-- **Span attributes are not aggregatable**: You cannot use `avg:dsn.files_collected` on span attributes. Span attributes are key-value metadata — use them in `--where` filters or `--group-by` columns, not as aggregate fields. Only `span.duration` and built-in measurements support aggregation.
+- **Missing `--limit` with `--group-by`**: Always include `--limit` when using `--group-by`. The CLI validates this before sending to the API.
+- **`--sort` referencing a field not in `--query`**: The sort field must be one of the aggregate expressions in `--query`. The CLI validates this before sending to the API.
+- **Span attributes are not aggregatable**: You cannot use `avg:dsn.files_collected` on span attributes. Span attributes are key-value metadata — use them in `--where` filters or `--group-by` columns, not as aggregate fields. The CLI warns when an aggregate field doesn't match known aggregatable fields (`span.duration`, `span.self_time`, etc.).
 - **Stale `--sort` after changing `--query`**: When editing a widget to change the query (e.g., p75→p50), also update `--sort` if it references the old aggregate.
diff --git a/src/commands/dashboard/resolve.ts b/src/commands/dashboard/resolve.ts
index 59b593d2d..318a700d9 100644
--- a/src/commands/dashboard/resolve.ts
+++ b/src/commands/dashboard/resolve.ts
@@ -18,6 +18,7 @@ import {
   ValidationError,
 } from "../../lib/errors.js";
 import { fuzzyMatch } from "../../lib/fuzzy.js";
+import { logger } from "../../lib/logger.js";
 import { resolveEffectiveOrg } from "../../lib/region.js";
 import { resolveOrg } from "../../lib/resolve-target.js";
 import { setOrgProjectContext } from "../../lib/telemetry.js";
@@ -368,6 +369,58 @@ export function validateGroupByRequiresLimit(
   }
 }
 
+const log = logger.withTag("dashboard");
+
+/**
+ * Known aggregatable fields for the spans dataset.
+ *
+ * Span attributes (e.g., dsn.files_collected, resolve.method) are key-value
+ * metadata and cannot be used as aggregate fields — only in --where or --group-by.
+ * This set covers built-in numeric fields that support aggregation.
+ * Measurements (http.*, cache.*, etc.) are project-specific and may not be
+ * exhaustive — we warn instead of error for unknown fields.
+ */
+const KNOWN_SPAN_AGGREGATE_FIELDS = new Set([
+  "span.duration",
+  "span.self_time",
+  "http.response_content_length",
+  "http.decoded_response_content_length",
+  "http.response_transfer_size",
+  "cache.item_size",
+]);
+
+/**
+ * Warn when an aggregate argument looks like a span attribute rather than
+ * an aggregatable field. No-arg functions (count(), epm()) are fine.
+ * Only checks for the spans dataset.
+ */
+function warnUnknownAggregateFields(
+  aggregates: string[],
+  dataset: string | undefined
+): void {
+  if (dataset && dataset !== "spans") {
+    return;
+  }
+  for (const agg of aggregates) {
+    const parenIdx = agg.indexOf("(");
+    if (parenIdx < 0) {
+      continue;
+    }
+    const inner = agg.slice(parenIdx + 1, -1);
+    // No-arg functions like count(), epm() have empty inner — skip
+    if (!inner) {
+      continue;
+    }
+    if (!KNOWN_SPAN_AGGREGATE_FIELDS.has(inner)) {
+      log.warn(
+        `Aggregate field "${inner}" in "${agg}" is not a known aggregatable span field. ` +
+          "Span attributes (custom tags) cannot be aggregated — use them in --where or --group-by instead. " +
+          `Known fields: ${[...KNOWN_SPAN_AGGREGATE_FIELDS].join(", ")}`
+      );
+    }
+  }
+}
+
 export function buildWidgetFromFlags(opts: {
   title: string;
   display: string;
@@ -380,6 +433,7 @@ export function buildWidgetFromFlags(opts: {
 }): DashboardWidget {
   const aggregates = (opts.query ?? ["count"]).map(parseAggregate);
   validateAggregateNames(aggregates, opts.dataset);
+  warnUnknownAggregateFields(aggregates, opts.dataset);
 
   // Issue table widgets need at least one column or the Sentry UI shows "Columns: None".
   // Default to ["issue"] for table display only — timeseries (line/area/bar) don't use columns.

From 751f75063b07981868246457cfb6988b3b9afc63 Mon Sep 17 00:00:00 2001
From: "github-actions[bot]" <github-actions[bot]@users.noreply.github.com>
Date: Thu, 2 Apr 2026 09:10:50 +0000
Subject: [PATCH 5/9] chore: regenerate skill files and command docs

---
 plugins/sentry-cli/skills/sentry-cli/SKILL.md | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

diff --git a/plugins/sentry-cli/skills/sentry-cli/SKILL.md b/plugins/sentry-cli/skills/sentry-cli/SKILL.md
index 50e67d651..1147cae4c 100644
--- a/plugins/sentry-cli/skills/sentry-cli/SKILL.md
+++ b/plugins/sentry-cli/skills/sentry-cli/SKILL.md
@@ -208,9 +208,9 @@ When querying the Events API (directly or via `sentry api`), valid dataset value
 
 - **Wrong dataset for custom metrics**: Use `--dataset tracemetrics` for custom metrics (`Sentry.metrics.distribution/gauge/count`). The query format is `aggregation(value,metric_name,metric_type,unit)` — see `sentry dashboard widget --help` for details.
 - **Wrong unit in tracemetrics queries**: The `unit` parameter must match the SDK emission. If no `unit` option is passed to `Sentry.metrics.*()`, use `none`. Check the SDK source for integrations — e.g., `nodeRuntimeMetricsIntegration` uses `byte` for memory, `second` for uptime, `none` for utilization ratios.
-- **Missing `--limit` with `--group-by`**: The Sentry API rejects grouped widgets without a limit. Always include `--limit` when using `--group-by`.
-- **`--sort` referencing a field not in `--query`**: The sort field must be one of the aggregate expressions in `--query`. If you sort by `-count` but only query `p50:span.duration`, the API returns 400.
-- **Span attributes are not aggregatable**: You cannot use `avg:dsn.files_collected` on span attributes. Span attributes are key-value metadata — use them in `--where` filters or `--group-by` columns, not as aggregate fields. Only `span.duration` and built-in measurements support aggregation.
+- **Missing `--limit` with `--group-by`**: Always include `--limit` when using `--group-by`. The CLI validates this before sending to the API.
+- **`--sort` referencing a field not in `--query`**: The sort field must be one of the aggregate expressions in `--query`. The CLI validates this before sending to the API.
+- **Span attributes are not aggregatable**: You cannot use `avg:dsn.files_collected` on span attributes. Span attributes are key-value metadata — use them in `--where` filters or `--group-by` columns, not as aggregate fields. The CLI warns when an aggregate field doesn't match known aggregatable fields (`span.duration`, `span.self_time`, etc.).
 - **Stale `--sort` after changing `--query`**: When editing a widget to change the query (e.g., p75→p50), also update `--sort` if it references the old aggregate.
 
 ## Prerequisites

From 9c1271add3c6fe401b1de35bf3c76de8b674eb55 Mon Sep 17 00:00:00 2001
From: Burak Yigit Kaya <byk@sentry.io>
Date: Thu, 2 Apr 2026 09:14:44 +0000
Subject: [PATCH 6/9] =?UTF-8?q?docs:=20remove=20Dashboard=20Widget=20Mista?=
 =?UTF-8?q?kes=20section=20=E2=80=94=20CLI=20validates=20these=20now?=
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

---
 docs/src/content/docs/agent-guidance.md       | 9 ---------
 plugins/sentry-cli/skills/sentry-cli/SKILL.md | 9 ---------
 2 files changed, 18 deletions(-)

diff --git a/docs/src/content/docs/agent-guidance.md b/docs/src/content/docs/agent-guidance.md
index 2e7d1af78..af9ce92b1 100644
--- a/docs/src/content/docs/agent-guidance.md
+++ b/docs/src/content/docs/agent-guidance.md
@@ -193,12 +193,3 @@ When querying the Events API (directly or via `sentry api`), valid dataset value
 - **Not using `--web`**: View commands support `-w`/`--web` to open the resource in the browser — useful for sharing links.
 - **Fetching API schemas instead of using the CLI**: Prefer `sentry schema` to browse the API and `sentry api` to make requests — the CLI handles authentication and endpoint resolution, so there's rarely a need to download OpenAPI specs separately.
 - **Using `sentry api` when CLI commands suffice**: `sentry issue list --json` already includes `shortId`, `title`, `priority`, `level`, `status`, `permalink`, and other fields at the top level. Some fields like `count`, `userCount`, `firstSeen`, and `lastSeen` may be null depending on the issue. Use `--fields` to select specific fields and `--help` to see all available fields. Only fall back to `sentry api` for data the CLI doesn't expose.
-
-### Dashboard Widget Mistakes
-
-- **Wrong dataset for custom metrics**: Use `--dataset tracemetrics` for custom metrics (`Sentry.metrics.distribution/gauge/count`). The query format is `aggregation(value,metric_name,metric_type,unit)` — see `sentry dashboard widget --help` for details.
-- **Wrong unit in tracemetrics queries**: The `unit` parameter must match the SDK emission. If no `unit` option is passed to `Sentry.metrics.*()`, use `none`. Check the SDK source for integrations — e.g., `nodeRuntimeMetricsIntegration` uses `byte` for memory, `second` for uptime, `none` for utilization ratios.
-- **Missing `--limit` with `--group-by`**: Always include `--limit` when using `--group-by`. The CLI validates this before sending to the API.
-- **`--sort` referencing a field not in `--query`**: The sort field must be one of the aggregate expressions in `--query`. The CLI validates this before sending to the API.
-- **Span attributes are not aggregatable**: You cannot use `avg:dsn.files_collected` on span attributes. Span attributes are key-value metadata — use them in `--where` filters or `--group-by` columns, not as aggregate fields. The CLI warns when an aggregate field doesn't match known aggregatable fields (`span.duration`, `span.self_time`, etc.).
-- **Stale `--sort` after changing `--query`**: When editing a widget to change the query (e.g., p75→p50), also update `--sort` if it references the old aggregate.
diff --git a/plugins/sentry-cli/skills/sentry-cli/SKILL.md b/plugins/sentry-cli/skills/sentry-cli/SKILL.md
index 1147cae4c..22dca3ecc 100644
--- a/plugins/sentry-cli/skills/sentry-cli/SKILL.md
+++ b/plugins/sentry-cli/skills/sentry-cli/SKILL.md
@@ -204,15 +204,6 @@ When querying the Events API (directly or via `sentry api`), valid dataset value
 - **Fetching API schemas instead of using the CLI**: Prefer `sentry schema` to browse the API and `sentry api` to make requests — the CLI handles authentication and endpoint resolution, so there's rarely a need to download OpenAPI specs separately.
 - **Using `sentry api` when CLI commands suffice**: `sentry issue list --json` already includes `shortId`, `title`, `priority`, `level`, `status`, `permalink`, and other fields at the top level. Some fields like `count`, `userCount`, `firstSeen`, and `lastSeen` may be null depending on the issue. Use `--fields` to select specific fields and `--help` to see all available fields. Only fall back to `sentry api` for data the CLI doesn't expose.
 
-#### Dashboard Widget Mistakes
-
-- **Wrong dataset for custom metrics**: Use `--dataset tracemetrics` for custom metrics (`Sentry.metrics.distribution/gauge/count`). The query format is `aggregation(value,metric_name,metric_type,unit)` — see `sentry dashboard widget --help` for details.
-- **Wrong unit in tracemetrics queries**: The `unit` parameter must match the SDK emission. If no `unit` option is passed to `Sentry.metrics.*()`, use `none`. Check the SDK source for integrations — e.g., `nodeRuntimeMetricsIntegration` uses `byte` for memory, `second` for uptime, `none` for utilization ratios.
-- **Missing `--limit` with `--group-by`**: Always include `--limit` when using `--group-by`. The CLI validates this before sending to the API.
-- **`--sort` referencing a field not in `--query`**: The sort field must be one of the aggregate expressions in `--query`. The CLI validates this before sending to the API.
-- **Span attributes are not aggregatable**: You cannot use `avg:dsn.files_collected` on span attributes. Span attributes are key-value metadata — use them in `--where` filters or `--group-by` columns, not as aggregate fields. The CLI warns when an aggregate field doesn't match known aggregatable fields (`span.duration`, `span.self_time`, etc.).
-- **Stale `--sort` after changing `--query`**: When editing a widget to change the query (e.g., p75→p50), also update `--sort` if it references the old aggregate.
-
 ## Prerequisites
 
 The CLI must be installed and authenticated before use.

From 2d1312bd082712f4c893a08b1fc96591fa8112b5 Mon Sep 17 00:00:00 2001
From: Burak Yigit Kaya <byk@sentry.io>
Date: Thu, 2 Apr 2026 09:24:48 +0000
Subject: [PATCH 7/9] fix(dashboard): address Seer and Bugbot review findings

- Fix validateQueryConstraints false positive: only check group-by+limit
  when user explicitly passes --group-by, not when merely changing --query
  on an existing grouped widget (which may have auto-defaulted columns)
- Fix orphaned JSDoc: move buildWidgetFromFlags doc comment to its
  actual function definition after the validation helpers were inserted
---
 src/commands/dashboard/resolve.ts     | 18 +++++++++---------
 src/commands/dashboard/widget/edit.ts |  5 ++++-
 2 files changed, 13 insertions(+), 10 deletions(-)

diff --git a/src/commands/dashboard/resolve.ts b/src/commands/dashboard/resolve.ts
index 318a700d9..ca4e64b74 100644
--- a/src/commands/dashboard/resolve.ts
+++ b/src/commands/dashboard/resolve.ts
@@ -316,15 +316,6 @@ export function resolveWidgetIndex(
   return matchIndex;
 }
 
-/**
- * Build a widget from user-provided flag values.
- *
- * Shared between `dashboard widget add` and `dashboard widget edit`.
- * Parses aggregate shorthand, sort expressions, and validates via Zod schema.
- *
- * @param opts - Widget configuration from parsed flags
- * @returns Validated widget with computed query fields
- */
 /**
  * Validate that a sort expression references an aggregate present in the query.
  * The Sentry API returns 400 when the sort field isn't in the widget's aggregates.
@@ -421,6 +412,15 @@ function warnUnknownAggregateFields(
   }
 }
 
+/**
+ * Build a widget from user-provided flag values.
+ *
+ * Shared between `dashboard widget add` and `dashboard widget edit`.
+ * Parses aggregate shorthand, sort expressions, and validates via Zod schema.
+ *
+ * @param opts - Widget configuration from parsed flags
+ * @returns Validated widget with computed query fields
+ */
 export function buildWidgetFromFlags(opts: {
   title: string;
   display: string;
diff --git a/src/commands/dashboard/widget/edit.ts b/src/commands/dashboard/widget/edit.ts
index 1975fc987..150ec1da0 100644
--- a/src/commands/dashboard/widget/edit.ts
+++ b/src/commands/dashboard/widget/edit.ts
@@ -137,7 +137,10 @@ function validateQueryConstraints(
   mergedQueries: DashboardWidgetQuery[] | undefined,
   limit: number | null | undefined
 ): void {
-  if (flags["group-by"] || flags.query) {
+  // Only validate when the user explicitly passes --group-by, not when merely
+  // changing --query on an existing grouped widget (which may have auto-defaulted
+  // columns like ["issue"] with no limit)
+  if (flags["group-by"]) {
     const columns =
       mergedQueries?.[0]?.columns ?? existing.queries?.[0]?.columns ?? [];
     validateGroupByRequiresLimit(columns, limit ?? undefined);

From f73a894c3cc9f60e289d8f83e16ac30ec03208eb Mon Sep 17 00:00:00 2001
From: Burak Yigit Kaya <byk@sentry.io>
Date: Thu, 2 Apr 2026 09:31:28 +0000
Subject: [PATCH 8/9] fix(dashboard): only validate sort on explicit --sort,
 not inherited from existing widget

---
 src/commands/dashboard/widget/edit.ts | 4 +++-
 1 file changed, 3 insertions(+), 1 deletion(-)

diff --git a/src/commands/dashboard/widget/edit.ts b/src/commands/dashboard/widget/edit.ts
index 150ec1da0..59c1370cd 100644
--- a/src/commands/dashboard/widget/edit.ts
+++ b/src/commands/dashboard/widget/edit.ts
@@ -146,7 +146,9 @@ function validateQueryConstraints(
     validateGroupByRequiresLimit(columns, limit ?? undefined);
   }
 
-  if (flags.sort || flags.query) {
+  // Only validate sort when the user explicitly passes --sort, not when merely
+  // changing --query (which may leave the existing auto-defaulted sort stale)
+  if (flags.sort) {
     const orderby =
       mergedQueries?.[0]?.orderby ?? existing.queries?.[0]?.orderby;
     const aggregates =

From a87c58eeffee14730ec2a6bf656e386770fcd0d9 Mon Sep 17 00:00:00 2001
From: Burak Yigit Kaya <byk@sentry.io>
Date: Thu, 2 Apr 2026 09:42:15 +0000
Subject: [PATCH 9/9] fix(dashboard): only warn on numeric aggregate functions
 for unknown fields

Functions like count_unique(transaction) and any(span.op) accept
non-numeric columns and should not trigger the span-attribute warning.
Only numeric aggregates (avg, sum, p50-p100, min, max, percentile)
require measurement fields.
---
 src/commands/dashboard/resolve.ts | 37 ++++++++++++++++++++++++++-----
 1 file changed, 32 insertions(+), 5 deletions(-)

diff --git a/src/commands/dashboard/resolve.ts b/src/commands/dashboard/resolve.ts
index ca4e64b74..b1e878a45 100644
--- a/src/commands/dashboard/resolve.ts
+++ b/src/commands/dashboard/resolve.ts
@@ -381,8 +381,30 @@ const KNOWN_SPAN_AGGREGATE_FIELDS = new Set([
 ]);
 
 /**
- * Warn when an aggregate argument looks like a span attribute rather than
- * an aggregatable field. No-arg functions (count(), epm()) are fine.
+ * Aggregate functions that require numeric measurement fields.
+ * Functions like count_unique, any, count accept non-numeric columns
+ * (e.g., transaction, span.op) and should not trigger the warning.
+ */
+const NUMERIC_AGGREGATE_FUNCTIONS = new Set([
+  "avg",
+  "sum",
+  "min",
+  "max",
+  "p50",
+  "p75",
+  "p90",
+  "p95",
+  "p99",
+  "p100",
+  "percentile",
+]);
+
+/**
+ * Warn when a numeric aggregate function (avg, sum, p50, etc.) is applied
+ * to a field that isn't a known aggregatable span measurement. Functions
+ * like count_unique(transaction) or any(span.op) accept non-numeric
+ * columns and are not checked.
+ *
  * Only checks for the spans dataset.
  */
 function warnUnknownAggregateFields(
@@ -397,16 +419,21 @@ function warnUnknownAggregateFields(
     if (parenIdx < 0) {
       continue;
     }
+    const fn = agg.slice(0, parenIdx);
+    // Only check numeric aggregate functions — count_unique, any, etc. accept any column
+    if (!NUMERIC_AGGREGATE_FUNCTIONS.has(fn)) {
+      continue;
+    }
     const inner = agg.slice(parenIdx + 1, -1);
-    // No-arg functions like count(), epm() have empty inner — skip
     if (!inner) {
       continue;
     }
     if (!KNOWN_SPAN_AGGREGATE_FIELDS.has(inner)) {
       log.warn(
         `Aggregate field "${inner}" in "${agg}" is not a known aggregatable span field. ` +
-          "Span attributes (custom tags) cannot be aggregated — use them in --where or --group-by instead. " +
-          `Known fields: ${[...KNOWN_SPAN_AGGREGATE_FIELDS].join(", ")}`
+          "Span attributes (custom tags) cannot be used with numeric aggregates — " +
+          "use them in --where or --group-by instead. " +
+          `Known numeric fields: ${[...KNOWN_SPAN_AGGREGATE_FIELDS].join(", ")}`
       );
     }
   }