diff --git a/AGENTS.md b/AGENTS.md
index 6eb0c3855..df3159b83 100644
--- a/AGENTS.md
+++ b/AGENTS.md
@@ -952,6 +952,8 @@ mock.module("./some-module", () => ({
 <!-- lore:019d3e8a-a4bb-7271-98cf-4cf418f2f581 -->
 * **CLI telemetry command tags use sentry. prefix with dots not bare names**: The \`buildCommand\` wrapper sets the \`command\` telemetry tag using the full Stricli command prefix joined with dots: \`sentry.issue.explain\`, \`sentry.issue.list\`, \`sentry.api\`, etc. — NOT bare names like \`issue.explain\`. When querying Sentry Discover or building dashboard widgets, always use the \`sentry.\` prefix. Verify actual tag values with a Discover query (\`field:command, count()\`, grouped by \`command\`) before assuming the format.
 
+* **Dashboard tracemetrics dataset uses comma-separated aggregate format**: SDK v10+ custom metrics (`Sentry.metrics.distribution()`, `.gauge()`, `.count()`) emit `trace_metric` envelope items. Dashboard widgets for these MUST use `--dataset tracemetrics` with aggregate format `aggregation(value,metric_name,metric_type,unit)` — e.g., `p50(value,completion.duration_ms,distribution,none)`. The `unit` parameter must match the SDK emission exactly: `none` if no unit specified, `byte` for memory metrics, `second` for uptime. `tracemetrics` only supports `line`, `area`, `bar`, `big_number`, `categorical_bar` display types — no `table` or `stacked_area`. Widgets with `--group-by` always require `--limit`. Sort expressions must reference aggregates present in `--query`.
+
 <!-- lore:019d0846-17bd-7ff3-a6d7-09b59b69a8fe -->
 * **Use toMatchObject not toEqual when testing resolution results with optional fields**: When \`resolveProjectBySlug()\` or \`resolveOrgProjectTarget()\` adds optional fields (like \`projectData\`) to the return type, tests using \`expect(result).toEqual({ org, project })\` fail because \`toEqual\` requires exact match. Use \`toMatchObject({ org, project })\` instead — it checks the specified subset without failing on extra properties. This affects tests across \`event/view\`, \`log/view\`, \`trace/view\`, and \`trace/list\` test files.
 
diff --git a/docs/src/content/docs/agent-guidance.md b/docs/src/content/docs/agent-guidance.md
index 6d84b169a..af9ce92b1 100644
--- a/docs/src/content/docs/agent-guidance.md
+++ b/docs/src/content/docs/agent-guidance.md
@@ -128,9 +128,7 @@ Display types with default sizes:
 
 Use **common** types for general dashboards. Use **specialized** only when specifically requested. Avoid **internal** types unless the user explicitly asks.
 
-Available datasets: `spans` (default, covers most use cases), `discover`, `issue`, `error-events`, `transaction-like`, `metrics`, `logs`, `tracemetrics`, `preprod-app-size`.
-
-Run `sentry dashboard widget --help` for the full list including aggregate functions.
+Available datasets: `spans` (default), `tracemetrics`, `discover`, `issue`, `error-events`, `logs`. Run `sentry dashboard widget --help` for dataset descriptions, query formats, and examples.
 
 **Row-filling examples:**
 
diff --git a/plugins/sentry-cli/skills/sentry-cli/SKILL.md b/plugins/sentry-cli/skills/sentry-cli/SKILL.md
index 7cb991827..22dca3ecc 100644
--- a/plugins/sentry-cli/skills/sentry-cli/SKILL.md
+++ b/plugins/sentry-cli/skills/sentry-cli/SKILL.md
@@ -138,9 +138,7 @@ Display types with default sizes:
 
 Use **common** types for general dashboards. Use **specialized** only when specifically requested. Avoid **internal** types unless the user explicitly asks.
 
-Available datasets: `spans` (default, covers most use cases), `discover`, `issue`, `error-events`, `transaction-like`, `metrics`, `logs`, `tracemetrics`, `preprod-app-size`.
-
-Run `sentry dashboard widget --help` for the full list including aggregate functions.
+Available datasets: `spans` (default), `tracemetrics`, `discover`, `issue`, `error-events`, `logs`. Run `sentry dashboard widget --help` for dataset descriptions, query formats, and examples.
 
 **Row-filling examples:**
 
diff --git a/src/commands/dashboard/resolve.ts b/src/commands/dashboard/resolve.ts
index 1ad13f229..b1e878a45 100644
--- a/src/commands/dashboard/resolve.ts
+++ b/src/commands/dashboard/resolve.ts
@@ -18,6 +18,7 @@ import {
   ValidationError,
 } from "../../lib/errors.js";
 import { fuzzyMatch } from "../../lib/fuzzy.js";
+import { logger } from "../../lib/logger.js";
 import { resolveEffectiveOrg } from "../../lib/region.js";
 import { resolveOrg } from "../../lib/resolve-target.js";
 import { setOrgProjectContext } from "../../lib/telemetry.js";
@@ -315,6 +316,129 @@ export function resolveWidgetIndex(
   return matchIndex;
 }
 
+/**
+ * Validate that a sort expression references an aggregate present in the query.
+ * The Sentry API returns 400 when the sort field isn't in the widget's aggregates.
+ *
+ * @param orderby - Parsed sort expression (e.g., "-count()", "p90(span.duration)")
+ * @param aggregates - Parsed aggregate expressions from the query
+ */
+export function validateSortReferencesAggregate(
+  orderby: string,
+  aggregates: string[]
+): void {
+  // Strip leading "-" for descending sorts
+  const sortAgg = orderby.startsWith("-") ? orderby.slice(1) : orderby;
+  if (!aggregates.includes(sortAgg)) {
+    throw new ValidationError(
+      `Sort expression "${orderby}" references "${sortAgg}" which is not in the query.\n\n` +
+        "The --sort field must be one of the aggregate expressions in --query.\n" +
+        `Current aggregates: ${aggregates.join(", ")}\n\n` +
+        `Either add "${sortAgg}" to --query or sort by an existing aggregate.`,
+      "sort"
+    );
+  }
+}
+
+/**
+ * Validate that grouped widgets (those with columns/group-by) include a limit.
+ * The Sentry API rejects grouped widgets without a limit.
+ *
+ * @param columns - Group-by columns
+ * @param limit - Widget limit (undefined if not set)
+ */
+export function validateGroupByRequiresLimit(
+  columns: string[],
+  limit: number | undefined
+): void {
+  if (columns.length > 0 && limit === undefined) {
+    throw new ValidationError(
+      "Widgets with --group-by require --limit. " +
+        "Add --limit <n> to specify the maximum number of groups to display.",
+      "limit"
+    );
+  }
+}
+
+const log = logger.withTag("dashboard");
+
+/**
+ * Known aggregatable fields for the spans dataset.
+ *
+ * Span attributes (e.g., dsn.files_collected, resolve.method) are key-value
+ * metadata and cannot be used as aggregate fields — only in --where or --group-by.
+ * This set covers built-in numeric fields that support aggregation.
+ * Measurements (http.*, cache.*, etc.) are project-specific and may not be
+ * exhaustive — we warn instead of error for unknown fields.
+ */
+const KNOWN_SPAN_AGGREGATE_FIELDS = new Set([
+  "span.duration",
+  "span.self_time",
+  "http.response_content_length",
+  "http.decoded_response_content_length",
+  "http.response_transfer_size",
+  "cache.item_size",
+]);
+
+/**
+ * Aggregate functions that require numeric measurement fields.
+ * Functions like count_unique, any, count accept non-numeric columns
+ * (e.g., transaction, span.op) and should not trigger the warning.
+ */
+const NUMERIC_AGGREGATE_FUNCTIONS = new Set([
+  "avg",
+  "sum",
+  "min",
+  "max",
+  "p50",
+  "p75",
+  "p90",
+  "p95",
+  "p99",
+  "p100",
+  "percentile",
+]);
+
+/**
+ * Warn when a numeric aggregate function (avg, sum, p50, etc.) is applied
+ * to a field that isn't a known aggregatable span measurement. Functions
+ * like count_unique(transaction) or any(span.op) accept non-numeric
+ * columns and are not checked.
+ *
+ * Only checks for the spans dataset.
+ */
+function warnUnknownAggregateFields(
+  aggregates: string[],
+  dataset: string | undefined
+): void {
+  if (dataset && dataset !== "spans") {
+    return;
+  }
+  for (const agg of aggregates) {
+    const parenIdx = agg.indexOf("(");
+    if (parenIdx < 0) {
+      continue;
+    }
+    const fn = agg.slice(0, parenIdx);
+    // Only check numeric aggregate functions — count_unique, any, etc. accept any column
+    if (!NUMERIC_AGGREGATE_FUNCTIONS.has(fn)) {
+      continue;
+    }
+    const inner = agg.slice(parenIdx + 1, -1);
+    if (!inner) {
+      continue;
+    }
+    if (!KNOWN_SPAN_AGGREGATE_FIELDS.has(inner)) {
+      log.warn(
+        `Aggregate field "${inner}" in "${agg}" is not a known aggregatable span field. ` +
+          "Span attributes (custom tags) cannot be used with numeric aggregates — " +
+          "use them in --where or --group-by instead. " +
+          `Known numeric fields: ${[...KNOWN_SPAN_AGGREGATE_FIELDS].join(", ")}`
+      );
+    }
+  }
+}
+
 /**
  * Build a widget from user-provided flag values.
  *
@@ -336,6 +460,7 @@ export function buildWidgetFromFlags(opts: {
 }): DashboardWidget {
   const aggregates = (opts.query ?? ["count"]).map(parseAggregate);
   validateAggregateNames(aggregates, opts.dataset);
+  warnUnknownAggregateFields(aggregates, opts.dataset);
 
   // Issue table widgets need at least one column or the Sentry UI shows "Columns: None".
   // Default to ["issue"] for table display only — timeseries (line/area/bar) don't use columns.
@@ -350,6 +475,15 @@ export function buildWidgetFromFlags(opts: {
     orderby = `-${aggregates[0]}`;
   }
 
+  // Only validate when user explicitly passes --group-by, not for auto-defaulted columns
+  // (e.g., issue dataset auto-defaults columns to ["issue"] for table display)
+  if (opts.groupBy) {
+    validateGroupByRequiresLimit(columns, opts.limit);
+  }
+  if (orderby) {
+    validateSortReferencesAggregate(orderby, aggregates);
+  }
+
   const raw = {
     title: opts.title,
     displayType: opts.display,
diff --git a/src/commands/dashboard/widget/edit.ts b/src/commands/dashboard/widget/edit.ts
index 342190791..59c1370cd 100644
--- a/src/commands/dashboard/widget/edit.ts
+++ b/src/commands/dashboard/widget/edit.ts
@@ -32,6 +32,8 @@ import {
   resolveDashboardId,
   resolveOrgFromTarget,
   resolveWidgetIndex,
+  validateGroupByRequiresLimit,
+  validateSortReferencesAggregate,
   validateWidgetEnums,
   type WidgetQueryFlags,
 } from "../resolve.js";
@@ -101,15 +103,15 @@ function mergeLayout(
   };
 }
 
-/** Build the replacement widget object by merging flags over existing */
-function buildReplacement(
+/**
+ * Validate enum and aggregate constraints on the effective (merged) widget state.
+ * Extracted from buildReplacement to stay under Biome's complexity limit.
+ */
+function validateEnumsAndAggregates(
   flags: EditFlags,
-  existing: DashboardWidget
-): DashboardWidget {
-  const mergedQueries = mergeQueries(flags, existing.queries?.[0]);
-
-  // Validate aggregates when query or dataset changes — prevents broken widgets
-  // (e.g. switching --dataset from discover to spans with discover-only aggregates)
+  existing: DashboardWidget,
+  mergedQueries: DashboardWidgetQuery[] | undefined
+): void {
   const newDataset = flags.dataset ?? existing.widgetType;
   const aggregatesToValidate =
     mergedQueries?.[0]?.aggregates ?? existing.queries?.[0]?.aggregates;
@@ -117,22 +119,60 @@ function buildReplacement(
     validateAggregateNames(aggregatesToValidate, newDataset);
   }
 
-  const limit = flags.limit !== undefined ? flags.limit : existing.limit;
-
-  const effectiveDisplay = flags.display ?? existing.displayType;
-  const effectiveDataset = flags.dataset ?? existing.widgetType;
-
-  // Re-validate after merging with existing values. validateWidgetEnums only
-  // checks the cross-constraint when both args are provided, so it misses
-  // e.g. `--dataset preprod-app-size` on a widget that's already `table`.
-  // validateWidgetEnums itself skips untracked display types (text, wheel, etc.).
   if (flags.display || flags.dataset) {
+    const effectiveDisplay = flags.display ?? existing.displayType;
+    const effectiveDataset = flags.dataset ?? existing.widgetType;
     validateWidgetEnums(effectiveDisplay, effectiveDataset);
   }
+}
+
+/**
+ * Validate group-by+limit and sort constraints on the effective (merged) widget state.
+ * Only runs when the user changes query, group-by, or sort — not when preserving
+ * existing widget state which may predate these validations.
+ */
+function validateQueryConstraints(
+  flags: EditFlags,
+  existing: DashboardWidget,
+  mergedQueries: DashboardWidgetQuery[] | undefined,
+  limit: number | null | undefined
+): void {
+  // Only validate when user explicitly passes --group-by, not when merely
+  // changing --query on an existing grouped widget (which may have auto-defaulted
+  // columns like ["issue"] with no limit)
+  if (flags["group-by"]) {
+    const columns =
+      mergedQueries?.[0]?.columns ?? existing.queries?.[0]?.columns ?? [];
+    validateGroupByRequiresLimit(columns, limit ?? undefined);
+  }
+
+  // Only validate sort when user explicitly passes --sort, not when merely
+  // changing --query (which may leave the existing auto-defaulted sort stale)
+  if (flags.sort) {
+    const orderby =
+      mergedQueries?.[0]?.orderby ?? existing.queries?.[0]?.orderby;
+    const aggregates =
+      mergedQueries?.[0]?.aggregates ?? existing.queries?.[0]?.aggregates ?? [];
+    if (orderby && aggregates.length > 0) {
+      validateSortReferencesAggregate(orderby, aggregates);
+    }
+  }
+}
+
+/** Build the replacement widget object by merging flags over existing */
+function buildReplacement(
+  flags: EditFlags,
+  existing: DashboardWidget
+): DashboardWidget {
+  const mergedQueries = mergeQueries(flags, existing.queries?.[0]);
+  const limit = flags.limit !== undefined ? flags.limit : existing.limit;
+
+  validateEnumsAndAggregates(flags, existing, mergedQueries);
+  validateQueryConstraints(flags, existing, mergedQueries, limit);
 
   const raw: Record<string, unknown> = {
     title: flags["new-title"] ?? existing.title,
-    displayType: effectiveDisplay,
+    displayType: flags.display ?? existing.displayType,
     queries: mergedQueries ?? existing.queries,
     layout: mergeLayout(flags, existing),
   };
diff --git a/src/commands/dashboard/widget/index.ts b/src/commands/dashboard/widget/index.ts
index facdc39b4..f5efb6263 100644
--- a/src/commands/dashboard/widget/index.ts
+++ b/src/commands/dashboard/widget/index.ts
@@ -19,8 +19,17 @@ export const widgetRoute = buildRouteMap({
       "  specialized: stacked_area (3×2), top_n (3×2), categorical_bar (3×2), text (3×2)\n" +
       "  internal:    details (3×2), wheel (3×2), rage_and_dead_clicks (3×2),\n" +
       "               server_tree (3×2), agents_traces_table (3×2)\n\n" +
-      "Datasets: spans (default), discover, issue, error-events, transaction-like,\n" +
-      "          metrics, logs, tracemetrics, preprod-app-size\n\n" +
+      "Datasets:\n" +
+      "  spans (default)    Span-based queries: span.duration, span.op, transaction,\n" +
+      "                     span attributes, cache.hit, etc. Covers most use cases.\n" +
+      "  tracemetrics       Custom metrics from Sentry.metrics.distribution/gauge/count.\n" +
+      "                     Query format: aggregation(value,metric_name,metric_type,unit)\n" +
+      "                     Example: p50(value,completion.duration_ms,distribution,none)\n" +
+      "                     Supported displays: line, area, bar, big_number, categorical_bar\n" +
+      "  discover           Legacy discover queries (adds failure_rate, apdex, etc.)\n" +
+      "  issue              Issue-based queries\n" +
+      "  error-events       Error event queries\n" +
+      "  logs               Log queries\n\n" +
       "Aggregates (spans): count, count_unique, sum, avg, percentile, p50, p75,\n" +
       "  p90, p95, p99, p100, eps, epm, any, min, max\n" +
       "Aggregates (discover adds): failure_count, failure_rate, apdex,\n" +
@@ -28,6 +37,11 @@ export const widgetRoute = buildRouteMap({
       "  last_seen, latest_event, var, stddev, cov, corr, performance_score,\n" +
       "  opportunity_score, count_scores\n" +
       "Aliases: spm → epm, sps → eps, tpm → epm, tps → eps\n\n" +
+      "tracemetrics query format:\n" +
+      "  aggregation(value,metric_name,metric_type,unit)\n" +
+      "    - metric_name: name passed to Sentry.metrics.distribution/gauge/count\n" +
+      "    - metric_type: distribution, gauge, counter, set\n" +
+      "    - unit: none (if unspecified), byte, second, millisecond, ratio, etc.\n\n" +
       "Row-filling examples:\n" +
       "  # 3 KPIs (2+2+2 = 6)\n" +
       '  sentry dashboard widget add <d> "Error Count" --display big_number --query count\n' +
@@ -40,6 +54,11 @@ export const widgetRoute = buildRouteMap({
       '  sentry dashboard widget add <d> "Top Endpoints" --display table \\\n' +
       "    --query count --query p95:span.duration --group-by transaction \\\n" +
       "    --sort -count --limit 10\n\n" +
+      "  # Custom metrics (tracemetrics dataset)\n" +
+      '  sentry dashboard widget add <d> "Latency" --display line \\\n' +
+      "    --dataset tracemetrics \\\n" +
+      '    --query "p50(value,completion.duration_ms,distribution,none)" \\\n' +
+      '    --query "p90(value,completion.duration_ms,distribution,none)"\n\n' +
       "Commands:\n" +
       "  add    Add a widget to a dashboard\n" +
       "  edit   Edit a widget in a dashboard\n" +
diff --git a/src/types/dashboard.ts b/src/types/dashboard.ts
index 48eda1daa..71707080d 100644
--- a/src/types/dashboard.ts
+++ b/src/types/dashboard.ts
@@ -26,7 +26,6 @@ import { logger } from "../lib/logger.js";
 export const WIDGET_TYPES = [
   "discover",
   "issue",
-  "metrics",
   "error-events",
   "transaction-like",
   "spans",
@@ -360,16 +359,6 @@ export const DATASET_SUPPORTED_DISPLAY_TYPES = {
     "table",
     "top_n",
   ],
-  metrics: [
-    "area",
-    "bar",
-    "big_number",
-    "categorical_bar",
-    "line",
-    "stacked_area",
-    "table",
-    "top_n",
-  ],
   logs: [
     "area",
     "bar",
@@ -478,10 +467,29 @@ function extractFunctionName(aggregate: string): string {
   return parenIdx > 0 ? aggregate.slice(0, parenIdx) : aggregate;
 }
 
+/**
+ * Check whether a parsed aggregate uses the tracemetrics comma-separated format.
+ * Format: `aggregation(value,metric_name,metric_type,unit)`
+ * Example: `p50(value,completion.duration_ms,distribution,none)`
+ */
+function isTracemetricsAggregate(aggregate: string): boolean {
+  const parenIdx = aggregate.indexOf("(");
+  if (parenIdx < 0) {
+    return false;
+  }
+  const inner = aggregate.slice(parenIdx + 1, -1);
+  return inner.startsWith("value,") && inner.split(",").length === 4;
+}
+
 /**
  * Validate that all aggregate function names in a list are known.
  * Throws a ValidationError listing valid functions if any are invalid.
  *
+ * For the `tracemetrics` dataset, aggregates must use the comma-separated
+ * format: `aggregation(value,metric_name,metric_type,unit)`. Standard
+ * span-style aggregates like `count()` or `p50(span.duration)` are
+ * invalid for tracemetrics.
+ *
  * @param aggregates - Parsed aggregate strings (e.g. ["count()", "p95(span.duration)"])
  * @param dataset - Widget dataset, determines which function list to validate against
  */
@@ -489,6 +497,27 @@ export function validateAggregateNames(
   aggregates: string[],
   dataset?: string
 ): void {
+  // tracemetrics uses a different aggregate format — validate structure, not function names
+  if (dataset === "tracemetrics") {
+    for (const agg of aggregates) {
+      if (!isTracemetricsAggregate(agg)) {
+        throw new ValidationError(
+          `Invalid tracemetrics aggregate "${agg}".\n\n` +
+            "tracemetrics queries must use the format: aggregation(value,metric_name,metric_type,unit)\n" +
+            "Example: p50(value,completion.duration_ms,distribution,none)\n\n" +
+            "Parameters:\n" +
+            "  - aggregation: avg, sum, count, p50, p75, p90, p95, p99, min, max\n" +
+            `  - value: literal string "value"\n` +
+            "  - metric_name: the name passed to Sentry.metrics.distribution/gauge/count\n" +
+            "  - metric_type: distribution, gauge, counter, set\n" +
+            "  - unit: none, byte, second, millisecond, etc. (must match SDK emission)",
+          "query"
+        );
+      }
+    }
+    return;
+  }
+
   const validFunctions: readonly string[] =
     dataset === "discover" || dataset === "error-events"
       ? DISCOVER_AGGREGATE_FUNCTIONS
@@ -966,7 +995,7 @@ export type WidgetDataResult =
 /**
  * Maps widget types to API dataset parameter values.
  *
- * Widget types that don't map to a dataset (issue, metrics, etc.)
+ * Widget types that don't map to a dataset (issue, tracemetrics, etc.)
  * return null and are rendered as "unsupported".
  */
 const WIDGET_TYPE_TO_DATASET: Record<string, string> = {
diff --git a/test/commands/dashboard/widget/add.test.ts b/test/commands/dashboard/widget/add.test.ts
index d4f3027ce..9023835bf 100644
--- a/test/commands/dashboard/widget/add.test.ts
+++ b/test/commands/dashboard/widget/add.test.ts
@@ -286,6 +286,7 @@ describe("dashboard widget add", () => {
         display: "table",
         dataset: "issue",
         "group-by": ["project"],
+        limit: 5,
       },
       "123",
       "Issues by Project"
diff --git a/test/types/dashboard.test.ts b/test/types/dashboard.test.ts
index ced8dc131..2297d15fb 100644
--- a/test/types/dashboard.test.ts
+++ b/test/types/dashboard.test.ts
@@ -53,7 +53,6 @@ describe("WIDGET_TYPES", () => {
     const expected: WidgetType[] = [
       "discover",
       "issue",
-      "metrics",
       "error-events",
       "transaction-like",
       "spans",
@@ -770,7 +769,6 @@ describe("mapWidgetTypeToDataset", () => {
 
   test("returns null for unsupported widget types", () => {
     expect(mapWidgetTypeToDataset("issue")).toBeNull();
-    expect(mapWidgetTypeToDataset("metrics")).toBeNull();
     expect(mapWidgetTypeToDataset("tracemetrics")).toBeNull();
     expect(mapWidgetTypeToDataset("preprod-app-size")).toBeNull();
   });