diff --git a/package.json b/package.json index 5c259df7a..0a2bf6823 100644 --- a/package.json +++ b/package.json @@ -68,6 +68,7 @@ "@apify/actor-templates": "^0.1.5", "@apify/consts": "^2.36.0", "@apify/input_schema": "^3.17.0", + "@apify/json_schemas": "^0.13.0", "@apify/utilities": "^2.18.0", "@crawlee/memory-storage": "^3.12.0", "@inquirer/core": "^11.0.0", diff --git a/src/commands/_register.ts b/src/commands/_register.ts index 16657acd8..93c266e05 100644 --- a/src/commands/_register.ts +++ b/src/commands/_register.ts @@ -32,7 +32,7 @@ import { RunsIndexCommand } from './runs/_index.js'; import { SecretsIndexCommand } from './secrets/_index.js'; import { TasksIndexCommand } from './task/_index.js'; import { TelemetryIndexCommand } from './telemetry/_index.js'; -import { ValidateInputSchemaCommand } from './validate-schema.js'; +import { ValidateSchemaCommand } from './validate-schema.js'; export const apifyCommands = [ // namespaces @@ -62,7 +62,7 @@ export const apifyCommands = [ TopLevelPullCommand, ToplevelPushCommand, RunCommand, - ValidateInputSchemaCommand, + ValidateSchemaCommand, HelpCommand, // test commands diff --git a/src/commands/edit-input-schema.ts b/src/commands/edit-input-schema.ts index 533ed1bf9..a50b21cbc 100644 --- a/src/commands/edit-input-schema.ts +++ b/src/commands/edit-input-schema.ts @@ -41,11 +41,19 @@ export class EditInputSchemaCommand extends ApifyCommand>; + + try { + result = await readInputSchema({ + forcePath: this.args.path, + cwd: process.cwd(), + }); + } catch (err) { + error({ message: (err as Error).message }); + return; + } + + const { inputSchema: existingSchema, inputSchemaPath } = result; if (existingSchema && !inputSchemaPath) { // If path is not returned, it means the input schema must be directly embedded as object in actor.json diff --git a/src/commands/run.ts b/src/commands/run.ts index 917f9593f..3e531c778 100644 --- a/src/commands/run.ts +++ b/src/commands/run.ts @@ -424,7 +424,14 @@ export class RunCommand 
extends ApifyCommand {
 	 * @param inputOverride Optional input received through command flags
 	 */
 	private async validateAndStoreInput(inputOverride?: { input: Record; source: string }) {
-		const { inputSchema } = await readInputSchema({ cwd: process.cwd() });
+		let inputSchema: Record | null | undefined;
+
+		try {
+			({ inputSchema } = await readInputSchema({ cwd: process.cwd() }));
+		} catch (err) {
+			warning({ message: (err as Error).message });
+			inputSchema = null;
+		}

 		if (!inputSchema) {
 			if (!inputOverride) {
diff --git a/src/commands/validate-schema.ts b/src/commands/validate-schema.ts
index 71cf4ae54..757631454 100644
--- a/src/commands/validate-schema.ts
+++ b/src/commands/validate-schema.ts
@@ -1,41 +1,169 @@
 import process from 'node:process';

+import { validateInputSchema } from '@apify/input_schema';
+
 import { ApifyCommand } from '../lib/command-framework/apify-command.js';
 import { Args } from '../lib/command-framework/args.js';
-import { LOCAL_CONFIG_PATH } from '../lib/consts.js';
-import { readAndValidateInputSchema } from '../lib/input_schema.js';
-import { success } from '../lib/outputs.js';
+import { CommandExitCodes, LOCAL_CONFIG_PATH } from '../lib/consts.js';
+import {
+	readAndValidateInputSchema,
+	readInputSchema,
+	readStorageSchema,
+	validateDatasetSchema,
+	validateKvsSchema,
+	validateOutputSchema,
+} from '../lib/input_schema.js';
+import { error, info, success } from '../lib/outputs.js';
+import { Ajv2019 } from '../lib/utils.js';

-export class ValidateInputSchemaCommand extends ApifyCommand {
+/** Returns a printable message for a caught value, which is not guaranteed to be an `Error`. */
+function getErrorMessage(err: unknown): string {
+	return err instanceof Error ? err.message : String(err);
+}
+
+/**
+ * `validate-schema` command: checks the input schema at an explicit path or, without a path,
+ * every Actor schema it can find (input, Dataset, Output, Key-Value Store).
+ */
+export class ValidateSchemaCommand extends ApifyCommand {
 	static override name = 'validate-schema' as const;

-	static override description = `Validates Actor input schema from one of these locations (in priority order):
- 1. Object in '${LOCAL_CONFIG_PATH}' under "input" key
- 2. JSON file path in '${LOCAL_CONFIG_PATH}' "input" key
- 3. .actor/INPUT_SCHEMA.json
- 4. INPUT_SCHEMA.json
+	static override description = `Validates Actor schemas.
+
+When a path argument is provided, validates only the input schema at that path.

-Optionally specify custom schema path to validate.`;
+When no path is provided, validates all schemas found in '${LOCAL_CONFIG_PATH}':
+ - Input schema (from "input" key or default locations)
+ - Dataset schema (from "storages.dataset")
+ - Output schema (from "output")
+ - Key-Value Store schema (from "storages.keyValueStore")`;

 	static override args = {
 		path: Args.string({
 			required: false,
-			description: 'Optional path to your INPUT_SCHEMA.json file. If not provided ./INPUT_SCHEMA.json is used.',
+			description:
+				'Optional path to your INPUT_SCHEMA.json file. If not provided, validates all schemas in actor.json.',
 		}),
 	};

 	static override hiddenAliases = ['vis'];

+	/** Validates the input schema at `path` when one is given, otherwise all discoverable schemas. */
 	async run() {
+		if (this.args.path) {
+			await this.validateInputSchemaAtPath(this.args.path);
+			return;
+		}
+
+		await this.validateAllSchemas();
+	}
+
+	/**
+	 * Validates only the input schema located at `forcePath`.
+	 * Errors from `readAndValidateInputSchema` are not caught here and propagate to the caller.
+	 */
+	private async validateInputSchemaAtPath(forcePath: string) {
 		await readAndValidateInputSchema({
-			forcePath: this.args.path,
+			forcePath,
 			cwd: process.cwd(),
-			getMessage: (path) =>
-				path
-					? `Validating input schema at ${path}`
-					: `Validating input schema embedded in '${LOCAL_CONFIG_PATH}'`,
+			getMessage: (path) => `Validating input schema at ${path ?? forcePath}`,
 		});

 		success({ message: 'Input schema is valid.' });
 	}
+
+	/**
+	 * Validates the input schema and the Dataset, Output and Key-Value Store schemas in turn.
+	 *
+	 * A failing schema is reported and the remaining ones are still checked; if any check
+	 * failed, `process.exitCode` is set to `CommandExitCodes.InvalidInput`.
+	 *
+	 * @throws Error when no schema was found at all.
+	 */
+	private async validateAllSchemas() {
+		const cwd = process.cwd();
+		let foundAny = false;
+		let hasErrors = false;
+
+		// Input schema — not using readAndValidateInputSchema here because it throws
+		// when no schema is found; in the all-schemas scan, a missing input schema
+		// should be silently skipped, not treated as an error.
+		try {
+			const { inputSchema, inputSchemaPath } = await readInputSchema({ cwd, throwOnMissing: true });
+
+			if (inputSchema) {
+				foundAny = true;
+
+				const location = inputSchemaPath ? `at ${inputSchemaPath}` : `embedded in '${LOCAL_CONFIG_PATH}'`;
+				info({ message: `Validating input schema ${location}` });
+
+				const validator = new Ajv2019({ strict: false });
+				validateInputSchema(validator, inputSchema);
+				success({ message: 'Input schema is valid.' });
+			}
+		} catch (err) {
+			foundAny = true;
+			hasErrors = true;
+			error({ message: getErrorMessage(err) });
+		}
+
+		// Storage schemas (Dataset, Output, Key-Value Store)
+		const storageSchemas = [
+			{
+				label: 'Dataset',
+				read: () => readStorageSchema({ cwd, key: 'dataset', label: 'Dataset', throwOnMissing: true }),
+				validate: validateDatasetSchema,
+			},
+			{
+				label: 'Output',
+				read: () =>
+					readStorageSchema({
+						cwd,
+						key: 'output',
+						label: 'Output',
+						getRef: (config) => config?.output,
+						throwOnMissing: true,
+					}),
+				validate: validateOutputSchema,
+			},
+			{
+				label: 'Key-Value Store',
+				read: () =>
+					readStorageSchema({ cwd, key: 'keyValueStore', label: 'Key-Value Store', throwOnMissing: true }),
+				validate: validateKvsSchema,
+			},
+		];
+
+		for (const { label, read, validate } of storageSchemas) {
+			try {
+				const result = read();
+
+				if (result) {
+					foundAny = true;
+
+					const location = result.schemaPath
+						? `at ${result.schemaPath}`
+						: `embedded in '${LOCAL_CONFIG_PATH}'`;
+					info({ message: `Validating ${label} schema ${location}` });
+
+					validate(result.schema);
+					success({ message: `${label} schema is valid.` });
+				}
+			} catch (err) {
+				foundAny = true;
+				hasErrors = true;
+				error({ message: getErrorMessage(err) });
+			}
+		}
+
+		if (!foundAny) {
+			throw new Error(
+				`No schemas found. Make sure '${LOCAL_CONFIG_PATH}' exists and defines at least one schema.`,
+			);
+		}
+
+		if (hasErrors) {
+			process.exitCode = CommandExitCodes.InvalidInput;
+		}
+	}
 }
diff --git a/src/lib/input_schema.ts b/src/lib/input_schema.ts
index 9c515c54a..8c95f15ee 100644
--- a/src/lib/input_schema.ts
+++ b/src/lib/input_schema.ts
@@ -1,10 +1,16 @@
 import { existsSync, writeFileSync } from 'node:fs';
 import { join } from 'node:path';

+import type { Ajv, ErrorObject } from 'ajv';
 import { cloneDeep } from 'es-toolkit';

 import { KEY_VALUE_STORE_KEYS } from '@apify/consts';
 import { validateInputSchema } from '@apify/input_schema';
+import {
+	getDatasetSchemaValidator,
+	getKeyValueStoreSchemaValidator,
+	getOutputSchemaValidator,
+} from '@apify/json_schemas';

 import { ACTOR_SPECIFICATION_FOLDER, LOCAL_CONFIG_PATH } from './consts.js';
 import { info, warning } from './outputs.js';
@@ -24,7 +30,15 @@ const DEFAULT_INPUT_SCHEMA_PATHS = [
  * In such a case, path would be set to the location
  * where the input schema would be expected to be found (and e.g. can be created there).
*/ -export const readInputSchema = async ({ forcePath, cwd }: { forcePath?: string; cwd: string }) => { +export const readInputSchema = async ({ + forcePath, + cwd, + throwOnMissing = false, +}: { + forcePath?: string; + cwd: string; + throwOnMissing?: boolean; +}) => { if (forcePath) { return { inputSchema: getJsonFileContent(forcePath), @@ -34,7 +48,7 @@ export const readInputSchema = async ({ forcePath, cwd }: { forcePath?: string; const localConfig = getLocalConfig(cwd); - if (typeof localConfig?.input === 'object') { + if (typeof localConfig?.input === 'object' && localConfig.input !== null) { return { inputSchema: localConfig.input as Record, inputSchemaPath: null, @@ -43,8 +57,25 @@ export const readInputSchema = async ({ forcePath, cwd }: { forcePath?: string; if (typeof localConfig?.input === 'string') { const fullPath = join(cwd, ACTOR_SPECIFICATION_FOLDER, localConfig.input); + const schema = getJsonFileContent(fullPath); + + if (!schema) { + if (throwOnMissing) { + throw new Error(`Input schema file not found at ${fullPath} (referenced in '${LOCAL_CONFIG_PATH}').`); + } + + warning({ + message: `Input schema file not found at ${fullPath} (referenced in '${LOCAL_CONFIG_PATH}').`, + }); + + return { + inputSchema: null, + inputSchemaPath: fullPath, + }; + } + return { - inputSchema: getJsonFileContent(fullPath), + inputSchema: schema, inputSchemaPath: fullPath, }; } @@ -115,11 +146,13 @@ export const readStorageSchema = ({ key, label, getRef, + throwOnMissing = false, }: { cwd: string; key: string; label: string; getRef?: (config: ReturnType) => unknown; + throwOnMissing?: boolean; }): { schema: Record; schemaPath: string | null } | null => { const localConfig = getLocalConfig(cwd); @@ -137,6 +170,12 @@ export const readStorageSchema = ({ const schema = getJsonFileContent(fullPath); if (!schema) { + if (throwOnMissing) { + throw new Error( + `${label} schema file not found at ${fullPath} (referenced in '${LOCAL_CONFIG_PATH}').`, + ); + } + warning({ 
message: `${label} schema file not found at ${fullPath} (referenced in '${LOCAL_CONFIG_PATH}').`,
 		});
@@ -255,8 +294,54 @@ export const getDefaultsFromInputSchema = (inputSchema: any) => {
 	return defaults;
 };

+/**
+ * Formats Ajv validation errors into a multi-line message, one bullet per error,
+ * appending the instance path when the error carries one.
+ */
+function formatSchemaValidationErrors(errors: ErrorObject[], schemaName: string): string {
+	const details = errors
+		.map((err) => {
+			const path = err.instancePath ? ` at ${err.instancePath}` : '';
+			return ` - ${err.message}${path}`;
+		})
+		.join('\n');
+
+	return `${schemaName} schema is not valid:\n${details}`;
+}
+
+/**
+ * Runs a schema validator function against `schema`.
+ *
+ * @throws Error listing the reported violations when the schema does not validate.
+ */
+function assertSchemaValid(
+	validate: { (data: unknown): boolean; errors?: ErrorObject[] | null },
+	schema: Record<string, unknown>,
+	schemaName: string,
+): void {
+	if (!validate(schema)) {
+		// `errors` may be null/undefined per its type, so fall back to an empty list instead of asserting.
+		throw new Error(formatSchemaValidationErrors(validate.errors ?? [], schemaName));
+	}
+}
+
+/** Validates a Dataset schema; throws an `Error` describing the violations when it is invalid. */
+export function validateDatasetSchema(schema: Record<string, unknown>): void {
+	assertSchemaValid(getDatasetSchemaValidator(), schema, 'Dataset');
+}
+
+/** Validates an Output schema; throws an `Error` describing the violations when it is invalid. */
+export function validateOutputSchema(schema: Record<string, unknown>): void {
+	assertSchemaValid(getOutputSchemaValidator(), schema, 'Output');
+}
+
+/** Validates a Key-Value Store schema; throws an `Error` describing the violations when it is invalid. */
+export function validateKvsSchema(schema: Record<string, unknown>): void {
+	assertSchemaValid(getKeyValueStoreSchemaValidator(), schema, 'Key-Value Store');
+}
+
 // Lots of code copied from @apify-packages/actor, this really should be moved to the shared input_schema package
-export const getAjvValidator = (inputSchema: any, ajvInstance: import('ajv').Ajv) => {
+export const getAjvValidator = (inputSchema: any, ajvInstance: Ajv) => {
 	const copyOfSchema = cloneDeep(inputSchema);
 	copyOfSchema.required = [];

diff --git a/test/local/commands/validate-schema.test.ts b/test/local/commands/validate-schema.test.ts
index d9fa9eeab..e80ea44e7 100644
--- a/test/local/commands/validate-schema.test.ts
+++ b/test/local/commands/validate-schema.test.ts
@@ -1,38 +1,256 @@
-import { ValidateInputSchemaCommand } from '../../../src/commands/validate-schema.js';
+import { mkdir, readFile, writeFile } from 'node:fs/promises';
+import { basename, join }
from 'node:path';
+
+import { ValidateSchemaCommand } from '../../../src/commands/validate-schema.js';
 import { testRunCommand } from '../../../src/lib/command-framework/apify-command.js';
+import { CommandExitCodes } from '../../../src/lib/consts.js';
+import { validDatasetSchemaPath } from '../../__setup__/dataset-schemas/paths.js';
 import { useConsoleSpy } from '../../__setup__/hooks/useConsoleSpy.js';
+import { useTempPath } from '../../__setup__/hooks/useTempPath.js';
 import {
 	invalidInputSchemaPath,
 	unparsableInputSchemaPath,
 	validInputSchemaPath,
 } from '../../__setup__/input-schemas/paths.js';
+import { validKvsSchemaPath } from '../../__setup__/kvs-schemas/paths.js';
+import { validOutputSchemaPath } from '../../__setup__/output-schemas/paths.js';
+
+const { lastErrorMessage, logMessages } = useConsoleSpy();
+
+/**
+ * Turns a schema reference into the value stored in actor.json: objects are embedded as-is,
+ * fixture paths are copied into `actorDir` as `<prefix><basename>` and referenced relatively.
+ */
+async function resolveSchemaRef(actorDir: string, ref: string | Record<string, unknown>, prefix = '') {
+	if (typeof ref !== 'string') {
+		return ref;
+	}
+
+	const fileName = `${prefix}${basename(ref)}`;
+	await writeFile(join(actorDir, fileName), await readFile(ref, 'utf-8'));
+	return `./${fileName}`;
+}
+
+/**
+ * Creates `<basePath>/.actor` with an input schema file and an actor.json wiring up the given schemas.
+ */
+async function setupActorConfig(
+	basePath: string,
+	{
+		inputSchema,
+		datasetSchemaRef,
+		outputSchemaRef,
+		kvsSchemaRef,
+	}: {
+		inputSchema?: Record;
+		datasetSchemaRef?: string | Record;
+		outputSchemaRef?: string | Record;
+		kvsSchemaRef?: string | Record;
+	},
+) {
+	const actorDir = join(basePath, '.actor');
+	await mkdir(actorDir, { recursive: true });
+
+	const minimalInput = inputSchema ?? {
+		title: 'Test',
+		type: 'object',
+		schemaVersion: 1,
+		properties: {
+			foo: { title: 'Foo', description: 'A foo field', type: 'string', default: 'bar', editor: 'textfield' },
+		},
+	};
+
+	await writeFile(join(actorDir, 'input_schema.json'), JSON.stringify(minimalInput, null, '\t'));
+
+	const actorJson: Record = {
+		actorSpecification: 1,
+		name: 'test-actor',
+		version: '0.1',
+		input: './input_schema.json',
+	};

-const { lastErrorMessage } = useConsoleSpy();
+	const storages: Record = {};
+
+	if (datasetSchemaRef !== undefined) {
+		storages.dataset = await resolveSchemaRef(actorDir, datasetSchemaRef);
+	}
+
+	if (kvsSchemaRef !== undefined) {
+		storages.keyValueStore = await resolveSchemaRef(actorDir, kvsSchemaRef, 'kvs-');
+	}
+
+	if (Object.keys(storages).length > 0) {
+		actorJson.storages = storages;
+	}
+
+	if (outputSchemaRef !== undefined) {
+		actorJson.output = await resolveSchemaRef(actorDir, outputSchemaRef, 'output-');
+	}
+
+	await writeFile(join(actorDir, 'actor.json'), JSON.stringify(actorJson, null, '\t'));
+}

 describe('apify validate-schema', () => {
-	it('should correctly validate schema 1', async () => {
-		await testRunCommand(ValidateInputSchemaCommand, {
-			args_path: validInputSchemaPath,
+	describe('with path argument (backward compat)', () => {
+		it('should correctly validate schema 1', async () => {
+			await testRunCommand(ValidateSchemaCommand, {
+				args_path: validInputSchemaPath,
+			});
+
+			expect(lastErrorMessage()).toMatch(/is valid/);
 		});

-		expect(lastErrorMessage()).toMatch(/is valid/);
-	});
+		it('should correctly validate schema 2', async () => {
+			await testRunCommand(ValidateSchemaCommand, {
+				args_path: invalidInputSchemaPath,
+			});

-	it('should correctly validate schema 2', async () => {
-		await testRunCommand(ValidateInputSchemaCommand, {
-			args_path: invalidInputSchemaPath,
+			expect(lastErrorMessage()).to.contain(
+				'Field schema.properties.queries.editor must be equal to one of the allowed values',
+			);
 		});

-		expect(lastErrorMessage()).to.contain(
-			'Field schema.properties.queries.editor must be equal to one of the allowed values',
-		);
+		it('should correctly validate schema 3', async () => {
+			await testRunCommand(ValidateSchemaCommand, {
+				args_path: unparsableInputSchemaPath,
+			});
+
+			expect(lastErrorMessage()).to.contain.oneOf([
+				'Unexpected token }',
+				"Expected ',' or ']' after array element",
+			]);
+		});
 	});

-	it('should correctly validate schema 3', async () => {
-		await testRunCommand(ValidateInputSchemaCommand, {
-			args_path: unparsableInputSchemaPath,
+	describe('without path argument (all schemas)', () => {
+		const { joinPath, beforeAllCalls, afterAllCalls } = useTempPath('validate-schema', {
+			create: true,
+			remove: true,
+			cwd: true,
+			cwdParent: false,
+		});
+
+		beforeEach(async () => {
+			await beforeAllCalls();
+		});
+
+		afterEach(async () => {
+			await afterAllCalls();
+			// The command reports validation failures through process.exitCode; don't let it leak between tests.
+			process.exitCode = undefined;
+		});
+
+		it('should validate all schemas when no path is provided', async () => {
+			await setupActorConfig(joinPath(), {
+				datasetSchemaRef: validDatasetSchemaPath,
+				outputSchemaRef: validOutputSchemaPath,
+				kvsSchemaRef: validKvsSchemaPath,
+			});
+
+			await testRunCommand(ValidateSchemaCommand, {});
+
+			const allMessages = logMessages.error.join('\n');
+			expect(allMessages).toContain('Input schema is valid');
+			expect(allMessages).toContain('Dataset schema is valid');
+			expect(allMessages).toContain('Output schema is valid');
+			expect(allMessages).toContain('Key-Value Store schema is valid');
 		});

-		expect(lastErrorMessage()).to.contain.oneOf(['Unexpected token }', "Expected ',' or ']' after array element"]);
+		it('should skip schemas not defined in actor.json', async () => {
+			await setupActorConfig(joinPath(), {});
+
+			await testRunCommand(ValidateSchemaCommand, {});
+
+			const allMessages = logMessages.error.join('\n');
+			expect(allMessages).toContain('Input schema is valid');
+			expect(allMessages).not.toContain('Dataset');
+			expect(allMessages).not.toContain('Output');
+			expect(allMessages).not.toContain('Key-Value Store');
+		});
+
+		it('should report error for invalid dataset schema', async () => {
+			await setupActorConfig(joinPath(), {
+				datasetSchemaRef: {
+					// missing actorSpecification — invalid
+					fields: {},
+					views: {},
+				},
+			});
+
+			await testRunCommand(ValidateSchemaCommand, {});
+
+			const allMessages = logMessages.error.join('\n');
+			expect(allMessages).toContain('Input schema is valid');
+			expect(allMessages).toContain('Dataset schema is not valid');
+		});
+
+		it('should report error for invalid output schema', async () => {
+			await setupActorConfig(joinPath(), {
+				outputSchemaRef: {
+					// missing actorOutputSchemaVersion — invalid
+					properties: {},
+				},
+			});
+
+			await testRunCommand(ValidateSchemaCommand, {});
+
+			const allMessages = logMessages.error.join('\n');
+			expect(allMessages).toContain('Input schema is valid');
+			expect(allMessages).toContain('Output schema is not valid');
+		});
+
+		it('should report error for invalid KVS schema', async () => {
+			await setupActorConfig(joinPath(), {
+				kvsSchemaRef: {
+					// missing actorKeyValueStoreSchemaVersion — invalid
+					collections: {},
+				},
+			});
+
+			await testRunCommand(ValidateSchemaCommand, {});
+
+			const allMessages = logMessages.error.join('\n');
+			expect(allMessages).toContain('Input schema is valid');
+			expect(allMessages).toContain('Key-Value Store schema is not valid');
+		});
+
+		it('should only validate input schema when path arg is provided', async () => {
+			await setupActorConfig(joinPath(), {
+				datasetSchemaRef: validDatasetSchemaPath,
+				outputSchemaRef: validOutputSchemaPath,
+				kvsSchemaRef: validKvsSchemaPath,
+			});
+
+			await testRunCommand(ValidateSchemaCommand, {
+				args_path: validInputSchemaPath,
+			});
+
+			const allMessages = logMessages.error.join('\n');
+			expect(allMessages).toContain('Input schema is valid');
+			expect(allMessages).not.toContain('Dataset');
+			expect(allMessages).not.toContain('Output');
+			expect(allMessages).not.toContain('Key-Value Store');
+		});
+
+		it('should continue validating remaining schemas when one fails', async () => {
+			await setupActorConfig(joinPath(), {
+				datasetSchemaRef: {
+					// invalid dataset schema
+					fields: {},
+					views: {},
+				},
+				outputSchemaRef: validOutputSchemaPath,
+				kvsSchemaRef: validKvsSchemaPath,
+			});
+
+			await testRunCommand(ValidateSchemaCommand, {});
+
+			const allMessages = logMessages.error.join('\n');
+			expect(allMessages).toContain('Input schema is valid');
+			expect(allMessages).toContain('Dataset schema is not valid');
+			expect(allMessages).toContain('Output schema is valid');
+			expect(allMessages).toContain('Key-Value Store schema is valid');
+			expect(process.exitCode).toBe(CommandExitCodes.InvalidInput);
+		});
 	});
 });
diff --git a/test/local/lib/command-framework.test.ts b/test/local/lib/command-framework.test.ts
index c254e6de7..b319330b8 100644
--- a/test/local/lib/command-framework.test.ts
+++ b/test/local/lib/command-framework.test.ts
@@ -1,10 +1,10 @@
-import { ValidateInputSchemaCommand } from '../../../src/commands/validate-schema.js';
+import { ValidateSchemaCommand } from '../../../src/commands/validate-schema.js';
 import { testRunCommand } from '../../../src/lib/command-framework/apify-command.js';
 import { validInputSchemaPath } from '../../__setup__/input-schemas/paths.js';

 describe('Command Framework', () => {
 	test('testRunCommand helper works', async () => {
-		await testRunCommand(ValidateInputSchemaCommand, {
+		await testRunCommand(ValidateSchemaCommand, {
 			args_path: validInputSchemaPath,
 		});
 	});
diff --git a/yarn.lock b/yarn.lock
index 8c5652016..751ba6213 100644
--- a/yarn.lock
+++ b/yarn.lock
@@ -96,6 +96,16 @@ __metadata:
   languageName: node
   linkType: hard

+"@apify/json_schemas@npm:^0.13.0":
+  version: 0.13.0
+  resolution: "@apify/json_schemas@npm:0.13.0"
+  dependencies:
+    "@apify/consts": "npm:^2.51.0"
+    ajv: "npm:^8.17.1"
+  checksum: 10c0/2612be7a73802b810c0bf82fc45ee8a5e3488b94303a66b66af1b13b8dc0b8aa8b4ca88e63fb7340fd9e82f6fa27f3b0d2ab66d51578b4a351ba47f6358887a4
+  languageName: node
+  linkType: hard
+
 "@apify/json_schemas@npm:^0.14.2":
   version: 0.14.2
   resolution: "@apify/json_schemas@npm:0.14.2"
@@ -2398,6 +2408,7 @@ __metadata:
     "@apify/consts": "npm:^2.36.0"
     "@apify/eslint-config": "npm:^1.0.0"
     "@apify/input_schema": "npm:^3.17.0"
+    "@apify/json_schemas": "npm:^0.13.0"
     "@apify/tsconfig": "npm:^0.1.1"
     "@apify/utilities": "npm:^2.18.0"
     "@biomejs/biome": "npm:^2.0.0"