diff --git a/apps/docs/components/icons.tsx b/apps/docs/components/icons.tsx index 9e68974089..9da5a57508 100644 --- a/apps/docs/components/icons.tsx +++ b/apps/docs/components/icons.tsx @@ -4477,6 +4477,17 @@ export function SSHIcon(props: SVGProps) { ) } +export function DatabricksIcon(props: SVGProps) { + return ( + + + + ) +} + export function DatadogIcon(props: SVGProps) { return ( diff --git a/apps/docs/components/ui/icon-mapping.ts b/apps/docs/components/ui/icon-mapping.ts index 822ce48aeb..a1af3619d0 100644 --- a/apps/docs/components/ui/icon-mapping.ts +++ b/apps/docs/components/ui/icon-mapping.ts @@ -24,6 +24,7 @@ import { CloudflareIcon, ConfluenceIcon, CursorIcon, + DatabricksIcon, DatadogIcon, DevinIcon, DiscordIcon, @@ -174,6 +175,7 @@ export const blockTypeToIconMap: Record = { cloudflare: CloudflareIcon, confluence_v2: ConfluenceIcon, cursor_v2: CursorIcon, + databricks: DatabricksIcon, datadog: DatadogIcon, devin: DevinIcon, discord: DiscordIcon, diff --git a/apps/docs/content/docs/en/tools/databricks.mdx b/apps/docs/content/docs/en/tools/databricks.mdx new file mode 100644 index 0000000000..3981776de6 --- /dev/null +++ b/apps/docs/content/docs/en/tools/databricks.mdx @@ -0,0 +1,252 @@ +--- +title: Databricks +description: Run SQL queries and manage jobs on Databricks +--- + +import { BlockInfoCard } from "@/components/ui/block-info-card" + + + +## Usage Instructions + +Connect to Databricks to execute SQL queries against SQL warehouses, trigger and monitor job runs, manage clusters, and retrieve run outputs. Requires a Personal Access Token and workspace host URL. + + + +## Tools + +### `databricks_execute_sql` + +Execute a SQL statement against a Databricks SQL warehouse and return results inline. Supports parameterized queries and Unity Catalog. + +#### Input + +| Parameter | Type | Required | Description | +| --------- | ---- | -------- | ----------- | +| `host` | string | Yes | Databricks workspace host \(e.g., dbc-abc123.cloud.databricks.com\) | +| `apiKey` | string | Yes | Databricks Personal Access Token | +| `warehouseId` | string | Yes | The ID of the SQL warehouse to execute against | +| `statement` | string | Yes | The SQL statement to execute \(max 16 MiB\) | +| `catalog` | string | No | Unity Catalog name \(equivalent to USE CATALOG\) | +| `schema` | string | No | Schema name \(equivalent to USE SCHEMA\) | +| `rowLimit` | number | No | Maximum number of rows to return | +| `waitTimeout` | string | No | How long to wait for results \(e.g., "50s"\). Range: "0s" or "5s" to "50s". Default: "50s" | + +#### Output + +| Parameter | Type | Description | +| --------- | ---- | ----------- | +| `statementId` | string | Unique identifier for the executed statement | +| `status` | string | Execution status \(SUCCEEDED, PENDING, RUNNING, FAILED, CANCELED, CLOSED\) | +| `columns` | array | Column schema of the result set | +| ↳ `name` | string | Column name | +| ↳ `position` | number | Column position \(0-based\) | +| ↳ `typeName` | string | Column type \(STRING, INT, LONG, DOUBLE, BOOLEAN, TIMESTAMP, DATE, DECIMAL, etc.\) | +| `data` | array | Result rows as a 2D array of strings where each inner array is a row of column values | +| `totalRows` | number | Total number of rows in the result | +| `truncated` | boolean | Whether the result set was truncated due to row_limit or byte_limit | + +### `databricks_list_jobs` + +List all jobs in a Databricks workspace with optional filtering by name. + +#### Input + +| Parameter | Type | Required | Description | +| --------- | ---- | -------- | ----------- | +| `host` | string | Yes | Databricks workspace host \(e.g., dbc-abc123.cloud.databricks.com\) | +| `apiKey` | string | Yes | Databricks Personal Access Token | +| `limit` | number | No | Maximum number of jobs to return \(range 1-100, default 20\) | +| `offset` | number | No | Offset for pagination | +| `name` | string | No | Filter jobs by exact name \(case-insensitive\) | +| `expandTasks` | boolean | No | Include task and cluster details in the response \(max 100 elements\) | + +#### Output + +| Parameter | Type | Description | +| --------- | ---- | ----------- | +| `jobs` | array | List of jobs in the workspace | +| ↳ `jobId` | number | Unique job identifier | +| ↳ `name` | string | Job name | +| ↳ `createdTime` | number | Job creation timestamp \(epoch ms\) | +| ↳ `creatorUserName` | string | Email of the job creator | +| ↳ `maxConcurrentRuns` | number | Maximum number of concurrent runs | +| ↳ `format` | string | Job format \(SINGLE_TASK or MULTI_TASK\) | +| `hasMore` | boolean | Whether more jobs are available for pagination | +| `nextPageToken` | string | Token for fetching the next page of results | + +### `databricks_run_job` + +Trigger an existing Databricks job to run immediately with optional job-level or notebook parameters. + +#### Input + +| Parameter | Type | Required | Description | +| --------- | ---- | -------- | ----------- | +| `host` | string | Yes | Databricks workspace host \(e.g., dbc-abc123.cloud.databricks.com\) | +| `apiKey` | string | Yes | Databricks Personal Access Token | +| `jobId` | number | Yes | The ID of the job to trigger | +| `jobParameters` | string | No | Job-level parameter overrides as a JSON object \(e.g., \{"key": "value"\}\) | +| `notebookParams` | string | No | Notebook task parameters as a JSON object \(e.g., \{"param1": "value1"\}\) | +| `idempotencyToken` | string | No | Idempotency token to prevent duplicate runs \(max 64 characters\) | + +#### Output + +| Parameter | Type | Description | +| --------- | ---- | ----------- | +| `runId` | number | The globally unique ID of the triggered run | +| `numberInJob` | number | The sequence number of this run among all runs of the job | + +### `databricks_get_run` + +Get the status, timing, and details of a Databricks job run by its run ID. + +#### Input + +| Parameter | Type | Required | Description | +| --------- | ---- | -------- | ----------- | +| `host` | string | Yes | Databricks workspace host \(e.g., dbc-abc123.cloud.databricks.com\) | +| `apiKey` | string | Yes | Databricks Personal Access Token | +| `runId` | number | Yes | The canonical identifier of the run | +| `includeHistory` | boolean | No | Include repair history in the response | +| `includeResolvedValues` | boolean | No | Include resolved parameter values in the response | + +#### Output + +| Parameter | Type | Description | +| --------- | ---- | ----------- | +| `runId` | number | The run ID | +| `jobId` | number | The job ID this run belongs to | +| `runName` | string | Name of the run | +| `runType` | string | Type of run \(JOB_RUN, WORKFLOW_RUN, SUBMIT_RUN\) | +| `attemptNumber` | number | Retry attempt number \(0 for initial attempt\) | +| `state` | object | Run state information | +| ↳ `lifeCycleState` | string | Lifecycle state \(QUEUED, PENDING, RUNNING, TERMINATING, TERMINATED, SKIPPED, INTERNAL_ERROR, BLOCKED, WAITING_FOR_RETRY\) | +| ↳ `resultState` | string | Result state \(SUCCESS, FAILED, TIMEDOUT, CANCELED, SUCCESS_WITH_FAILURES, UPSTREAM_FAILED, UPSTREAM_CANCELED, EXCLUDED\) | +| ↳ `stateMessage` | string | Descriptive message for the current state | +| ↳ `userCancelledOrTimedout` | boolean | Whether the run was cancelled by user or timed out | +| `startTime` | number | Run start timestamp \(epoch ms\) | +| `endTime` | number | Run end timestamp \(epoch ms, 0 if still running\) | +| `setupDuration` | number | Cluster setup duration \(ms\) | +| `executionDuration` | number | Execution duration \(ms\) | +| `cleanupDuration` | number | Cleanup duration \(ms\) | +| `queueDuration` | number | Time spent in queue before execution \(ms\) | +| `runPageUrl` | string | URL to the run detail page in Databricks UI | +| `creatorUserName` | string | Email of the user who triggered the run | + +### `databricks_list_runs` + +List job runs in a Databricks workspace with optional filtering by job, status, and time range. + +#### Input + +| Parameter | Type | Required | Description | +| --------- | ---- | -------- | ----------- | +| `host` | string | Yes | Databricks workspace host \(e.g., dbc-abc123.cloud.databricks.com\) | +| `apiKey` | string | Yes | Databricks Personal Access Token | +| `jobId` | number | No | Filter runs by job ID. Omit to list runs across all jobs | +| `activeOnly` | boolean | No | Only include active runs \(PENDING, RUNNING, or TERMINATING\) | +| `completedOnly` | boolean | No | Only include completed runs | +| `limit` | number | No | Maximum number of runs to return \(range 1-24, default 20\) | +| `offset` | number | No | Offset for pagination | +| `runType` | string | No | Filter by run type \(JOB_RUN, WORKFLOW_RUN, SUBMIT_RUN\) | +| `startTimeFrom` | number | No | Filter runs started at or after this timestamp \(epoch ms\) | +| `startTimeTo` | number | No | Filter runs started at or before this timestamp \(epoch ms\) | + +#### Output + +| Parameter | Type | Description | +| --------- | ---- | ----------- | +| `runs` | array | List of job runs | +| ↳ `runId` | number | Unique run identifier | +| ↳ `jobId` | number | Job this run belongs to | +| ↳ `runName` | string | Run name | +| ↳ `runType` | string | Run type \(JOB_RUN, WORKFLOW_RUN, SUBMIT_RUN\) | +| ↳ `state` | object | Run state information | +| ↳ `lifeCycleState` | string | Lifecycle state \(QUEUED, PENDING, RUNNING, TERMINATING, TERMINATED, SKIPPED, INTERNAL_ERROR, BLOCKED, WAITING_FOR_RETRY\) | +| ↳ `resultState` | string | Result state \(SUCCESS, FAILED, TIMEDOUT, CANCELED, SUCCESS_WITH_FAILURES, UPSTREAM_FAILED, UPSTREAM_CANCELED, EXCLUDED\) | +| ↳ `stateMessage` | string | Descriptive state message | +| ↳ `userCancelledOrTimedout` | boolean | Whether the run was cancelled by user or timed out | +| ↳ `startTime` | number | Run start timestamp \(epoch ms\) | +| ↳ `endTime` | number | Run end timestamp \(epoch ms\) | +| `hasMore` | boolean | Whether more runs are available for pagination | +| `nextPageToken` | string | Token for fetching the next page of results | + +### `databricks_cancel_run` + +Cancel a running or pending Databricks job run. Cancellation is asynchronous; poll the run status to confirm termination. + +#### Input + +| Parameter | Type | Required | Description | +| --------- | ---- | -------- | ----------- | +| `host` | string | Yes | Databricks workspace host \(e.g., dbc-abc123.cloud.databricks.com\) | +| `apiKey` | string | Yes | Databricks Personal Access Token | +| `runId` | number | Yes | The canonical identifier of the run to cancel | + +#### Output + +| Parameter | Type | Description | +| --------- | ---- | ----------- | +| `success` | boolean | Whether the cancel request was accepted | + +### `databricks_get_run_output` + +Get the output of a completed Databricks job run, including notebook results, error messages, and logs. For multi-task jobs, use the task run ID (not the parent run ID). + +#### Input + +| Parameter | Type | Required | Description | +| --------- | ---- | -------- | ----------- | +| `host` | string | Yes | Databricks workspace host \(e.g., dbc-abc123.cloud.databricks.com\) | +| `apiKey` | string | Yes | Databricks Personal Access Token | +| `runId` | number | Yes | The run ID to get output for. For multi-task jobs, use the task run ID | + +#### Output + +| Parameter | Type | Description | +| --------- | ---- | ----------- | +| `notebookOutput` | object | Notebook task output \(from dbutils.notebook.exit\(\)\) | +| ↳ `result` | string | Value passed to dbutils.notebook.exit\(\) \(max 5 MB\) | +| ↳ `truncated` | boolean | Whether the result was truncated | +| `error` | string | Error message if the run failed or output is unavailable | +| `errorTrace` | string | Error stack trace if available | +| `logs` | string | Log output \(last 5 MB\) from spark_jar, spark_python, or python_wheel tasks | +| `logsTruncated` | boolean | Whether the log output was truncated | + +### `databricks_list_clusters` + +List all clusters in a Databricks workspace including their state, configuration, and resource details. + +#### Input + +| Parameter | Type | Required | Description | +| --------- | ---- | -------- | ----------- | +| `host` | string | Yes | Databricks workspace host \(e.g., dbc-abc123.cloud.databricks.com\) | +| `apiKey` | string | Yes | Databricks Personal Access Token | + +#### Output + +| Parameter | Type | Description | +| --------- | ---- | ----------- | +| `clusters` | array | List of clusters in the workspace | +| ↳ `clusterId` | string | Unique cluster identifier | +| ↳ `clusterName` | string | Cluster display name | +| ↳ `state` | string | Current state \(PENDING, RUNNING, RESTARTING, RESIZING, TERMINATING, TERMINATED, ERROR, UNKNOWN\) | +| ↳ `stateMessage` | string | Human-readable state description | +| ↳ `creatorUserName` | string | Email of the cluster creator | +| ↳ `sparkVersion` | string | Spark runtime version \(e.g., 13.3.x-scala2.12\) | +| ↳ `nodeTypeId` | string | Worker node type identifier | +| ↳ `driverNodeTypeId` | string | Driver node type identifier | +| ↳ `numWorkers` | number | Number of worker nodes \(for fixed-size clusters\) | +| ↳ `autoscale` | object | Autoscaling configuration \(null for fixed-size clusters\) | +| ↳ `minWorkers` | number | Minimum number of workers | +| ↳ `maxWorkers` | number | Maximum number of workers | +| ↳ `clusterSource` | string | Origin \(API, UI, JOB, MODELS, PIPELINE, PIPELINE_MAINTENANCE, SQL\) | +| ↳ `autoterminationMinutes` | number | Minutes of inactivity before auto-termination \(0 = disabled\) | +| ↳ `startTime` | number | Cluster start timestamp \(epoch ms\) | + + diff --git a/apps/docs/content/docs/en/tools/meta.json b/apps/docs/content/docs/en/tools/meta.json index a089247e2e..ec0851ee59 100644 --- a/apps/docs/content/docs/en/tools/meta.json +++ b/apps/docs/content/docs/en/tools/meta.json @@ -20,6 +20,7 @@ "cloudflare", "confluence", "cursor", + "databricks", "datadog", "devin", "discord", diff --git a/apps/sim/blocks/blocks/databricks.ts b/apps/sim/blocks/blocks/databricks.ts new file mode 100644 index 0000000000..8432164de6 --- /dev/null +++ b/apps/sim/blocks/blocks/databricks.ts @@ -0,0 +1,418 @@ +import { DatabricksIcon } from '@/components/icons' +import type { BlockConfig } from '@/blocks/types' +import { AuthMode } from '@/blocks/types' +import type { DatabricksResponse } from '@/tools/databricks/types' + +export const DatabricksBlock: BlockConfig = { + type: 'databricks', + name: 'Databricks', + description: 'Run SQL queries and manage jobs on Databricks', + authMode: AuthMode.ApiKey, + longDescription: + 'Connect to Databricks to execute SQL queries against SQL warehouses, trigger and monitor job runs, manage clusters, and retrieve run outputs. Requires a Personal Access Token and workspace host URL.', + docsLink: 'https://docs.sim.ai/tools/databricks', + category: 'tools', + bgColor: '#FF3621', + icon: DatabricksIcon, + subBlocks: [ + { + id: 'operation', + title: 'Operation', + type: 'dropdown', + options: [ + { label: 'Execute SQL', id: 'execute_sql' }, + { label: 'List Jobs', id: 'list_jobs' }, + { label: 'Run Job', id: 'run_job' }, + { label: 'Get Run', id: 'get_run' }, + { label: 'List Runs', id: 'list_runs' }, + { label: 'Cancel Run', id: 'cancel_run' }, + { label: 'Get Run Output', id: 'get_run_output' }, + { label: 'List Clusters', id: 'list_clusters' }, + ], + value: () => 'execute_sql', + }, + + // ── Execute SQL ── + { + id: 'warehouseId', + title: 'Warehouse ID', + type: 'short-input', + placeholder: 'Enter SQL warehouse ID', + condition: { field: 'operation', value: 'execute_sql' }, + required: { field: 'operation', value: 'execute_sql' }, + }, + { + id: 'statement', + title: 'SQL Statement', + type: 'code', + placeholder: 'SELECT * FROM my_table LIMIT 10', + condition: { field: 'operation', value: 'execute_sql' }, + required: { field: 'operation', value: 'execute_sql' }, + }, + { + id: 'catalog', + title: 'Catalog', + type: 'short-input', + placeholder: 'Unity Catalog name', + condition: { field: 'operation', value: 'execute_sql' }, + mode: 'advanced', + }, + { + id: 'schema', + title: 'Schema', + type: 'short-input', + placeholder: 'Schema name', + condition: { field: 'operation', value: 'execute_sql' }, + mode: 'advanced', + }, + { + id: 'rowLimit', + title: 'Row Limit', + type: 'short-input', + placeholder: 'Max rows to return', + condition: { field: 'operation', value: 'execute_sql' }, + mode: 'advanced', + }, + { + id: 'waitTimeout', + title: 'Wait Timeout', + type: 'short-input', + placeholder: '50s', + condition: { field: 'operation', value: 'execute_sql' }, + mode: 'advanced', + }, + + // ── List Jobs ── + { + id: 'name', + title: 'Job Name Filter', + type: 'short-input', + placeholder: 'Exact name filter (case-insensitive)', + condition: { field: 'operation', value: 'list_jobs' }, + }, + { + id: 'expandTasks', + title: 'Expand Tasks', + type: 'dropdown', + options: [ + { label: 'No', id: 'false' }, + { label: 'Yes', id: 'true' }, + ], + value: () => 'false', + condition: { field: 'operation', value: 'list_jobs' }, + mode: 'advanced', + }, + { + id: 'limit', + title: 'Limit', + type: 'short-input', + placeholder: '20', + condition: { field: 'operation', value: ['list_jobs', 'list_runs'] }, + mode: 'advanced', + }, + { + id: 'offset', + title: 'Offset', + type: 'short-input', + placeholder: '0', + condition: { field: 'operation', value: ['list_jobs', 'list_runs'] }, + mode: 'advanced', + }, + + // ── Run Job ── + { + id: 'jobId', + title: 'Job ID', + type: 'short-input', + placeholder: 'Enter the job ID', + condition: { field: 'operation', value: ['run_job', 'list_runs'] }, + required: { field: 'operation', value: 'run_job' }, + }, + { + id: 'jobParameters', + title: 'Job Parameters', + type: 'code', + placeholder: '{"key": "value"}', + condition: { field: 'operation', value: 'run_job' }, + mode: 'advanced', + wandConfig: { + enabled: true, + prompt: `Generate a JSON object of job parameters based on the user's description. + +Examples: +- "set date to yesterday" -> {"date": "2024-01-14"} +- "process the sales data for Q4" -> {"quarter": "Q4", "dataset": "sales"} +- "run with debug mode enabled" -> {"debug": "true"} + +Return ONLY a valid JSON object - no explanations, no extra text.`, + placeholder: 'Describe the job parameters (e.g., "set date to yesterday")...', + }, + }, + { + id: 'notebookParams', + title: 'Notebook Parameters', + type: 'code', + placeholder: '{"param1": "value1"}', + condition: { field: 'operation', value: 'run_job' }, + mode: 'advanced', + wandConfig: { + enabled: true, + prompt: `Generate a JSON object of notebook parameters based on the user's description. + +Examples: +- "input path is /data/raw and output path is /data/processed" -> {"input_path": "/data/raw", "output_path": "/data/processed"} +- "batch size 1000, dry run" -> {"batch_size": "1000", "dry_run": "true"} + +Return ONLY a valid JSON object - no explanations, no extra text.`, + placeholder: 'Describe the notebook parameters...', + }, + }, + { + id: 'idempotencyToken', + title: 'Idempotency Token', + type: 'short-input', + placeholder: 'Unique token to prevent duplicate runs (max 64 chars)', + condition: { field: 'operation', value: 'run_job' }, + mode: 'advanced', + }, + + // ── Get Run ── + { + id: 'runId', + title: 'Run ID', + type: 'short-input', + placeholder: 'Enter the run ID', + condition: { field: 'operation', value: ['get_run', 'cancel_run', 'get_run_output'] }, + required: { field: 'operation', value: ['get_run', 'cancel_run', 'get_run_output'] }, + }, + { + id: 'includeHistory', + title: 'Include History', + type: 'dropdown', + options: [ + { label: 'No', id: 'false' }, + { label: 'Yes', id: 'true' }, + ], + value: () => 'false', + condition: { field: 'operation', value: 'get_run' }, + mode: 'advanced', + }, + { + id: 'includeResolvedValues', + title: 'Include Resolved Values', + type: 'dropdown', + options: [ + { label: 'No', id: 'false' }, + { label: 'Yes', id: 'true' }, + ], + value: () => 'false', + condition: { field: 'operation', value: 'get_run' }, + mode: 'advanced', + }, + + // ── List Runs ── + { + id: 'activeOnly', + title: 'Active Only', + type: 'dropdown', + options: [ + { label: 'No', id: 'false' }, + { label: 'Yes', id: 'true' }, + ], + value: () => 'false', + condition: { field: 'operation', value: 'list_runs' }, + mode: 'advanced', + }, + { + id: 'completedOnly', + title: 'Completed Only', + type: 'dropdown', + options: [ + { label: 'No', id: 'false' }, + { label: 'Yes', id: 'true' }, + ], + value: () => 'false', + condition: { field: 'operation', value: 'list_runs' }, + mode: 'advanced', + }, + { + id: 'runType', + title: 'Run Type', + type: 'dropdown', + options: [ + { label: 'All', id: '' }, + { label: 'Job Run', id: 'JOB_RUN' }, + { label: 'Workflow Run', id: 'WORKFLOW_RUN' }, + { label: 'Submit Run', id: 'SUBMIT_RUN' }, + ], + value: () => '', + condition: { field: 'operation', value: 'list_runs' }, + mode: 'advanced', + }, + { + id: 'startTimeFrom', + title: 'Start Time From', + type: 'short-input', + placeholder: 'Epoch ms (e.g., 1700000000000)', + condition: { field: 'operation', value: 'list_runs' }, + mode: 'advanced', + wandConfig: { + enabled: true, + prompt: `Convert the user's date/time description to an epoch timestamp in milliseconds. + +Examples: +- "yesterday" -> epoch ms for yesterday at 00:00 UTC +- "last week" -> epoch ms for 7 days ago at 00:00 UTC +- "2024-01-15" -> epoch ms for 2024-01-15T00:00:00Z +- "start of this month" -> epoch ms for 1st day of current month + +Return ONLY the numeric timestamp in milliseconds - no explanations, no extra text.`, + placeholder: 'Describe the start time (e.g., "yesterday", "last week")...', + generationType: 'timestamp', + }, + }, + { + id: 'startTimeTo', + title: 'Start Time To', + type: 'short-input', + placeholder: 'Epoch ms (e.g., 1700100000000)', + condition: { field: 'operation', value: 'list_runs' }, + mode: 'advanced', + wandConfig: { + enabled: true, + prompt: `Convert the user's date/time description to an epoch timestamp in milliseconds. + +Examples: +- "now" -> current epoch ms +- "today" -> epoch ms for today at 23:59:59 UTC +- "end of last week" -> epoch ms for last Sunday at 23:59:59 UTC +- "2024-01-15" -> epoch ms for 2024-01-15T23:59:59Z + +Return ONLY the numeric timestamp in milliseconds - no explanations, no extra text.`, + placeholder: 'Describe the end time (e.g., "now", "end of last week")...', + generationType: 'timestamp', + }, + }, + + // ── Credentials (common to all operations) ── + { + id: 'host', + title: 'Workspace Host', + type: 'short-input', + placeholder: 'dbc-abc123.cloud.databricks.com', + required: true, + }, + { + id: 'apiKey', + title: 'Access Token', + type: 'short-input', + placeholder: 'Enter your Databricks Personal Access Token', + password: true, + required: true, + }, + ], + tools: { + access: [ + 'databricks_execute_sql', + 'databricks_list_jobs', + 'databricks_run_job', + 'databricks_get_run', + 'databricks_list_runs', + 'databricks_cancel_run', + 'databricks_get_run_output', + 'databricks_list_clusters', + ], + config: { + tool: (params) => `databricks_${params.operation}`, + params: (params) => { + const result: Record = {} + if (params.jobId) result.jobId = Number(params.jobId) + if (params.runId) result.runId = Number(params.runId) + if (params.rowLimit) result.rowLimit = Number(params.rowLimit) + if (params.limit) result.limit = Number(params.limit) + if (params.offset) result.offset = Number(params.offset) + if (params.startTimeFrom) result.startTimeFrom = Number(params.startTimeFrom) + if (params.startTimeTo) result.startTimeTo = Number(params.startTimeTo) + result.includeHistory = params.includeHistory === 'true' + result.includeResolvedValues = params.includeResolvedValues === 'true' + result.activeOnly = params.activeOnly === 'true' + result.completedOnly = params.completedOnly === 'true' + result.expandTasks = params.expandTasks === 'true' + if (params.runType === '') result.runType = undefined + return result + }, + }, + }, + inputs: { + operation: { type: 'string', description: 'Operation to perform' }, + host: { type: 'string', description: 'Databricks workspace host URL' }, + apiKey: { type: 'string', description: 'Databricks Personal Access Token' }, + warehouseId: { type: 'string', description: 'SQL warehouse ID' }, + statement: { type: 'string', description: 'SQL statement to execute' }, + catalog: { type: 'string', description: 'Unity Catalog name' }, + schema: { type: 'string', description: 'Schema name' }, + rowLimit: { type: 'number', description: 'Maximum rows to return' }, + waitTimeout: { type: 'string', description: 'Wait timeout (e.g., "50s")' }, + jobId: { type: 'number', description: 'Job ID' }, + jobParameters: { type: 'string', description: 'Job-level parameters as JSON' }, + notebookParams: { type: 'string', description: 'Notebook task parameters as JSON' }, + idempotencyToken: { type: 'string', description: 'Idempotency token for duplicate prevention' }, + runId: { type: 'number', description: 'Run ID' }, + includeHistory: { type: 'boolean', description: 'Include repair history' }, + includeResolvedValues: { type: 'boolean', description: 'Include resolved parameter values' }, + name: { type: 'string', description: 'Job name filter' }, + limit: { type: 'number', description: 'Maximum results to return' }, + offset: { type: 'number', description: 'Pagination offset' }, + expandTasks: { type: 'boolean', description: 'Include task and cluster details' }, + activeOnly: { type: 'boolean', description: 'Only active runs' }, + completedOnly: { type: 'boolean', description: 'Only completed runs' }, + runType: { type: 'string', description: 'Filter by run type' }, + startTimeFrom: { type: 'number', description: 'Filter runs started after (epoch ms)' }, + startTimeTo: { type: 'number', description: 'Filter runs started before (epoch ms)' }, + }, + outputs: { + // Execute SQL + statementId: { type: 'string', description: 'Statement ID' }, + status: { type: 'string', description: 'Execution status' }, + columns: { type: 'json', description: 'Result column schema' }, + data: { type: 'json', description: 'Result rows as 2D array' }, + totalRows: { type: 'number', description: 'Total row count' }, + truncated: { type: 'boolean', description: 'Whether results were truncated' }, + // List Jobs + jobs: { type: 'json', description: 'List of jobs' }, + hasMore: { type: 'boolean', description: 'Whether more results are available' }, + nextPageToken: { type: 'string', description: 'Pagination token for next page' }, + // Run Job + runId: { type: 'number', description: 'Triggered run ID' }, + numberInJob: { type: 'number', description: 'Run sequence number in job' }, + // Get Run + jobId: { type: 'number', description: 'Job ID the run belongs to' }, + runName: { type: 'string', description: 'Run name' }, + runType: { type: 'string', description: 'Run type (JOB_RUN, WORKFLOW_RUN, SUBMIT_RUN)' }, + attemptNumber: { type: 'number', description: 'Retry attempt number' }, + state: { + type: 'json', + description: 'Run state with lifeCycleState, resultState, stateMessage', + }, + startTime: { type: 'number', description: 'Run start time (epoch ms)' }, + endTime: { type: 'number', description: 'Run end time (epoch ms)' }, + setupDuration: { type: 'number', description: 'Cluster setup duration (ms)' }, + executionDuration: { type: 'number', description: 'Execution duration (ms)' }, + cleanupDuration: { type: 'number', description: 'Cleanup duration (ms)' }, + queueDuration: { type: 'number', description: 'Time spent in queue (ms)' }, + runPageUrl: { type: 'string', description: 'URL to run detail page' }, + creatorUserName: { type: 'string', description: 'Run creator email' }, + // List Runs + runs: { type: 'json', description: 'List of job runs' }, + // Cancel Run + success: { type: 'boolean', description: 'Whether the cancel request was accepted' }, + // Get Run Output + notebookOutput: { type: 'json', description: 'Notebook task output' }, + error: { type: 'string', description: 'Error message if run failed' }, + errorTrace: { type: 'string', description: 'Error stack trace' }, + logs: { type: 'string', description: 'Run log output' }, + logsTruncated: { type: 'boolean', description: 'Whether logs were truncated' }, + // List Clusters + clusters: { type: 'json', description: 'List of clusters' }, + }, +} diff --git a/apps/sim/blocks/registry.ts b/apps/sim/blocks/registry.ts index eff25ffb1d..c203af4f66 100644 --- a/apps/sim/blocks/registry.ts +++ b/apps/sim/blocks/registry.ts @@ -22,6 +22,7 @@ import { CloudflareBlock } from '@/blocks/blocks/cloudflare' import { ConditionBlock } from '@/blocks/blocks/condition' import { ConfluenceBlock, ConfluenceV2Block } from '@/blocks/blocks/confluence' import { CursorBlock, CursorV2Block } from '@/blocks/blocks/cursor' +import { DatabricksBlock } from '@/blocks/blocks/databricks' import { DatadogBlock } from '@/blocks/blocks/datadog' import { DevinBlock } from '@/blocks/blocks/devin' import { DiscordBlock } from '@/blocks/blocks/discord' @@ -206,6 +207,7 @@ export const registry: Record = { confluence_v2: ConfluenceV2Block, cursor: CursorBlock, cursor_v2: CursorV2Block, + databricks: DatabricksBlock, datadog: DatadogBlock, devin: DevinBlock, discord: DiscordBlock, diff --git a/apps/sim/components/icons.tsx b/apps/sim/components/icons.tsx index 9e68974089..9da5a57508 100644 --- a/apps/sim/components/icons.tsx +++ b/apps/sim/components/icons.tsx @@ -4477,6 +4477,17 @@ export function SSHIcon(props: SVGProps) { ) } +export function DatabricksIcon(props: SVGProps) { + return ( + + + + ) +} + export function DatadogIcon(props: SVGProps) { return ( diff --git a/apps/sim/tools/databricks/cancel_run.ts b/apps/sim/tools/databricks/cancel_run.ts new file mode 100644 index 0000000000..982ebc3dbd --- /dev/null +++ b/apps/sim/tools/databricks/cancel_run.ts @@ -0,0 +1,70 @@ +import type { + DatabricksCancelRunParams, + DatabricksCancelRunResponse, +} from '@/tools/databricks/types' +import type { ToolConfig } from '@/tools/types' + +export const cancelRunTool: ToolConfig = { + id: 'databricks_cancel_run', + name: 'Databricks Cancel Run', + description: + 'Cancel a running or pending Databricks job run. Cancellation is asynchronous; poll the run status to confirm termination.', + version: '1.0.0', + + params: { + host: { + type: 'string', + required: true, + visibility: 'user-only', + description: 'Databricks workspace host (e.g., dbc-abc123.cloud.databricks.com)', + }, + apiKey: { + type: 'string', + required: true, + visibility: 'user-only', + description: 'Databricks Personal Access Token', + }, + runId: { + type: 'number', + required: true, + visibility: 'user-or-llm', + description: 'The canonical identifier of the run to cancel', + }, + }, + + request: { + url: (params) => { + const host = params.host.replace(/^https?:\/\//, '').replace(/\/$/, '') + return `https://${host}/api/2.1/jobs/runs/cancel` + }, + method: 'POST', + headers: (params) => ({ + 'Content-Type': 'application/json', + Authorization: `Bearer ${params.apiKey}`, + }), + body: (params) => ({ + run_id: params.runId, + }), + }, + + transformResponse: async (response: Response) => { + if (!response.ok) { + const data = await response.json() + throw new Error(data.message || data.error?.message || 'Failed to cancel run') + } + + return { + success: true, + output: { + success: true, + }, + } + }, + + outputs: { + success: { + type: 'boolean', + description: 'Whether the cancel request was accepted', + }, + }, +} diff --git a/apps/sim/tools/databricks/execute_sql.ts b/apps/sim/tools/databricks/execute_sql.ts new file mode 100644 index 0000000000..72d8f72b65 --- /dev/null +++ b/apps/sim/tools/databricks/execute_sql.ts @@ -0,0 +1,175 @@ +import type { + DatabricksExecuteSqlParams, + DatabricksExecuteSqlResponse, +} from '@/tools/databricks/types' +import type { ToolConfig } from '@/tools/types' + +export const executeSqlTool: ToolConfig = + { + id: 'databricks_execute_sql', + name: 'Databricks Execute SQL', + description: + 'Execute a SQL statement against a Databricks SQL warehouse and return results inline. Supports parameterized queries and Unity Catalog.', + version: '1.0.0', + + params: { + host: { + type: 'string', + required: true, + visibility: 'user-only', + description: 'Databricks workspace host (e.g., dbc-abc123.cloud.databricks.com)', + }, + apiKey: { + type: 'string', + required: true, + visibility: 'user-only', + description: 'Databricks Personal Access Token', + }, + warehouseId: { + type: 'string', + required: true, + visibility: 'user-or-llm', + description: 'The ID of the SQL warehouse to execute against', + }, + statement: { + type: 'string', + required: true, + visibility: 'user-or-llm', + description: 'The SQL statement to execute (max 16 MiB)', + }, + catalog: { + type: 'string', + required: false, + visibility: 'user-or-llm', + description: 'Unity Catalog name (equivalent to USE CATALOG)', + }, + schema: { + type: 'string', + required: false, + visibility: 'user-or-llm', + description: 'Schema name (equivalent to USE SCHEMA)', + }, + rowLimit: { + type: 'number', + required: false, + visibility: 'user-or-llm', + description: 'Maximum number of rows to return', + }, + waitTimeout: { + type: 'string', + required: false, + visibility: 'user-or-llm', + description: + 'How long to wait for results (e.g., "50s"). Range: "0s" or "5s" to "50s". Default: "50s"', + }, + }, + + request: { + url: (params) => { + const host = params.host.replace(/^https?:\/\//, '').replace(/\/$/, '') + return `https://${host}/api/2.0/sql/statements/` + }, + method: 'POST', + headers: (params) => ({ + 'Content-Type': 'application/json', + Authorization: `Bearer ${params.apiKey}`, + }), + body: (params) => { + const body: Record = { + warehouse_id: params.warehouseId, + statement: params.statement, + format: 'JSON_ARRAY', + disposition: 'INLINE', + wait_timeout: params.waitTimeout || '50s', + } + if (params.catalog) body.catalog = params.catalog + if (params.schema) body.schema = params.schema + if (params.rowLimit) body.row_limit = params.rowLimit + return body + }, + }, + + transformResponse: async (response: Response) => { + const data = await response.json() + + if (!response.ok) { + throw new Error(data.message || data.error?.message || 'Failed to execute SQL statement') + } + + const status = data.status?.state ?? 'UNKNOWN' + if (status === 'FAILED') { + throw new Error( + data.status?.error?.message || + `SQL statement execution failed: ${data.status?.error?.error_code ?? 'UNKNOWN'}` + ) + } + + const columns = + data.manifest?.schema?.columns?.map( + (col: { name: string; position: number; type_name: string }) => ({ + name: col.name ?? '', + position: col.position ?? 0, + typeName: col.type_name ?? '', + }) + ) ?? null + + return { + success: true, + output: { + statementId: data.statement_id ?? '', + status, + columns, + data: data.result?.data_array ?? null, + totalRows: data.manifest?.total_row_count ?? null, + truncated: data.manifest?.truncated ?? false, + }, + } + }, + + outputs: { + statementId: { + type: 'string', + description: 'Unique identifier for the executed statement', + }, + status: { + type: 'string', + description: 'Execution status (SUCCEEDED, PENDING, RUNNING, FAILED, CANCELED, CLOSED)', + }, + columns: { + type: 'array', + description: 'Column schema of the result set', + optional: true, + items: { + type: 'object', + properties: { + name: { type: 'string', description: 'Column name' }, + position: { type: 'number', description: 'Column position (0-based)' }, + typeName: { + type: 'string', + description: + 'Column type (STRING, INT, LONG, DOUBLE, BOOLEAN, TIMESTAMP, DATE, DECIMAL, etc.)', + }, + }, + }, + }, + data: { + type: 'array', + description: + 'Result rows as a 2D array of strings where each inner array is a row of column values', + optional: true, + items: { + type: 'array', + description: 'A single row of column values as strings', + }, + }, + totalRows: { + type: 'number', + description: 'Total number of rows in the result', + optional: true, + }, + truncated: { + type: 'boolean', + description: 'Whether the result set was truncated due to row_limit or byte_limit', + }, + }, + } diff --git a/apps/sim/tools/databricks/get_run.ts b/apps/sim/tools/databricks/get_run.ts new file mode 100644 index 0000000000..de45ba1c6a --- /dev/null +++ b/apps/sim/tools/databricks/get_run.ts @@ -0,0 +1,177 @@ +import type { DatabricksGetRunParams, DatabricksGetRunResponse } from '@/tools/databricks/types' +import type { ToolConfig } from '@/tools/types' + +export const getRunTool: ToolConfig = { + id: 'databricks_get_run', + name: 'Databricks Get Run', + description: 'Get the status, timing, and details of a Databricks job run by its run ID.', + version: '1.0.0', + + params: { + host: { + type: 'string', + required: true, + visibility: 'user-only', + description: 'Databricks workspace host (e.g., dbc-abc123.cloud.databricks.com)', + }, + apiKey: { + type: 'string', + required: true, + visibility: 'user-only', + description: 'Databricks Personal Access Token', + }, + runId: { + type: 'number', + required: true, + visibility: 'user-or-llm', + description: 'The canonical identifier of the run', + }, + includeHistory: { + type: 'boolean', + required: false, + visibility: 'user-or-llm', + description: 'Include repair history in the response', + }, + includeResolvedValues: { + type: 'boolean', + required: false, + visibility: 'user-or-llm', + description: 'Include resolved parameter values in the response', + }, + }, + + request: { + url: (params) => { + const host = params.host.replace(/^https?:\/\//, '').replace(/\/$/, '') + const url = new URL(`https://${host}/api/2.1/jobs/runs/get`) + url.searchParams.set('run_id', String(params.runId)) + if (params.includeHistory) url.searchParams.set('include_history', 'true') + if (params.includeResolvedValues) url.searchParams.set('include_resolved_values', 'true') + return url.toString() + }, + method: 'GET', + headers: (params) => ({ + Accept: 'application/json', + Authorization: `Bearer ${params.apiKey}`, + }), + }, + + transformResponse: async (response: Response) => { + const data = await response.json() + + if (!response.ok) { + throw new Error(data.message || data.error?.message || 'Failed to get run details') + } + + return { + success: true, + output: { + runId: data.run_id ?? 0, + jobId: data.job_id ?? 0, + runName: data.run_name ?? '', + runType: data.run_type ?? '', + attemptNumber: data.attempt_number ?? 0, + state: { + lifeCycleState: data.state?.life_cycle_state ?? 'UNKNOWN', + resultState: data.state?.result_state ?? null, + stateMessage: data.state?.state_message ?? '', + userCancelledOrTimedout: data.state?.user_cancelled_or_timedout ?? false, + }, + startTime: data.start_time ?? null, + endTime: data.end_time ?? null, + setupDuration: data.setup_duration ?? null, + executionDuration: data.execution_duration ?? null, + cleanupDuration: data.cleanup_duration ?? null, + queueDuration: data.queue_duration ?? null, + runPageUrl: data.run_page_url ?? '', + creatorUserName: data.creator_user_name ?? '', + }, + } + }, + + outputs: { + runId: { + type: 'number', + description: 'The run ID', + }, + jobId: { + type: 'number', + description: 'The job ID this run belongs to', + }, + runName: { + type: 'string', + description: 'Name of the run', + }, + runType: { + type: 'string', + description: 'Type of run (JOB_RUN, WORKFLOW_RUN, SUBMIT_RUN)', + }, + attemptNumber: { + type: 'number', + description: 'Retry attempt number (0 for initial attempt)', + }, + state: { + type: 'object', + description: 'Run state information', + properties: { + lifeCycleState: { + type: 'string', + description: + 'Lifecycle state (QUEUED, PENDING, RUNNING, TERMINATING, TERMINATED, SKIPPED, INTERNAL_ERROR, BLOCKED, WAITING_FOR_RETRY)', + }, + resultState: { + type: 'string', + description: + 'Result state (SUCCESS, FAILED, TIMEDOUT, CANCELED, SUCCESS_WITH_FAILURES, UPSTREAM_FAILED, UPSTREAM_CANCELED, EXCLUDED)', + optional: true, + }, + stateMessage: { + type: 'string', + description: 'Descriptive message for the current state', + }, + userCancelledOrTimedout: { + type: 'boolean', + description: 'Whether the run was cancelled by user or timed out', + }, + }, + }, + startTime: { + type: 'number', + description: 'Run start timestamp (epoch ms)', + optional: true, + }, + endTime: { + type: 'number', + description: 'Run end timestamp (epoch ms, 0 if still running)', + optional: true, + }, + setupDuration: { + type: 'number', + description: 'Cluster setup duration (ms)', + optional: true, + }, + executionDuration: { + type: 'number', + description: 'Execution duration (ms)', + optional: true, + }, + cleanupDuration: { + type: 'number', + description: 'Cleanup duration (ms)', + optional: true, + }, + queueDuration: { + type: 'number', + description: 'Time spent in queue before execution (ms)', + optional: true, + }, + runPageUrl: { + type: 'string', + description: 'URL to the run detail page in Databricks UI', + }, + creatorUserName: { + type: 'string', + description: 'Email of the user who triggered the run', + }, + }, +} diff --git a/apps/sim/tools/databricks/get_run_output.ts b/apps/sim/tools/databricks/get_run_output.ts new file mode 100644 index 0000000000..79bb9f528d --- /dev/null +++ b/apps/sim/tools/databricks/get_run_output.ts @@ -0,0 +1,111 @@ +import type { + DatabricksGetRunOutputParams, + DatabricksGetRunOutputResponse, +} from '@/tools/databricks/types' +import type { ToolConfig } from '@/tools/types' + +export const getRunOutputTool: ToolConfig< + DatabricksGetRunOutputParams, + DatabricksGetRunOutputResponse +> = { + id: 'databricks_get_run_output', + name: 'Databricks Get Run Output', + description: + 'Get the output of a completed Databricks job run, including notebook results, error messages, and logs. For multi-task jobs, use the task run ID (not the parent run ID).', + version: '1.0.0', + + params: { + host: { + type: 'string', + required: true, + visibility: 'user-only', + description: 'Databricks workspace host (e.g., dbc-abc123.cloud.databricks.com)', + }, + apiKey: { + type: 'string', + required: true, + visibility: 'user-only', + description: 'Databricks Personal Access Token', + }, + runId: { + type: 'number', + required: true, + visibility: 'user-or-llm', + description: 'The run ID to get output for. For multi-task jobs, use the task run ID', + }, + }, + + request: { + url: (params) => { + const host = params.host.replace(/^https?:\/\//, '').replace(/\/$/, '') + return `https://${host}/api/2.1/jobs/runs/get-output?run_id=${params.runId}` + }, + method: 'GET', + headers: (params) => ({ + Accept: 'application/json', + Authorization: `Bearer ${params.apiKey}`, + }), + }, + + transformResponse: async (response: Response) => { + const data = await response.json() + + if (!response.ok) { + throw new Error(data.message || data.error?.message || 'Failed to get run output') + } + + return { + success: true, + output: { + notebookOutput: data.notebook_output + ? { + result: data.notebook_output.result ?? null, + truncated: data.notebook_output.truncated ?? false, + } + : null, + error: data.error ?? null, + errorTrace: data.error_trace ?? null, + logs: data.logs ?? null, + logsTruncated: data.logs_truncated ?? false, + }, + } + }, + + outputs: { + notebookOutput: { + type: 'object', + description: 'Notebook task output (from dbutils.notebook.exit())', + optional: true, + properties: { + result: { + type: 'string', + description: 'Value passed to dbutils.notebook.exit() (max 5 MB)', + optional: true, + }, + truncated: { + type: 'boolean', + description: 'Whether the result was truncated', + }, + }, + }, + error: { + type: 'string', + description: 'Error message if the run failed or output is unavailable', + optional: true, + }, + errorTrace: { + type: 'string', + description: 'Error stack trace if available', + optional: true, + }, + logs: { + type: 'string', + description: 'Log output (last 5 MB) from spark_jar, spark_python, or python_wheel tasks', + optional: true, + }, + logsTruncated: { + type: 'boolean', + description: 'Whether the log output was truncated', + }, + }, +} diff --git a/apps/sim/tools/databricks/index.ts b/apps/sim/tools/databricks/index.ts new file mode 100644 index 0000000000..48c290d455 --- /dev/null +++ b/apps/sim/tools/databricks/index.ts @@ -0,0 +1,17 @@ +import { cancelRunTool } from '@/tools/databricks/cancel_run' +import { executeSqlTool } from '@/tools/databricks/execute_sql' +import { getRunTool } from '@/tools/databricks/get_run' +import { getRunOutputTool } from '@/tools/databricks/get_run_output' +import { listClustersTool } from '@/tools/databricks/list_clusters' +import { listJobsTool } from '@/tools/databricks/list_jobs' +import { listRunsTool } from '@/tools/databricks/list_runs' +import { runJobTool } from '@/tools/databricks/run_job' + +export const databricksExecuteSqlTool = executeSqlTool +export const databricksListJobsTool = listJobsTool +export const databricksRunJobTool = runJobTool +export const databricksGetRunTool = getRunTool +export const databricksListRunsTool = listRunsTool +export const databricksCancelRunTool = cancelRunTool +export const databricksGetRunOutputTool = getRunOutputTool +export const databricksListClustersTool = listClustersTool diff --git a/apps/sim/tools/databricks/list_clusters.ts b/apps/sim/tools/databricks/list_clusters.ts new file mode 100644 index 0000000000..c06802a727 --- /dev/null +++ b/apps/sim/tools/databricks/list_clusters.ts @@ -0,0 +1,143 @@ +import type { DatabricksBaseParams, DatabricksListClustersResponse } from '@/tools/databricks/types' +import type { ToolConfig } from '@/tools/types' + +export const listClustersTool: ToolConfig = { + id: 'databricks_list_clusters', + name: 'Databricks List Clusters', + description: + 'List all clusters in a Databricks workspace including their state, configuration, and resource details.', + version: '1.0.0', + + params: { + host: { + type: 'string', + required: true, + visibility: 'user-only', + description: 'Databricks workspace host (e.g., dbc-abc123.cloud.databricks.com)', + }, + apiKey: { + type: 'string', + required: true, + visibility: 'user-only', + description: 'Databricks Personal Access Token', + }, + }, + + request: { + url: (params) => { + const host = params.host.replace(/^https?:\/\//, '').replace(/\/$/, '') + return `https://${host}/api/2.0/clusters/list` + }, + method: 'GET', + headers: (params) => ({ + Accept: 'application/json', + Authorization: `Bearer ${params.apiKey}`, + }), + }, + + transformResponse: async (response: Response) => { + const data = await response.json() + + if (!response.ok) { + throw new Error(data.message || data.error?.message || 'Failed to list clusters') + } + + const clusters = (data.clusters ?? []).map( + (cluster: { + cluster_id?: string + cluster_name?: string + state?: string + state_message?: string + creator_user_name?: string + spark_version?: string + node_type_id?: string + driver_node_type_id?: string + num_workers?: number + autoscale?: { min_workers?: number; max_workers?: number } + cluster_source?: string + autotermination_minutes?: number + start_time?: number + }) => ({ + clusterId: cluster.cluster_id ?? '', + clusterName: cluster.cluster_name ?? '', + state: cluster.state ?? 'UNKNOWN', + stateMessage: cluster.state_message ?? '', + creatorUserName: cluster.creator_user_name ?? '', + sparkVersion: cluster.spark_version ?? '', + nodeTypeId: cluster.node_type_id ?? '', + driverNodeTypeId: cluster.driver_node_type_id ?? '', + numWorkers: cluster.num_workers ?? null, + autoscale: cluster.autoscale + ? { + minWorkers: cluster.autoscale.min_workers ?? 0, + maxWorkers: cluster.autoscale.max_workers ?? 0, + } + : null, + clusterSource: cluster.cluster_source ?? '', + autoterminationMinutes: cluster.autotermination_minutes ?? 0, + startTime: cluster.start_time ?? null, + }) + ) + + return { + success: true, + output: { + clusters, + }, + } + }, + + outputs: { + clusters: { + type: 'array', + description: 'List of clusters in the workspace', + items: { + type: 'object', + properties: { + clusterId: { type: 'string', description: 'Unique cluster identifier' }, + clusterName: { type: 'string', description: 'Cluster display name' }, + state: { + type: 'string', + description: + 'Current state (PENDING, RUNNING, RESTARTING, RESIZING, TERMINATING, TERMINATED, ERROR, UNKNOWN)', + }, + stateMessage: { type: 'string', description: 'Human-readable state description' }, + creatorUserName: { type: 'string', description: 'Email of the cluster creator' }, + sparkVersion: { + type: 'string', + description: 'Spark runtime version (e.g., 13.3.x-scala2.12)', + }, + nodeTypeId: { type: 'string', description: 'Worker node type identifier' }, + driverNodeTypeId: { type: 'string', description: 'Driver node type identifier' }, + numWorkers: { + type: 'number', + description: 'Number of worker nodes (for fixed-size clusters)', + optional: true, + }, + autoscale: { + type: 'object', + description: 'Autoscaling configuration (null for fixed-size clusters)', + optional: true, + properties: { + minWorkers: { type: 'number', description: 'Minimum number of workers' }, + maxWorkers: { type: 'number', description: 'Maximum number of workers' }, + }, + }, + clusterSource: { + type: 'string', + description: 'Origin (API, UI, JOB, MODELS, PIPELINE, PIPELINE_MAINTENANCE, SQL)', + }, + autoterminationMinutes: { + type: 'number', + description: 'Minutes of inactivity before auto-termination (0 = disabled)', + }, + startTime: { + type: 'number', + description: 'Cluster start timestamp (epoch ms)', + optional: true, + }, + }, + }, + }, + }, +} diff --git a/apps/sim/tools/databricks/list_jobs.ts b/apps/sim/tools/databricks/list_jobs.ts new file mode 100644 index 0000000000..7d347770c4 --- /dev/null +++ b/apps/sim/tools/databricks/list_jobs.ts @@ -0,0 +1,125 @@ +import type { DatabricksListJobsParams, DatabricksListJobsResponse } from '@/tools/databricks/types' +import type { ToolConfig } from '@/tools/types' + +export const listJobsTool: ToolConfig = { + id: 'databricks_list_jobs', + name: 'Databricks List Jobs', + description: 'List all jobs in a Databricks workspace with optional filtering by name.', + version: '1.0.0', + + params: { + host: { + type: 'string', + required: true, + visibility: 'user-only', + description: 'Databricks workspace host (e.g., dbc-abc123.cloud.databricks.com)', + }, + apiKey: { + type: 'string', + required: true, + visibility: 'user-only', + description: 'Databricks Personal Access Token', + }, + limit: { + type: 'number', + required: false, + visibility: 'user-or-llm', + description: 'Maximum number of jobs to return (range 1-100, default 20)', + }, + offset: { + type: 'number', + required: false, + visibility: 'user-or-llm', + description: 'Offset for pagination', + }, + name: { + type: 'string', + required: false, + visibility: 'user-or-llm', + description: 'Filter jobs by exact name (case-insensitive)', + }, + expandTasks: { + type: 'boolean', + required: false, + visibility: 'user-or-llm', + description: 'Include task and cluster details in the response (max 100 elements)', + }, + }, + + request: { + url: (params) => { + const host = params.host.replace(/^https?:\/\//, '').replace(/\/$/, '') + const url = new URL(`https://${host}/api/2.1/jobs/list`) + if (params.limit) url.searchParams.set('limit', String(params.limit)) + if (params.offset) url.searchParams.set('offset', String(params.offset)) + if (params.name) url.searchParams.set('name', params.name) + if (params.expandTasks) url.searchParams.set('expand_tasks', 'true') + return url.toString() + }, + method: 'GET', + headers: (params) => ({ + Accept: 'application/json', + Authorization: `Bearer ${params.apiKey}`, + }), + }, + + transformResponse: async (response: Response) => { + const data = await response.json() + + if (!response.ok) { + throw new Error(data.message || data.error?.message || 'Failed to list jobs') + } + + const jobs = (data.jobs ?? []).map( + (job: { + job_id?: number + settings?: { name?: string; max_concurrent_runs?: number; format?: string } + created_time?: number + creator_user_name?: string + }) => ({ + jobId: job.job_id ?? 0, + name: job.settings?.name ?? '', + createdTime: job.created_time ?? 0, + creatorUserName: job.creator_user_name ?? '', + maxConcurrentRuns: job.settings?.max_concurrent_runs ?? 1, + format: job.settings?.format ?? '', + }) + ) + + return { + success: true, + output: { + jobs, + hasMore: data.has_more ?? false, + nextPageToken: data.next_page_token ?? null, + }, + } + }, + + outputs: { + jobs: { + type: 'array', + description: 'List of jobs in the workspace', + items: { + type: 'object', + properties: { + jobId: { type: 'number', description: 'Unique job identifier' }, + name: { type: 'string', description: 'Job name' }, + createdTime: { type: 'number', description: 'Job creation timestamp (epoch ms)' }, + creatorUserName: { type: 'string', description: 'Email of the job creator' }, + maxConcurrentRuns: { type: 'number', description: 'Maximum number of concurrent runs' }, + format: { type: 'string', description: 'Job format (SINGLE_TASK or MULTI_TASK)' }, + }, + }, + }, + hasMore: { + type: 'boolean', + description: 'Whether more jobs are available for pagination', + }, + nextPageToken: { + type: 'string', + description: 'Token for fetching the next page of results', + optional: true, + }, + }, +} diff --git a/apps/sim/tools/databricks/list_runs.ts b/apps/sim/tools/databricks/list_runs.ts new file mode 100644 index 0000000000..3e7b65bab6 --- /dev/null +++ b/apps/sim/tools/databricks/list_runs.ts @@ -0,0 +1,195 @@ +import type { DatabricksListRunsParams, DatabricksListRunsResponse } from '@/tools/databricks/types' +import type { ToolConfig } from '@/tools/types' + +export const listRunsTool: ToolConfig = { + id: 'databricks_list_runs', + name: 'Databricks List Runs', + description: + 'List job runs in a Databricks workspace with optional filtering by job, status, and time range.', + version: '1.0.0', + + params: { + host: { + type: 'string', + required: true, + visibility: 'user-only', + description: 'Databricks workspace host (e.g., dbc-abc123.cloud.databricks.com)', + }, + apiKey: { + type: 'string', + required: true, + visibility: 'user-only', + description: 'Databricks Personal Access Token', + }, + jobId: { + type: 'number', + required: false, + visibility: 'user-or-llm', + description: 'Filter runs by job ID. Omit to list runs across all jobs', + }, + activeOnly: { + type: 'boolean', + required: false, + visibility: 'user-or-llm', + description: 'Only include active runs (PENDING, RUNNING, or TERMINATING)', + }, + completedOnly: { + type: 'boolean', + required: false, + visibility: 'user-or-llm', + description: 'Only include completed runs', + }, + limit: { + type: 'number', + required: false, + visibility: 'user-or-llm', + description: 'Maximum number of runs to return (range 1-24, default 20)', + }, + offset: { + type: 'number', + required: false, + visibility: 'user-or-llm', + description: 'Offset for pagination', + }, + runType: { + type: 'string', + required: false, + visibility: 'user-or-llm', + description: 'Filter by run type (JOB_RUN, WORKFLOW_RUN, SUBMIT_RUN)', + }, + startTimeFrom: { + type: 'number', + required: false, + visibility: 'user-or-llm', + description: 'Filter runs started at or after this timestamp (epoch ms)', + }, + startTimeTo: { + type: 'number', + required: false, + visibility: 'user-or-llm', + description: 'Filter runs started at or before this timestamp (epoch ms)', + }, + }, + + request: { + url: (params) => { + const host = params.host.replace(/^https?:\/\//, '').replace(/\/$/, '') + const url = new URL(`https://${host}/api/2.1/jobs/runs/list`) + if (params.jobId) url.searchParams.set('job_id', String(params.jobId)) + if (params.activeOnly) url.searchParams.set('active_only', 'true') + if (params.completedOnly) url.searchParams.set('completed_only', 'true') + if (params.limit) url.searchParams.set('limit', String(params.limit)) + if (params.offset) url.searchParams.set('offset', String(params.offset)) + if (params.runType) url.searchParams.set('run_type', params.runType) + if (params.startTimeFrom) + url.searchParams.set('start_time_from', String(params.startTimeFrom)) + if (params.startTimeTo) url.searchParams.set('start_time_to', String(params.startTimeTo)) + return url.toString() + }, + method: 'GET', + headers: (params) => ({ + Accept: 'application/json', + Authorization: `Bearer ${params.apiKey}`, + }), + }, + + transformResponse: async (response: Response) => { + const data = await response.json() + + if (!response.ok) { + throw new Error(data.message || data.error?.message || 'Failed to list runs') + } + + const runs = (data.runs ?? []).map( + (run: { + run_id?: number + job_id?: number + run_name?: string + run_type?: string + state?: { + life_cycle_state?: string + result_state?: string + state_message?: string + user_cancelled_or_timedout?: boolean + } + start_time?: number + end_time?: number + }) => ({ + runId: run.run_id ?? 0, + jobId: run.job_id ?? 0, + runName: run.run_name ?? '', + runType: run.run_type ?? '', + state: { + lifeCycleState: run.state?.life_cycle_state ?? 'UNKNOWN', + resultState: run.state?.result_state ?? null, + stateMessage: run.state?.state_message ?? '', + userCancelledOrTimedout: run.state?.user_cancelled_or_timedout ?? false, + }, + startTime: run.start_time ?? null, + endTime: run.end_time ?? null, + }) + ) + + return { + success: true, + output: { + runs, + hasMore: data.has_more ?? false, + nextPageToken: data.next_page_token ?? null, + }, + } + }, + + outputs: { + runs: { + type: 'array', + description: 'List of job runs', + items: { + type: 'object', + properties: { + runId: { type: 'number', description: 'Unique run identifier' }, + jobId: { type: 'number', description: 'Job this run belongs to' }, + runName: { type: 'string', description: 'Run name' }, + runType: { type: 'string', description: 'Run type (JOB_RUN, WORKFLOW_RUN, SUBMIT_RUN)' }, + state: { + type: 'object', + description: 'Run state information', + properties: { + lifeCycleState: { + type: 'string', + description: + 'Lifecycle state (QUEUED, PENDING, RUNNING, TERMINATING, TERMINATED, SKIPPED, INTERNAL_ERROR, BLOCKED, WAITING_FOR_RETRY)', + }, + resultState: { + type: 'string', + description: + 'Result state (SUCCESS, FAILED, TIMEDOUT, CANCELED, SUCCESS_WITH_FAILURES, UPSTREAM_FAILED, UPSTREAM_CANCELED, EXCLUDED)', + optional: true, + }, + stateMessage: { type: 'string', description: 'Descriptive state message' }, + userCancelledOrTimedout: { + type: 'boolean', + description: 'Whether the run was cancelled by user or timed out', + }, + }, + }, + startTime: { + type: 'number', + description: 'Run start timestamp (epoch ms)', + optional: true, + }, + endTime: { type: 'number', description: 'Run end timestamp (epoch ms)', optional: true }, + }, + }, + }, + hasMore: { + type: 'boolean', + description: 'Whether more runs are available for pagination', + }, + nextPageToken: { + type: 'string', + description: 'Token for fetching the next page of results', + optional: true, + }, + }, +} diff --git a/apps/sim/tools/databricks/run_job.ts b/apps/sim/tools/databricks/run_job.ts new file mode 100644 index 0000000000..6117ebcb18 --- /dev/null +++ b/apps/sim/tools/databricks/run_job.ts @@ -0,0 +1,113 @@ +import type { DatabricksRunJobParams, DatabricksRunJobResponse } from '@/tools/databricks/types' +import type { ToolConfig } from '@/tools/types' + +export const runJobTool: ToolConfig = { + id: 'databricks_run_job', + name: 'Databricks Run Job', + description: + 'Trigger an existing Databricks job to run immediately with optional job-level or notebook parameters.', + version: '1.0.0', + + params: { + host: { + type: 'string', + required: true, + visibility: 'user-only', + description: 'Databricks workspace host (e.g., dbc-abc123.cloud.databricks.com)', + }, + apiKey: { + type: 'string', + required: true, + visibility: 'user-only', + description: 'Databricks Personal Access Token', + }, + jobId: { + type: 'number', + required: true, + visibility: 'user-or-llm', + description: 'The ID of the job to trigger', + }, + jobParameters: { + type: 'string', + required: false, + visibility: 'user-or-llm', + description: 'Job-level parameter overrides as a JSON object (e.g., {"key": "value"})', + }, + notebookParams: { + type: 'string', + required: false, + visibility: 'user-or-llm', + description: 'Notebook task parameters as a JSON object (e.g., {"param1": "value1"})', + }, + idempotencyToken: { + type: 'string', + required: false, + visibility: 'user-or-llm', + description: 'Idempotency token to prevent duplicate runs (max 64 characters)', + }, + }, + + request: { + url: (params) => { + const host = params.host.replace(/^https?:\/\//, '').replace(/\/$/, '') + return `https://${host}/api/2.1/jobs/run-now` + }, + method: 'POST', + headers: (params) => ({ + 'Content-Type': 'application/json', + Authorization: `Bearer ${params.apiKey}`, + }), + body: (params) => { + const body: Record = { + job_id: params.jobId, + } + if (params.jobParameters) { + try { + body.job_parameters = JSON.parse(params.jobParameters) + } catch (error) { + throw new Error( + `Invalid JSON in jobParameters: ${error instanceof Error ? error.message : 'unknown error'}` + ) + } + } + if (params.notebookParams) { + try { + body.notebook_params = JSON.parse(params.notebookParams) + } catch (error) { + throw new Error( + `Invalid JSON in notebookParams: ${error instanceof Error ? error.message : 'unknown error'}` + ) + } + } + if (params.idempotencyToken) body.idempotency_token = params.idempotencyToken + return body + }, + }, + + transformResponse: async (response: Response) => { + const data = await response.json() + + if (!response.ok) { + throw new Error(data.message || data.error?.message || 'Failed to trigger job run') + } + + return { + success: true, + output: { + runId: data.run_id ?? 0, + numberInJob: data.number_in_job ?? 0, + }, + } + }, + + outputs: { + runId: { + type: 'number', + description: 'The globally unique ID of the triggered run', + }, + numberInJob: { + type: 'number', + description: 'The sequence number of this run among all runs of the job', + }, + }, +} diff --git a/apps/sim/tools/databricks/types.ts b/apps/sim/tools/databricks/types.ts new file mode 100644 index 0000000000..362fc21acb --- /dev/null +++ b/apps/sim/tools/databricks/types.ts @@ -0,0 +1,191 @@ +import type { ToolResponse } from '@/tools/types' + +/** Base parameters shared by all Databricks tools */ +export interface DatabricksBaseParams { + apiKey: string + host: string +} + +/** Execute SQL Statement */ +export interface DatabricksExecuteSqlParams extends DatabricksBaseParams { + warehouseId: string + statement: string + catalog?: string + schema?: string + rowLimit?: number + waitTimeout?: string +} + +export interface DatabricksExecuteSqlResponse extends ToolResponse { + output: { + statementId: string + status: string + columns: Array<{ name: string; position: number; typeName: string }> | null + data: string[][] | null + totalRows: number | null + truncated: boolean + } +} + +/** List Jobs */ +export interface DatabricksListJobsParams extends DatabricksBaseParams { + limit?: number + offset?: number + name?: string + expandTasks?: boolean +} + +export interface DatabricksListJobsResponse extends ToolResponse { + output: { + jobs: Array<{ + jobId: number + name: string + createdTime: number + creatorUserName: string + maxConcurrentRuns: number + format: string + }> + hasMore: boolean + nextPageToken: string | null + } +} + +/** Run Job */ +export interface DatabricksRunJobParams extends DatabricksBaseParams { + jobId: number + jobParameters?: string + notebookParams?: string + idempotencyToken?: string +} + +export interface DatabricksRunJobResponse extends ToolResponse { + output: { + runId: number + numberInJob: number + } +} + +/** Get Run */ +export interface DatabricksGetRunParams extends DatabricksBaseParams { + runId: number + includeHistory?: boolean + includeResolvedValues?: boolean +} + +export interface DatabricksGetRunResponse extends ToolResponse { + output: { + runId: number + jobId: number + runName: string + runType: string + attemptNumber: number + state: { + lifeCycleState: string + resultState: string | null + stateMessage: string + userCancelledOrTimedout: boolean + } + startTime: number | null + endTime: number | null + setupDuration: number | null + executionDuration: number | null + cleanupDuration: number | null + queueDuration: number | null + runPageUrl: string + creatorUserName: string + } +} + +/** List Runs */ +export interface DatabricksListRunsParams extends DatabricksBaseParams { + jobId?: number + activeOnly?: boolean + completedOnly?: boolean + limit?: number + offset?: number + runType?: string + startTimeFrom?: number + startTimeTo?: number +} + +export interface DatabricksListRunsResponse extends ToolResponse { + output: { + runs: Array<{ + runId: number + jobId: number + runName: string + runType: string + state: { + lifeCycleState: string + resultState: string | null + stateMessage: string + userCancelledOrTimedout: boolean + } + startTime: number | null + endTime: number | null + }> + hasMore: boolean + nextPageToken: string | null + } +} + +/** Cancel Run */ +export interface DatabricksCancelRunParams extends DatabricksBaseParams { + runId: number +} + +export interface DatabricksCancelRunResponse extends ToolResponse { + output: { + success: boolean + } +} + +/** Get Run Output */ +export interface DatabricksGetRunOutputParams extends DatabricksBaseParams { + runId: number +} + +export interface DatabricksGetRunOutputResponse extends ToolResponse { + output: { + notebookOutput: { + result: string | null + truncated: boolean + } | null + error: string | null + errorTrace: string | null + logs: string | null + logsTruncated: boolean + } +} + +/** List Clusters */ +export interface DatabricksListClustersResponse extends ToolResponse { + output: { + clusters: Array<{ + clusterId: string + clusterName: string + state: string + stateMessage: string + creatorUserName: string + sparkVersion: string + nodeTypeId: string + driverNodeTypeId: string + numWorkers: number | null + autoscale: { minWorkers: number; maxWorkers: number } | null + clusterSource: string + autoterminationMinutes: number + startTime: number | null + }> + } +} + +/** Union type for all Databricks responses */ +export type DatabricksResponse = + | DatabricksExecuteSqlResponse + | DatabricksListJobsResponse + | DatabricksRunJobResponse + | DatabricksGetRunResponse + | DatabricksListRunsResponse + | DatabricksCancelRunResponse + | DatabricksGetRunOutputResponse + | DatabricksListClustersResponse diff --git a/apps/sim/tools/registry.ts b/apps/sim/tools/registry.ts index d8302770c3..4bcbc9a13a 100644 --- a/apps/sim/tools/registry.ts +++ b/apps/sim/tools/registry.ts @@ -248,6 +248,16 @@ import { cursorStopAgentTool, cursorStopAgentV2Tool, } from '@/tools/cursor' +import { + databricksCancelRunTool, + databricksExecuteSqlTool, + databricksGetRunOutputTool, + databricksGetRunTool, + databricksListClustersTool, + databricksListJobsTool, + databricksListRunsTool, + databricksRunJobTool, +} from '@/tools/databricks' import { datadogCancelDowntimeTool, datadogCreateDowntimeTool, @@ -2595,6 +2605,14 @@ export const tools: Record = { devin_get_session: devinGetSessionTool, devin_list_sessions: devinListSessionsTool, devin_send_message: devinSendMessageTool, + databricks_cancel_run: databricksCancelRunTool, + databricks_execute_sql: databricksExecuteSqlTool, + databricks_get_run: databricksGetRunTool, + databricks_get_run_output: databricksGetRunOutputTool, + databricks_list_clusters: databricksListClustersTool, + databricks_list_jobs: databricksListJobsTool, + databricks_list_runs: databricksListRunsTool, + databricks_run_job: databricksRunJobTool, duckduckgo_search: duckduckgoSearchTool, dspy_predict: predictTool, dspy_chain_of_thought: chainOfThoughtTool,