Skip to content

Commit 563169c

Browse files
waleedlatif1 and claude committed
feat(databricks): add Databricks integration with 8 tools
Add complete Databricks integration supporting SQL execution, job management, run monitoring, and cluster listing via Personal Access Token authentication. Tools: execute_sql, list_jobs, run_job, get_run, list_runs, cancel_run, get_run_output, list_clusters Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
1 parent 4ccb573 commit 563169c

File tree

18 files changed

+1984
-0
lines changed

18 files changed

+1984
-0
lines changed

apps/docs/components/icons.tsx

Lines changed: 11 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -4477,6 +4477,17 @@ export function SSHIcon(props: SVGProps<SVGSVGElement>) {
44774477
)
44784478
}
44794479

4480+
/**
 * Databricks brand mark rendered as an inline SVG icon.
 * Spreads incoming SVG props onto the root element so callers can set
 * className, width/height, aria attributes, etc.
 */
export function DatabricksIcon(props: SVGProps<SVGSVGElement>) {
  // Single-path logo geometry, kept in one place for readability.
  const logoPath =
    'M228.085 109.654L120.615 171.674L5.53493 105.41L0 108.475V156.582L120.615 225.911L228.085 164.128V189.596L120.615 251.615L5.53493 185.351L0 188.417V196.67L120.615 266L241 196.67V148.564L235.465 145.498L120.615 211.527L12.9148 149.743V124.275L120.615 186.059L241 116.729V69.3298L235.004 65.7925L120.615 131.585L18.4498 73.1028L120.615 14.3848L204.562 62.7269L211.942 58.4823V52.5869L120.615 0L0 69.3298V76.8759L120.615 146.206L228.085 84.1862V109.654Z'

  return (
    <svg {...props} viewBox='0 0 241 266' fill='none' xmlns='http://www.w3.org/2000/svg'>
      <path d={logoPath} fill='#F9F7F4' />
    </svg>
  )
}
4490+
44804491
export function DatadogIcon(props: SVGProps<SVGSVGElement>) {
44814492
return (
44824493
<svg {...props} xmlns='http://www.w3.org/2000/svg' viewBox='0 0 64 64'>

apps/docs/components/ui/icon-mapping.ts

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -24,6 +24,7 @@ import {
2424
CloudflareIcon,
2525
ConfluenceIcon,
2626
CursorIcon,
27+
DatabricksIcon,
2728
DatadogIcon,
2829
DevinIcon,
2930
DiscordIcon,
@@ -174,6 +175,7 @@ export const blockTypeToIconMap: Record<string, IconComponent> = {
174175
cloudflare: CloudflareIcon,
175176
confluence_v2: ConfluenceIcon,
176177
cursor_v2: CursorIcon,
178+
databricks: DatabricksIcon,
177179
datadog: DatadogIcon,
178180
devin: DevinIcon,
179181
discord: DiscordIcon,
Lines changed: 249 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,249 @@
1+
---
2+
title: Databricks
3+
description: Run SQL queries and manage jobs on Databricks
4+
---
5+
6+
import { BlockInfoCard } from "@/components/ui/block-info-card"
7+
8+
<BlockInfoCard
9+
type="databricks"
10+
color="#FF3621"
11+
/>
12+
13+
## Usage Instructions
14+
15+
Connect to Databricks to execute SQL queries against SQL warehouses, trigger and monitor job runs, manage clusters, and retrieve run outputs. Requires a Personal Access Token and workspace host URL.
16+
17+
18+
19+
## Tools
20+
21+
### `databricks_execute_sql`
22+
23+
Execute a SQL statement against a Databricks SQL warehouse and return results inline. Supports parameterized queries and Unity Catalog.
24+
25+
#### Input
26+
27+
| Parameter | Type | Required | Description |
28+
| --------- | ---- | -------- | ----------- |
29+
| `host` | string | Yes | Databricks workspace host \(e.g., dbc-abc123.cloud.databricks.com\) |
30+
| `apiKey` | string | Yes | Databricks Personal Access Token |
31+
| `warehouseId` | string | Yes | The ID of the SQL warehouse to execute against |
32+
| `statement` | string | Yes | The SQL statement to execute \(max 16 MiB\) |
33+
| `catalog` | string | No | Unity Catalog name \(equivalent to USE CATALOG\) |
34+
| `schema` | string | No | Schema name \(equivalent to USE SCHEMA\) |
35+
| `rowLimit` | number | No | Maximum number of rows to return |
36+
| `waitTimeout` | string | No | How long to wait for results \(e.g., "50s"\). Range: "0s" or "5s" to "50s". Default: "10s" |
37+
38+
#### Output
39+
40+
| Parameter | Type | Description |
41+
| --------- | ---- | ----------- |
42+
| `statementId` | string | Unique identifier for the executed statement |
43+
| `status` | string | Execution status \(SUCCEEDED, PENDING, RUNNING, FAILED, CANCELED, CLOSED\) |
44+
| `columns` | array | Column schema of the result set |
45+
|`name` | string | Column name |
46+
|`position` | number | Column position \(0-based\) |
47+
|`typeName` | string | Column type \(STRING, INT, LONG, DOUBLE, BOOLEAN, TIMESTAMP, DATE, DECIMAL, etc.\) |
48+
| `data` | array | Result rows as a 2D array of strings |
49+
| `totalRows` | number | Total number of rows in the result |
50+
| `truncated` | boolean | Whether the result set was truncated due to row_limit or byte_limit |
51+
52+
### `databricks_list_jobs`
53+
54+
List all jobs in a Databricks workspace with optional filtering by name.
55+
56+
#### Input
57+
58+
| Parameter | Type | Required | Description |
59+
| --------- | ---- | -------- | ----------- |
60+
| `host` | string | Yes | Databricks workspace host \(e.g., dbc-abc123.cloud.databricks.com\) |
61+
| `apiKey` | string | Yes | Databricks Personal Access Token |
62+
| `limit` | number | No | Maximum number of jobs to return \(range 1-100, default 20\) |
63+
| `offset` | number | No | Offset for pagination |
64+
| `name` | string | No | Filter jobs by name \(case-insensitive match\) |
65+
| `expandTasks` | boolean | No | Include task and cluster details in the response \(max 100 elements\) |
66+
67+
#### Output
68+
69+
| Parameter | Type | Description |
70+
| --------- | ---- | ----------- |
71+
| `jobs` | array | List of jobs in the workspace |
72+
|`jobId` | number | Unique job identifier |
73+
|`name` | string | Job name |
74+
|`createdTime` | number | Job creation timestamp \(epoch ms\) |
75+
|`creatorUserName` | string | Email of the job creator |
76+
|`maxConcurrentRuns` | number | Maximum number of concurrent runs |
77+
|`format` | string | Job format \(SINGLE_TASK or MULTI_TASK\) |
78+
| `hasMore` | boolean | Whether more jobs are available for pagination |
79+
| `nextPageToken` | string | Token for fetching the next page of results |
80+
81+
### `databricks_run_job`
82+
83+
Trigger an existing Databricks job to run immediately with optional job-level or notebook parameters.
84+
85+
#### Input
86+
87+
| Parameter | Type | Required | Description |
88+
| --------- | ---- | -------- | ----------- |
89+
| `host` | string | Yes | Databricks workspace host \(e.g., dbc-abc123.cloud.databricks.com\) |
90+
| `apiKey` | string | Yes | Databricks Personal Access Token |
91+
| `jobId` | number | Yes | The ID of the job to trigger |
92+
| `jobParameters` | string | No | Job-level parameter overrides as a JSON object \(e.g., \{"key": "value"\}\) |
93+
| `notebookParams` | string | No | Notebook task parameters as a JSON object \(e.g., \{"param1": "value1"\}\) |
94+
| `idempotencyToken` | string | No | Idempotency token to prevent duplicate runs \(max 64 characters\) |
95+
96+
#### Output
97+
98+
| Parameter | Type | Description |
99+
| --------- | ---- | ----------- |
100+
| `runId` | number | The globally unique ID of the triggered run |
101+
| `numberInJob` | number | The sequence number of this run among all runs of the job |
102+
103+
### `databricks_get_run`
104+
105+
Get the status, timing, and details of a Databricks job run by its run ID.
106+
107+
#### Input
108+
109+
| Parameter | Type | Required | Description |
110+
| --------- | ---- | -------- | ----------- |
111+
| `host` | string | Yes | Databricks workspace host \(e.g., dbc-abc123.cloud.databricks.com\) |
112+
| `apiKey` | string | Yes | Databricks Personal Access Token |
113+
| `runId` | number | Yes | The canonical identifier of the run |
114+
| `includeHistory` | boolean | No | Include repair history in the response |
115+
| `includeResolvedValues` | boolean | No | Include resolved parameter values in the response |
116+
117+
#### Output
118+
119+
| Parameter | Type | Description |
120+
| --------- | ---- | ----------- |
121+
| `runId` | number | The run ID |
122+
| `jobId` | number | The job ID this run belongs to |
123+
| `runName` | string | Name of the run |
124+
| `runType` | string | Type of run \(JOB_RUN, WORKFLOW_RUN, SUBMIT_RUN\) |
125+
| `attemptNumber` | number | Retry attempt number \(0 for initial attempt\) |
126+
| `state` | object | Run state information |
127+
|`lifeCycleState` | string | Lifecycle state \(QUEUED, PENDING, RUNNING, TERMINATING, TERMINATED, SKIPPED, INTERNAL_ERROR\) |
128+
|`resultState` | string | Result state \(SUCCESS, FAILED, TIMEDOUT, CANCELED\) |
129+
|`stateMessage` | string | Descriptive message for the current state |
130+
|`userCancelledOrTimedout` | boolean | Whether the run was cancelled by user or timed out |
131+
| `startTime` | number | Run start timestamp \(epoch ms\) |
132+
| `endTime` | number | Run end timestamp \(epoch ms, 0 if still running\) |
133+
| `setupDuration` | number | Cluster setup duration \(ms\) |
134+
| `executionDuration` | number | Execution duration \(ms\) |
135+
| `cleanupDuration` | number | Cleanup duration \(ms\) |
136+
| `runPageUrl` | string | URL to the run detail page in Databricks UI |
137+
| `creatorUserName` | string | Email of the user who triggered the run |
138+
139+
### `databricks_list_runs`
140+
141+
List job runs in a Databricks workspace with optional filtering by job, status, and time range.
142+
143+
#### Input
144+
145+
| Parameter | Type | Required | Description |
146+
| --------- | ---- | -------- | ----------- |
147+
| `host` | string | Yes | Databricks workspace host \(e.g., dbc-abc123.cloud.databricks.com\) |
148+
| `apiKey` | string | Yes | Databricks Personal Access Token |
149+
| `jobId` | number | No | Filter runs by job ID. Omit to list runs across all jobs |
150+
| `activeOnly` | boolean | No | Only include active runs \(PENDING, RUNNING, or TERMINATING\) |
151+
| `completedOnly` | boolean | No | Only include completed runs |
152+
| `limit` | number | No | Maximum number of runs to return \(range 1-25, default 20\) |
153+
| `offset` | number | No | Offset for pagination |
154+
| `runType` | string | No | Filter by run type \(JOB_RUN, WORKFLOW_RUN, SUBMIT_RUN\) |
155+
| `startTimeFrom` | number | No | Filter runs started at or after this timestamp \(epoch ms\) |
156+
| `startTimeTo` | number | No | Filter runs started at or before this timestamp \(epoch ms\) |
157+
158+
#### Output
159+
160+
| Parameter | Type | Description |
161+
| --------- | ---- | ----------- |
162+
| `runs` | array | List of job runs |
163+
|`runId` | number | Unique run identifier |
164+
|`jobId` | number | Job this run belongs to |
165+
|`runName` | string | Run name |
166+
|`runType` | string | Run type \(JOB_RUN, WORKFLOW_RUN, SUBMIT_RUN\) |
167+
|`state` | object | Run state information |
168+
|`lifeCycleState` | string | Lifecycle state \(QUEUED, PENDING, RUNNING, TERMINATING, TERMINATED, SKIPPED, INTERNAL_ERROR\) |
169+
|`resultState` | string | Result state \(SUCCESS, FAILED, TIMEDOUT, CANCELED\) |
170+
|`stateMessage` | string | Descriptive state message |
171+
|`startTime` | number | Run start timestamp \(epoch ms\) |
172+
|`endTime` | number | Run end timestamp \(epoch ms\) |
173+
| `hasMore` | boolean | Whether more runs are available for pagination |
174+
| `nextPageToken` | string | Token for fetching the next page of results |
175+
176+
### `databricks_cancel_run`
177+
178+
Cancel a running or pending Databricks job run. Cancellation is asynchronous; poll the run status to confirm termination.
179+
180+
#### Input
181+
182+
| Parameter | Type | Required | Description |
183+
| --------- | ---- | -------- | ----------- |
184+
| `host` | string | Yes | Databricks workspace host \(e.g., dbc-abc123.cloud.databricks.com\) |
185+
| `apiKey` | string | Yes | Databricks Personal Access Token |
186+
| `runId` | number | Yes | The canonical identifier of the run to cancel |
187+
188+
#### Output
189+
190+
| Parameter | Type | Description |
191+
| --------- | ---- | ----------- |
192+
| `success` | boolean | Whether the cancel request was accepted |
193+
194+
### `databricks_get_run_output`
195+
196+
Get the output of a completed Databricks job run, including notebook results, error messages, and logs. For multi-task jobs, use the task run ID (not the parent run ID).
197+
198+
#### Input
199+
200+
| Parameter | Type | Required | Description |
201+
| --------- | ---- | -------- | ----------- |
202+
| `host` | string | Yes | Databricks workspace host \(e.g., dbc-abc123.cloud.databricks.com\) |
203+
| `apiKey` | string | Yes | Databricks Personal Access Token |
204+
| `runId` | number | Yes | The run ID to get output for. For multi-task jobs, use the task run ID |
205+
206+
#### Output
207+
208+
| Parameter | Type | Description |
209+
| --------- | ---- | ----------- |
210+
| `notebookOutput` | object | Notebook task output \(from dbutils.notebook.exit\(\)\) |
211+
|`result` | string | Value passed to dbutils.notebook.exit\(\) \(max 1 MB\) |
212+
|`truncated` | boolean | Whether the result was truncated |
213+
| `error` | string | Error message if the run failed or output is unavailable |
214+
| `errorTrace` | string | Error stack trace if available |
215+
| `logs` | string | Log output from the run if available |
216+
217+
### `databricks_list_clusters`
218+
219+
List all clusters in a Databricks workspace including their state, configuration, and resource details.
220+
221+
#### Input
222+
223+
| Parameter | Type | Required | Description |
224+
| --------- | ---- | -------- | ----------- |
225+
| `host` | string | Yes | Databricks workspace host \(e.g., dbc-abc123.cloud.databricks.com\) |
226+
| `apiKey` | string | Yes | Databricks Personal Access Token |
227+
228+
#### Output
229+
230+
| Parameter | Type | Description |
231+
| --------- | ---- | ----------- |
232+
| `clusters` | array | List of clusters in the workspace |
233+
|`clusterId` | string | Unique cluster identifier |
234+
|`clusterName` | string | Cluster display name |
235+
|`state` | string | Current state \(PENDING, RUNNING, RESTARTING, RESIZING, TERMINATING, TERMINATED, ERROR, UNKNOWN\) |
236+
|`stateMessage` | string | Human-readable state description |
237+
|`creatorUserName` | string | Email of the cluster creator |
238+
|`sparkVersion` | string | Spark runtime version \(e.g., 13.3.x-scala2.12\) |
239+
|`nodeTypeId` | string | Worker node type identifier |
240+
|`driverNodeTypeId` | string | Driver node type identifier |
241+
|`numWorkers` | number | Number of worker nodes \(for fixed-size clusters\) |
242+
|`autoscale` | object | Autoscaling configuration \(null for fixed-size clusters\) |
243+
|`minWorkers` | number | Minimum number of workers |
244+
|`maxWorkers` | number | Maximum number of workers |
245+
|`clusterSource` | string | Origin \(API, UI, JOB, MODELS, PIPELINE, SQL\) |
246+
|`autoterminationMinutes` | number | Minutes of inactivity before auto-termination \(0 = disabled\) |
247+
|`startTime` | number | Cluster start timestamp \(epoch ms\) |
248+
249+

apps/docs/content/docs/en/tools/meta.json

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -20,6 +20,7 @@
2020
"cloudflare",
2121
"confluence",
2222
"cursor",
23+
"databricks",
2324
"datadog",
2425
"devin",
2526
"discord",

0 commit comments

Comments (0)