@@ -0,0 +1,16 @@
bundle:
  name: "for_each_task_validation"

resources:
  jobs:
    test_job:
      name: "Test Job"
      tasks:
        - task_key: "parent_task"
          for_each_task:
            inputs: "[1, 2, 3]"
            task:
              task_key: "child_task"
              notebook_task:
                notebook_path: "test.py"
          max_retries: 3

16 changes: 16 additions & 0 deletions acceptance/bundle/validate/for_each_task_max_retries/output.txt
@@ -0,0 +1,16 @@
Error: Invalid max_retries configuration for for_each_task
at resources.jobs.test_job.tasks[0]
in databricks.yml:9:11

Task "parent_task" has max_retries defined at the parent level, but it uses for_each_task.
When using for_each_task, max_retries must be defined on the nested task (for_each_task.task.max_retries), not on the parent task.

Name: for_each_task_validation
Target: default
Workspace:
User: [USERNAME]
Path: /Workspace/Users/[USERNAME]/.bundle/for_each_task_validation/default

Found 1 error

Exit code: 1
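
To clear the error above, move max_retries off the parent task and onto the nested task under for_each_task.task. A minimal sketch of the corrected tasks block (the for_each_task_valid fixture later in this diff uses the same shape):

tasks:
  - task_key: "parent_task"
    for_each_task:
      inputs: "[1, 2, 3]"
      task:
        task_key: "child_task"
        max_retries: 3   # moved from the parent task to the nested task
        notebook_task:
          notebook_path: "test.py"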
2 changes: 2 additions & 0 deletions acceptance/bundle/validate/for_each_task_max_retries/script
@@ -0,0 +1,2 @@
#!/bin/bash
errcode $CLI bundle validate
@@ -0,0 +1,2 @@
# Databricks notebook source
print("test")
@@ -0,0 +1,18 @@
bundle:
  name: "for_each_task_validation"

resources:
  jobs:
    test_job:
      name: "Test Job"
      tasks:
        - task_key: "parent_task"
          for_each_task:
            inputs: "[1, 2, 3]"
            task:
              task_key: "child_task"
              notebook_task:
                notebook_path: "test.py"
          max_retries: 3
          min_retry_interval_millis: 1000
          retry_on_timeout: true

@@ -0,0 +1,30 @@
Error: Invalid max_retries configuration for for_each_task
at resources.jobs.test_job.tasks[0]
in databricks.yml:9:11

Task "parent_task" has max_retries defined at the parent level, but it uses for_each_task.
When using for_each_task, max_retries must be defined on the nested task (for_each_task.task.max_retries), not on the parent task.

Warning: Invalid min_retry_interval_millis configuration for for_each_task
at resources.jobs.test_job.tasks[0]
in databricks.yml:9:11

Task "parent_task" has min_retry_interval_millis defined at the parent level, but it uses for_each_task.
When using for_each_task, min_retry_interval_millis must be defined on the nested task (for_each_task.task.min_retry_interval_millis), not on the parent task.

Warning: Invalid retry_on_timeout configuration for for_each_task
at resources.jobs.test_job.tasks[0]
in databricks.yml:9:11

Task "parent_task" has retry_on_timeout defined at the parent level, but it uses for_each_task.
When using for_each_task, retry_on_timeout must be defined on the nested task (for_each_task.task.retry_on_timeout), not on the parent task.

Name: for_each_task_validation
Target: default
Workspace:
User: [USERNAME]
Path: /Workspace/Users/[USERNAME]/.bundle/for_each_task_validation/default

Found 1 error and 2 warnings

Exit code: 1
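
The same relocation clears the two warnings: min_retry_interval_millis and retry_on_timeout also belong on the nested task. A sketch, assuming the nested for_each_task.task accepts the same retry fields as a top-level task (this exact fixture is not part of the diff):

tasks:
  - task_key: "parent_task"
    for_each_task:
      inputs: "[1, 2, 3]"
      task:
        task_key: "child_task"
        max_retries: 3                    # moved from the parent task
        min_retry_interval_millis: 1000   # moved from the parent task
        retry_on_timeout: true            # moved from the parent task
        notebook_task:
          notebook_path: "test.py"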
@@ -0,0 +1,2 @@
#!/bin/bash
errcode $CLI bundle validate
@@ -0,0 +1,2 @@
# Databricks notebook source
print("test")
16 changes: 16 additions & 0 deletions acceptance/bundle/validate/for_each_task_valid/databricks.yml
@@ -0,0 +1,16 @@
bundle:
  name: "for_each_task_validation"

resources:
  jobs:
    test_job:
      name: "Test Job"
      tasks:
        - task_key: "parent_task"
          for_each_task:
            inputs: "[1, 2, 3]"
            task:
              task_key: "child_task"
              max_retries: 3
              notebook_task:
                notebook_path: "test.py"

7 changes: 7 additions & 0 deletions acceptance/bundle/validate/for_each_task_valid/output.txt
@@ -0,0 +1,7 @@
Name: for_each_task_validation
Target: default
Workspace:
User: [USERNAME]
Path: /Workspace/Users/[USERNAME]/.bundle/for_each_task_validation/default

Validation OK!
2 changes: 2 additions & 0 deletions acceptance/bundle/validate/for_each_task_valid/script
@@ -0,0 +1,2 @@
#!/bin/bash
$CLI bundle validate
2 changes: 2 additions & 0 deletions acceptance/bundle/validate/for_each_task_valid/test.py
@@ -0,0 +1,2 @@
# Databricks notebook source
print("test")
1 change: 1 addition & 0 deletions bundle/config/validate/fast_validate.go
@@ -29,6 +29,7 @@ func (f *fastValidate) Apply(ctx context.Context, rb *bundle.Bundle) diag.Diagno
	// Fast mutators with only in-memory checks
	JobClusterKeyDefined(),
	JobTaskClusterSpec(),
	ForEachTask(),

	// Blocking mutators. Deployments will fail if these checks fail.
	ValidateArtifactPath(),
76 changes: 76 additions & 0 deletions bundle/config/validate/for_each_task.go
@@ -0,0 +1,76 @@
package validate

import (
	"context"
	"fmt"

	"github.com/databricks/cli/bundle"
	"github.com/databricks/cli/libs/diag"
	"github.com/databricks/cli/libs/dyn"
	"github.com/databricks/databricks-sdk-go/service/jobs"
)

// ForEachTask validates constraints for for_each_task configuration
func ForEachTask() bundle.ReadOnlyMutator {
	return &forEachTask{}
}

type forEachTask struct{ bundle.RO }

func (v *forEachTask) Name() string {
	return "validate:for_each_task"
}

func (v *forEachTask) Apply(ctx context.Context, b *bundle.Bundle) diag.Diagnostics {
	diags := diag.Diagnostics{}

	jobsPath := dyn.NewPath(dyn.Key("resources"), dyn.Key("jobs"))

	for resourceName, job := range b.Config.Resources.Jobs {
		resourcePath := jobsPath.Append(dyn.Key(resourceName))

		for taskIndex, task := range job.Tasks {
			taskPath := resourcePath.Append(dyn.Key("tasks"), dyn.Index(taskIndex))

			if task.ForEachTask != nil {
				diags = diags.Extend(validateForEachTask(b, task, taskPath))
			}
		}
	}

	return diags
}

func validateForEachTask(b *bundle.Bundle, task jobs.Task, taskPath dyn.Path) diag.Diagnostics {
	diags := diag.Diagnostics{}

	if task.MaxRetries != 0 {
		diags = diags.Append(invalidRetryFieldDiag(b, task, taskPath, "max_retries", diag.Error))
	}

	if task.MinRetryIntervalMillis != 0 {
		diags = diags.Append(invalidRetryFieldDiag(b, task, taskPath, "min_retry_interval_millis", diag.Warning))
	}

	if task.RetryOnTimeout {
		diags = diags.Append(invalidRetryFieldDiag(b, task, taskPath, "retry_on_timeout", diag.Warning))
	}

	return diags
}

func invalidRetryFieldDiag(b *bundle.Bundle, task jobs.Task, taskPath dyn.Path, fieldName string, severity diag.Severity) diag.Diagnostic {
	detail := fmt.Sprintf(
		"Task %q has %s defined at the parent level, but it uses for_each_task.\n"+
			"When using for_each_task, %s must be defined on the nested task (for_each_task.task.%s), not on the parent task.",
		task.TaskKey, fieldName, fieldName, fieldName,
	)

	return diag.Diagnostic{
		Severity:  severity,
		Summary:   fmt.Sprintf("Invalid %s configuration for for_each_task", fieldName),
		Detail:    detail,
		Locations: b.Config.GetLocations(taskPath.String()),
		Paths:     []dyn.Path{taskPath},
	}
}