Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
10 changes: 9 additions & 1 deletion databricks-skills/databricks-app-python/4-deployment.md
Original file line number Diff line number Diff line change
Expand Up @@ -35,13 +35,21 @@ env:
| FastAPI | `["uvicorn", "app:app", "--host", "0.0.0.0", "--port", "8000"]` |
| Reflex | `["reflex", "run", "--env", "prod"]` |

### Excluded directories

When uploading via the SDK's `upload_folder()` / `upload_to_workspace()`, hidden directories (names starting with `.`) and the following directories are automatically skipped to keep uploads fast:

`node_modules`, `__pycache__`, `.venv`, `venv`, `.tox`, `.pytest_cache`, `.mypy_cache`, `.ruff_cache`, `dist`, `build`, `.eggs`, `*.egg-info`

If you use `databricks workspace import-dir` directly, it does **not** apply these exclusions. Either clean the directory first or use the SDK upload functions instead.

### Step 2: Create and Deploy

```bash
# Create the app
databricks apps create <app-name>

# Upload source code
# Upload source code (make sure to exclude node_modules, venv, etc.)
databricks workspace mkdirs /Workspace/Users/<user>/apps/<app-name>
databricks workspace import-dir . /Workspace/Users/<user>/apps/<app-name>

Expand Down
36 changes: 31 additions & 5 deletions databricks-tools-core/databricks_tools_core/file/workspace.py
Original file line number Diff line number Diff line change
Expand Up @@ -19,6 +19,24 @@

from ..auth import get_workspace_client

# Directory names that are pruned while walking a local folder for upload.
# They hold dependency installs, virtual environments, tool caches and build
# output, none of which belong in a Databricks workspace.
#
# NOTE: entries are compared by exact name, so "*.egg-info" is not expanded
# as a glob here; real "<pkg>.egg-info" directories are caught by the
# separate `endswith(".egg-info")` check in the directory walkers.
EXCLUDED_DIRS = frozenset(
    (
        # Dependency installs and virtual environments
        "node_modules",
        ".venv",
        "venv",
        ".eggs",
        "*.egg-info",
        # Interpreter and tool caches
        "__pycache__",
        ".tox",
        ".pytest_cache",
        ".mypy_cache",
        ".ruff_cache",
        # Build output
        "dist",
        "build",
    )
)


@dataclass
class UploadResult:
Expand Down Expand Up @@ -122,10 +140,15 @@ def _collect_files(local_folder: str) -> List[tuple]:
files = []
local_folder = os.path.abspath(local_folder)

for dirpath, _, filenames in os.walk(local_folder):
for dirpath, dirnames, filenames in os.walk(local_folder):
# Prune excluded directories so os.walk doesn't descend into them
dirnames[:] = [
d for d in dirnames
if not d.startswith(".") and d not in EXCLUDED_DIRS and not d.endswith(".egg-info")
]

for filename in filenames:
# Skip hidden files and __pycache__
if filename.startswith(".") or "__pycache__" in dirpath:
if filename.startswith("."):
continue

local_path = os.path.join(dirpath, filename)
Expand All @@ -149,8 +172,11 @@ def _collect_directories(local_folder: str) -> List[str]:
local_folder = os.path.abspath(local_folder)

for dirpath, dirnames, _ in os.walk(local_folder):
# Skip hidden directories and __pycache__
dirnames[:] = [d for d in dirnames if not d.startswith(".") and d != "__pycache__"]
# Skip hidden directories and common non-deployable directories
dirnames[:] = [
d for d in dirnames
if not d.startswith(".") and d not in EXCLUDED_DIRS and not d.endswith(".egg-info")
]

for dirname in dirnames:
full_path = os.path.join(dirpath, dirname)
Expand Down
57 changes: 57 additions & 0 deletions databricks-tools-core/tests/unit/test_workspace.py
Original file line number Diff line number Diff line change
Expand Up @@ -60,6 +60,45 @@ def test_skips_pycache(self, tmp_path):
assert len(files) == 1
assert files[0][1] == "file.py"

def test_skips_node_modules(self, tmp_path):
    """Files nested under node_modules must not be collected."""
    # Build node_modules/lodash/index.js next to a regular source file.
    package_dir = tmp_path / "node_modules" / "lodash"
    package_dir.mkdir(parents=True)
    (package_dir / "index.js").write_text("module")
    (tmp_path / "app.py").write_text("content")

    collected = _collect_files(str(tmp_path))

    # Only the top-level source file survives; index 1 of each entry is
    # the path the existing tests compare against.
    assert [entry[1] for entry in collected] == ["app.py"]

def test_skips_venv_directories(self, tmp_path):
    """Should skip venv and .venv directories.

    Each excluded directory holds at least one file: the assertions below
    count collected files, so an empty directory would pass even if it
    were not pruned.
    """
    (tmp_path / "app.py").write_text("content")
    (tmp_path / "venv").mkdir()
    (tmp_path / "venv" / "bin").mkdir()
    (tmp_path / "venv" / "bin" / "python").write_text("binary")
    (tmp_path / ".venv").mkdir()
    (tmp_path / ".venv" / "lib").mkdir()
    # Non-hidden file name, so only directory pruning can exclude it.
    (tmp_path / ".venv" / "lib" / "site.py").write_text("module")

    files = _collect_files(str(tmp_path))

    assert len(files) == 1
    assert files[0][1] == "app.py"

def test_skips_build_artifacts(self, tmp_path):
    """Output under dist/ and build/ must not be collected."""
    (tmp_path / "app.py").write_text("content")
    # One artifact file per build-output directory.
    for dir_name, file_name, text in (
        ("dist", "bundle.js", "bundled"),
        ("build", "output.js", "built"),
    ):
        artifact_dir = tmp_path / dir_name
        artifact_dir.mkdir()
        (artifact_dir / file_name).write_text(text)

    collected = _collect_files(str(tmp_path))

    # Only the top-level source file should remain.
    assert [entry[1] for entry in collected] == ["app.py"]


class TestCollectDirectories:
"""Tests for _collect_directories helper function."""
Expand All @@ -86,6 +125,24 @@ def test_skips_hidden_directories(self, tmp_path):
assert "visible" in dirs
assert ".hidden" not in dirs

def test_skips_excluded_directories(self, tmp_path):
    """Should skip node_modules, venv, dist, build, and other excluded dirs.

    Also covers the ``.egg-info`` suffix rule, which is applied by an
    ``endswith`` check rather than by exact-name membership.
    """
    (tmp_path / "src").mkdir()
    (tmp_path / "node_modules").mkdir()
    (tmp_path / "venv").mkdir()
    (tmp_path / "dist").mkdir()
    (tmp_path / "build").mkdir()
    (tmp_path / "__pycache__").mkdir()
    (tmp_path / "mypkg.egg-info").mkdir()

    dirs = _collect_directories(str(tmp_path))

    assert "src" in dirs
    assert "node_modules" not in dirs
    assert "venv" not in dirs
    assert "dist" not in dirs
    assert "build" not in dirs
    assert "__pycache__" not in dirs
    assert "mypkg.egg-info" not in dirs


class TestUploadToWorkspace:
"""Tests for upload_to_workspace function."""
Expand Down