Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
26 changes: 5 additions & 21 deletions photomap/backend/routers/curation.py
Original file line number Diff line number Diff line change
Expand Up @@ -16,7 +16,6 @@
from ..embeddings import _open_npz_file, get_fps_indices_global, get_kmeans_indices_global
from ..progress import IndexStatus, progress_tracker
from .album import validate_album_exists, validate_image_access
from .index import check_album_lock

router = APIRouter()
logger = logging.getLogger(__name__)
Expand Down Expand Up @@ -278,8 +277,8 @@ async def export_dataset(request: ExportRequest):
Returns:
JSON response with success count and any errors.
"""
check_album_lock() # May raise a 403 exception
# Validate and sanitize the output folder to prevent path traversal
# Export is not a destructive album-management operation; the per-album
# lock check is already handled inside validate_album_exists() below.
if not request.output_folder:
raise HTTPException(status_code=400, detail="Output folder required")

Expand All @@ -290,26 +289,11 @@ async def export_dataset(request: ExportRequest):
except Exception as e:
raise HTTPException(status_code=400, detail=f"Invalid output folder: {e}") from e

# Define the base directory under which exports are allowed
# Use user's home directory as the base to prevent system-wide access
base_dir = Path.home().resolve()

# Ensure the export directory is within the allowed base directory
def is_within_base_dir(target_dir: Path, base: Path) -> bool:
"""Check if target directory is within the base directory."""
if os.name == "nt":
# On Windows, also ensure the drive matches
return target_dir.drive.lower() == base.drive.lower() and (target_dir == base or base in target_dir.parents)
else:
return target_dir == base or base in target_dir.parents

if not is_within_base_dir(output_dir, base_dir):
raise HTTPException(status_code=400, detail="Output folder is outside the allowed export directory")

# Resolve the album so we can verify each source path lives inside it —
# otherwise a caller could ask us to copy /etc/passwd into their export
# dir. Validated after the output-folder checks so cheap input errors
# surface as 400 even when the album key is bogus.
# dir. Source-path security is handled by validate_image_access(); no
# home-dir restriction is placed on the destination so users can export
# to external drives, network mounts, etc.
album_config = validate_album_exists(request.album)

if not output_dir.exists():
Expand Down
4 changes: 4 additions & 0 deletions photomap/frontend/static/javascript/curation.js
Original file line number Diff line number Diff line change
Expand Up @@ -585,11 +585,15 @@ function setupEventListeners() {
}),
});
const data = await response.json();
if (!response.ok) {
throw new Error(data.detail || `Export failed (${response.status})`);
}
alert(`Exported ${data.exported} files.`);
setStatus("Export Complete.", "success");
} catch (e) {
console.error(e);
alert("Export failed: " + e.message);
setStatus("Export failed.", "error");
}
};

Expand Down
79 changes: 30 additions & 49 deletions tests/backend/test_curation.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,9 +3,7 @@
Tests for the curation functionality (Model Training Dataset Curator).
"""

import tempfile
import time
from pathlib import Path

import pytest
from fixtures import build_index
Expand Down Expand Up @@ -269,29 +267,27 @@ def test_export_endpoint(client, new_album, monkeypatch, tmp_path):
data = response.json()
selected_files = data["selected_files"]

# Create export folder within home directory (as required by endpoint)
with tempfile.TemporaryDirectory(dir=Path.home()) as temp_dir:
export_folder = Path(temp_dir) / "exported_images"
export_folder = tmp_path / "exported_images"

# Export the files
response = client.post(
"/api/curation/export",
json={
"album": new_album["key"],
"filenames": selected_files,
"output_folder": str(export_folder)
}
)
assert response.status_code == 200
result = response.json()
assert result["status"] == "success"
assert "exported" in result
assert result["exported"] > 0
# Export the files
response = client.post(
"/api/curation/export",
json={
"album": new_album["key"],
"filenames": selected_files,
"output_folder": str(export_folder)
}
)
assert response.status_code == 200
result = response.json()
assert result["status"] == "success"
assert "exported" in result
assert result["exported"] > 0

# Verify files were actually exported
assert export_folder.exists()
exported_files = list(export_folder.iterdir())
assert len(exported_files) > 0
# Verify files were actually exported
assert export_folder.exists()
exported_files = list(export_folder.iterdir())
assert len(exported_files) > 0


def test_export_validation(client, tmp_path):
Expand Down Expand Up @@ -321,37 +317,22 @@ def test_export_validation(client, tmp_path):
assert response.status_code == 400


def test_export_path_traversal_protection(client):
"""Test that export prevents path traversal attacks."""
# Output-folder validation runs before album resolution.
def test_export_nonexistent_files(client, new_album, tmp_path):
"""Test export with nonexistent files."""
export_folder = tmp_path / "export_test"

response = client.post(
"/api/curation/export",
json={
"album": "any-key",
"filenames": ["some_file.jpg"],
"output_folder": "/etc"
"album": new_album["key"],
"filenames": ["/nonexistent/file1.jpg", "/nonexistent/file2.jpg"],
"output_folder": str(export_folder)
}
)
assert response.status_code == 400


def test_export_nonexistent_files(client, new_album):
"""Test export with nonexistent files."""
with tempfile.TemporaryDirectory(dir=Path.home()) as temp_dir:
export_folder = Path(temp_dir) / "export_test"

response = client.post(
"/api/curation/export",
json={
"album": new_album["key"],
"filenames": ["/nonexistent/file1.jpg", "/nonexistent/file2.jpg"],
"output_folder": str(export_folder)
}
)
assert response.status_code == 200
result = response.json()
# Should succeed but with 0 exported
assert result["exported"] == 0
assert response.status_code == 200
result = response.json()
# Should succeed but with 0 exported
assert result["exported"] == 0


def test_curate_multiple_iterations(client, new_album, monkeypatch):
Expand Down
Loading