From 9aef0ad6c09333d278b5cfcdeab53edc8e72180b Mon Sep 17 00:00:00 2001
From: Lincoln Stein <lincoln.stein@gmail.com>
Date: Wed, 20 May 2026 16:20:45 -0400
Subject: [PATCH] chore(dedupe): collapse low-risk redundancies from code
 review
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Backend:

* drop the duplicate `/filetree/home` endpoint from `routers/album.py`;
  `routers/filetree.py` already serves the canonical one (whichever
  router was included last had been winning)
* extract `_compute_curation()` and `_validate_curation_request()` —
  `/curate` (async background task) and `/curate_sync` were carrying
  ~150 lines of identical Monte Carlo / mapping / selection logic.
  The async path passes a small `on_iteration` callback to drive its
  ProgressTracker
* route the three index endpoints (`delete_image`, `move_images`,
  `copy_images`) through the existing `get_embeddings_for_album`
  helper instead of each re-instantiating `Embeddings` directly
* add `_l2_normalize(x, axis, eps=1e-12)` to `embeddings.py` and use
  it in all three numpy-normalize sites — also closes the no-epsilon
  NaN risk in `find_duplicate_clusters`
* add `_normalized_filtered_embeddings()` shared loader for FPS /
  KMeans (npz open + valid mask + normalize); each function is now
  ~15 lines shorter and the "load" half is provably identical

Frontend:

* delete the duplicate `createPathField` in `album-manager.js`
  (defined twice with identical bodies — the second shadowed the
  first)
* share the exported `scoreDisplay` singleton in `seek-slider.js`
  instead of constructing a second `ScoreDisplay`. The duplicate
  instance was double-binding the star click handler on `#scoreText`,
  so a bookmark toggle fired twice and immediately cancelled itself

Net: +172 / −263 across 6 files.

Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
---
 photomap/backend/embeddings.py                |  93 +++---
 photomap/backend/routers/album.py             |  12 -
 photomap/backend/routers/curation.py          | 278 +++++++-----------
 photomap/backend/routers/index.py             |  22 +-
 .../static/javascript/album-manager.js        |  23 --
 .../frontend/static/javascript/seek-slider.js |   7 +-
 6 files changed, 172 insertions(+), 263 deletions(-)

diff --git a/photomap/backend/embeddings.py b/photomap/backend/embeddings.py
index cab98bb5..06299720 100644
--- a/photomap/backend/embeddings.py
+++ b/photomap/backend/embeddings.py
@@ -69,6 +69,44 @@ def _get_indexing_semaphore() -> asyncio.Semaphore:
     return _indexing_semaphore
 
 
+def _l2_normalize(x: np.ndarray, axis: int = -1, eps: float = 1e-12) -> np.ndarray:
+    """L2-normalize ``x`` along ``axis`` with an epsilon guard against zero vectors.
+
+    Several call sites previously open-coded ``x / (norm + 1e-10)`` (or worse,
+    no epsilon at all in ``find_duplicate_clusters``, which would NaN on any
+    all-zero embedding). Funneling through one helper makes the epsilon
+    consistent and the intent obvious.
+    """
+    norms = np.linalg.norm(x, axis=axis, keepdims=True)
+    return x / (norms + eps)
+
+
+def _normalized_filtered_embeddings(
+    embeddings_path: Path,
+    ignore_indices: list[int] | None,
+) -> tuple[np.ndarray, np.ndarray, np.ndarray]:
+    """Open the .npz, drop ``ignore_indices``, and L2-normalize the survivors.
+
+    Returns ``(normalized_vectors, valid_global_indices, filenames)``. When
+    every row is masked out, ``normalized_vectors`` is empty (shape ``(0,)``)
+    and the caller is expected to return early.
+    """
+    data = _open_npz_file(embeddings_path)
+    embeddings = data["embeddings"]
+    filenames = data["filenames"]
+
+    valid_mask = np.ones(len(embeddings), dtype=bool)
+    if ignore_indices:
+        valid_mask[ignore_indices] = False
+    valid_global_indices = np.where(valid_mask)[0]
+    filtered = embeddings[valid_global_indices]
+
+    if len(filtered) == 0:
+        return filtered, valid_global_indices, filenames
+
+    return _l2_normalize(filtered, axis=1), valid_global_indices, filenames
+
+
 register_heif_opener()  # Register HEIF opener for PIL
 SUPPORTED_EXTENSIONS = {
     ".jpg",
@@ -104,33 +142,15 @@ def get_fps_indices_global(
     Returns:
         List of selected filenames.
     """
-    data = _open_npz_file(embeddings_path)
-    embeddings = data["embeddings"]
-    filenames = data["filenames"]
-
-    n_total = len(embeddings)
-
-    # Create a mask of VALID indices (True = Keep, False = Ignore)
-    valid_mask = np.ones(n_total, dtype=bool)
-    if ignore_indices:
-        valid_mask[ignore_indices] = False
-
-    # Get the indices that are actually available
-    valid_global_indices = np.where(valid_mask)[0]
-
-    # Filter embeddings to only valid ones
-    filtered_embeddings = embeddings[valid_global_indices]
-
-    n_samples = len(filtered_embeddings)
+    vectors, valid_global_indices, filenames = _normalized_filtered_embeddings(
+        embeddings_path, ignore_indices
+    )
+    n_samples = len(vectors)
     if n_samples == 0:
         return []
     if n_target >= n_samples:
         return filenames[valid_global_indices].tolist()
 
-    # Normalize
-    norms = np.linalg.norm(filtered_embeddings, axis=1, keepdims=True)
-    vectors = filtered_embeddings / (norms + 1e-10)
-
     # Standard FPS Logic on the FILTERED set
     rng = np.random.RandomState(seed)
     # Pick random index relative to the FILTERED set
@@ -175,29 +195,15 @@ def get_kmeans_indices_global(
     Returns:
         List of selected filenames.
     """
-    data = _open_npz_file(embeddings_path)
-    embeddings = data["embeddings"]
-    filenames = data["filenames"]
-
-    n_total = len(embeddings)
-
-    valid_mask = np.ones(n_total, dtype=bool)
-    if ignore_indices:
-        valid_mask[ignore_indices] = False
-
-    valid_global_indices = np.where(valid_mask)[0]
-    filtered_embeddings = embeddings[valid_global_indices]
-
-    n_samples = len(filtered_embeddings)
+    vectors, valid_global_indices, filenames = _normalized_filtered_embeddings(
+        embeddings_path, ignore_indices
+    )
+    n_samples = len(vectors)
     if n_samples == 0:
         return []
     if n_target >= n_samples:
         return filenames[valid_global_indices].tolist()
 
-    # Normalize
-    norms = np.linalg.norm(filtered_embeddings, axis=1, keepdims=True)
-    vectors = filtered_embeddings / (norms + 1e-10)
-
     # Cluster
     kmeans = KMeans(n_clusters=n_target, random_state=seed, n_init=10)
     labels = kmeans.fit_predict(vectors)
@@ -1363,10 +1369,9 @@ def find_duplicate_clusters(self, similarity_threshold=0.995):
         embeddings = data["embeddings"]
         filenames = data["filenames"]
 
-        # Normalize embeddings
-        norm_embeddings = embeddings / np.linalg.norm(
-            embeddings, axis=-1, keepdims=True
-        )
+        # Normalize embeddings. ``_l2_normalize`` carries an epsilon guard so
+        # an all-zero row can't produce NaN here.
+        norm_embeddings = _l2_normalize(embeddings, axis=-1)
         assert isinstance(
             norm_embeddings, np.ndarray
         ), "Normalization failed, expected np.ndarray"
diff --git a/photomap/backend/routers/album.py b/photomap/backend/routers/album.py
index e06f0270..0450d6fd 100644
--- a/photomap/backend/routers/album.py
+++ b/photomap/backend/routers/album.py
@@ -311,15 +311,3 @@ def validate_image_access(album_config, image_path: Path) -> bool:
     )
 
 
-@album_router.get("/filetree/home", tags=["File Management"])
-async def get_home_directory():
-    """Get the home directory path for the current user."""
-    check_album_lock()  # May raise a 403 exception
-    # In a real application, you would determine the home directory based on the user's
-    # profile or configuration. Here, we just return a fixed path for demonstration.
-    try:
-        home_dir = str(Path.home())
-        return {"homePath": home_dir}
-    except Exception as e:
-        logger.error(f"Error getting home directory: {e}")
-        return {"homePath": ""}
diff --git a/photomap/backend/routers/curation.py b/photomap/backend/routers/curation.py
index 225e26bc..6442f9cc 100644
--- a/photomap/backend/routers/curation.py
+++ b/photomap/backend/routers/curation.py
@@ -5,6 +5,7 @@
 import threading
 import uuid
 from collections import Counter
+from collections.abc import Callable
 from pathlib import Path
 from typing import Any
 
@@ -44,97 +45,120 @@ class ExportRequest(BaseModel):
     filenames: list[str]
     output_folder: str
 
+def _validate_curation_request(request: CurationRequest) -> None:
+    """Clamp / reject obviously bad inputs. Shared by the async and sync endpoints."""
+    if request.target_count <= 0:
+        raise HTTPException(status_code=400, detail="target_count must be positive")
+    if request.target_count > 100000:
+        raise HTTPException(status_code=400, detail="target_count exceeds reasonable limit")
+    if request.iterations < 1:
+        request.iterations = 1
+    if request.iterations > 30:
+        request.iterations = 30
+
+
+def _compute_curation(
+    request: CurationRequest,
+    on_iteration: Callable[[int], None] | None = None,
+) -> dict[str, Any]:
+    """Run the Monte Carlo curation and build the structured response dict.
+
+    ``on_iteration(i)`` (1-indexed) fires after each Monte Carlo iteration so
+    the async endpoint can advance its ProgressTracker; the sync endpoint
+    passes ``None`` and just runs to completion.
+
+    Raises ``LookupError`` if the album key doesn't resolve — callers decide
+    whether that surfaces as a 404 (sync endpoint) or a ProgressTracker error
+    (background task).
+    """
+    config_manager = get_config_manager()
+    album_config = config_manager.get_album(request.album)
+    if not album_config:
+        raise LookupError("Album not found")
+
+    index_path = Path(album_config.index)
+    vote_counter: Counter = Counter()
+
+    for i in range(request.iterations):
+        run_seed = random.randint(0, 1000000)
+        if request.method == "kmeans":
+            selected_files = get_kmeans_indices_global(
+                index_path, request.target_count, run_seed, request.excluded_indices
+            )
+        else:
+            selected_files = get_fps_indices_global(
+                index_path, request.target_count, run_seed, request.excluded_indices
+            )
+        vote_counter.update(selected_files)
+        if on_iteration is not None:
+            on_iteration(i + 1)
+
+    data = _open_npz_file(index_path)
+    filename_map = data["filename_map"]
+    norm_map = {os.path.normpath(k).lower(): v for k, v in filename_map.items()}
+
+    # Every image that received a vote goes into the analysis table; the
+    # exclusion check defends against algorithms that returned an excluded
+    # index (e.g. from index drift after a recent re-index).
+    analysis_results = []
+    for filepath, count in vote_counter.most_common():
+        f_norm = os.path.normpath(filepath).lower()
+        if f_norm in norm_map:
+            idx = int(norm_map[f_norm])
+            if idx in request.excluded_indices:
+                continue
+
+            analysis_results.append({
+                "filename": os.path.basename(filepath),
+                "subfolder": os.path.basename(os.path.dirname(filepath)),
+                "filepath": filepath,
+                "index": idx,
+                "count": count,
+                "frequency": round((count / request.iterations) * 100, 1),
+            })
+
+    # Top-N consensus winners.
+    consensus_files = [x["filepath"] for x in analysis_results[: request.target_count]]
+    selected_indices: list[int] = []
+    final_file_list: list[str] = []
+    for f in consensus_files:
+        f_norm = os.path.normpath(f).lower()
+        if f_norm in norm_map:
+            selected_indices.append(int(norm_map[f_norm]))
+            final_file_list.append(f)
+
+    return {
+        "status": "success",
+        "count": len(selected_indices),
+        "target_count": request.target_count,
+        "selected_indices": selected_indices,
+        "selected_files": final_file_list,
+        "analysis_results": analysis_results,
+    }
+
+
 def _run_curation_task(job_id: str, request: CurationRequest):
     """
     Background task to run curation process with progress tracking.
     """
     try:
-        # Start progress tracking
         progress_tracker.start_operation(job_id, request.iterations, "curating")
-
-        config_manager = get_config_manager()
-        album_config = config_manager.get_album(request.album)
-        if not album_config:
-            progress_tracker.set_error(job_id, "Album not found")
-            return
-
-        index_path = Path(album_config.index)
         logger.info(f"Curation Job {job_id}: Running {request.method.upper()} x{request.iterations}...")
 
-        vote_counter = Counter()
-
-        # Run Monte Carlo with progress updates
-        for i in range(request.iterations):
-            run_seed = random.randint(0, 1000000)
-            if request.method == "kmeans":
-                selected_files = get_kmeans_indices_global(
-                    index_path, request.target_count, run_seed, request.excluded_indices
-                )
-            else:
-                selected_files = get_fps_indices_global(
-                    index_path, request.target_count, run_seed, request.excluded_indices
-                )
-            vote_counter.update(selected_files)
-
-            # Update progress after each iteration
+        def _on_iter(i: int) -> None:
             progress_tracker.update_progress(
-                job_id,
-                i + 1,
-                f"Iteration {i + 1}/{request.iterations}"
+                job_id, i, f"Iteration {i}/{request.iterations}"
             )
 
-        # Prepare Data needed for mapping
-        data = _open_npz_file(index_path)
-        filename_map = data["filename_map"]
-        norm_map = {os.path.normpath(k).lower(): v for k, v in filename_map.items()}
-
-        # Generate CSV Data (Analysis Results)
-        analysis_results = []
-        for filepath, count in vote_counter.most_common():
-            f_norm = os.path.normpath(filepath).lower()
-            if f_norm in norm_map:
-                idx = int(norm_map[f_norm])
-
-                if idx in request.excluded_indices:
-                    continue
-
-                subfolder = os.path.basename(os.path.dirname(filepath))
-
-                analysis_results.append({
-                    "filename": os.path.basename(filepath),
-                    "subfolder": subfolder,
-                    "filepath": filepath,
-                    "index": idx,
-                    "count": count,
-                    "frequency": round((count / request.iterations) * 100, 1)
-                })
-
-        # Generate Selection (top N winners)
-        consensus_files = [x['filepath'] for x in analysis_results[:request.target_count]]
-
-        selected_indices = []
-        final_file_list = []
-
-        for f in consensus_files:
-            f_norm = os.path.normpath(f).lower()
-            if f_norm in norm_map:
-                selected_indices.append(int(norm_map[f_norm]))
-                final_file_list.append(f)
-
-        result = {
-            "status": "success",
-            "count": len(selected_indices),
-            "target_count": request.target_count,
-            "selected_indices": selected_indices,
-            "selected_files": final_file_list,
-            "analysis_results": analysis_results
-        }
+        try:
+            result = _compute_curation(request, on_iteration=_on_iter)
+        except LookupError as exc:
+            progress_tracker.set_error(job_id, str(exc))
+            return
 
-        # Store result
         with _curation_results_lock:
             _curation_results[job_id] = result
 
-        # Mark as completed
         progress_tracker.complete_operation(job_id, "Curation completed")
         logger.info(f"Curation Job {job_id}: Completed successfully")
 
@@ -154,17 +178,7 @@ async def run_curation(request: CurationRequest, background_tasks: BackgroundTas
     Returns a job_id that can be used to poll for progress and results.
     """
     try:
-        # Validate target_count parameter
-        if request.target_count <= 0:
-            raise HTTPException(status_code=400, detail="target_count must be positive")
-        if request.target_count > 100000:
-            raise HTTPException(status_code=400, detail="target_count exceeds reasonable limit")
-
-        # Validate and cap iterations
-        if request.iterations < 1:
-            request.iterations = 1
-        if request.iterations > 30:
-            request.iterations = 30
+        _validate_curation_request(request)
 
         # Generate unique job ID
         job_id = f"curation_{uuid.uuid4().hex[:8]}"
@@ -178,6 +192,8 @@ async def run_curation(request: CurationRequest, background_tasks: BackgroundTas
             "iterations": request.iterations
         }
 
+    except HTTPException:
+        raise
     except Exception as e:
         logger.error(f"Failed to start curation: {str(e)}")
         raise HTTPException(status_code=500, detail=str(e)) from e
@@ -239,90 +255,14 @@ async def run_curation_sync(request: CurationRequest):
         JSON response with status, selected indices, files, and analysis results.
     """
     try:
-        # Validate target_count parameter
-        if request.target_count <= 0:
-            raise HTTPException(status_code=400, detail="target_count must be positive")
-        if request.target_count > 100000:
-            raise HTTPException(status_code=400, detail="target_count exceeds reasonable limit")
-
-        # Validate and cap iterations
-        if request.iterations < 1:
-            request.iterations = 1
-        if request.iterations > 30:
-            request.iterations = 30
-
-        config_manager = get_config_manager()
-        album_config = config_manager.get_album(request.album)
-        if not album_config:
-            raise HTTPException(status_code=404, detail="Album not found")
-        index_path = Path(album_config.index)
-
+        _validate_curation_request(request)
         logger.info(f"Curation: Running {request.method.upper()} x{request.iterations}...")
+        return _compute_curation(request)
 
-        vote_counter = Counter()
-
-        # 1. Run Monte Carlo
-        for _i in range(request.iterations):
-            run_seed = random.randint(0, 1000000)
-            if request.method == "kmeans":
-                selected_files = get_kmeans_indices_global(
-                    index_path, request.target_count, run_seed, request.excluded_indices
-                )
-            else:
-                selected_files = get_fps_indices_global(
-                    index_path, request.target_count, run_seed, request.excluded_indices
-                )
-            vote_counter.update(selected_files)
-
-        # 2. Prepare Data needed for mapping
-        data = _open_npz_file(index_path)
-        filename_map = data["filename_map"]
-        norm_map = {os.path.normpath(k).lower(): v for k, v in filename_map.items()}
-
-        # 3. Generate CSV Data (Analysis Results) - Includes EVERY image that got a vote
-        analysis_results = []
-        for filepath, count in vote_counter.most_common():
-            f_norm = os.path.normpath(filepath).lower()
-            if f_norm in norm_map:
-                idx = int(norm_map[f_norm])
-
-                # CRITICAL FIX: Strictly enforce exclusion.
-                # Even if the algo returned it (e.g. due to index drift), we MUST drop it here.
-                if idx in request.excluded_indices:
-                    continue
-
-                subfolder = os.path.basename(os.path.dirname(filepath))
-
-                analysis_results.append({
-                    "filename": os.path.basename(filepath),
-                    "subfolder": subfolder,
-                    "filepath": filepath,
-                    "index": idx,
-                    "count": count,
-                    "frequency": round((count / request.iterations) * 100, 1)
-                })
-
-        # 4. Generate Selection (Green Dots) - Just the top N winners
-        consensus_files = [x['filepath'] for x in analysis_results[:request.target_count]]
-
-        selected_indices = []
-        final_file_list = []
-
-        for f in consensus_files:
-            f_norm = os.path.normpath(f).lower()
-            if f_norm in norm_map:
-                selected_indices.append(int(norm_map[f_norm]))
-                final_file_list.append(f)
-
-        return {
-            "status": "success",
-            "count": len(selected_indices),
-            "target_count": request.target_count,
-            "selected_indices": selected_indices,
-            "selected_files": final_file_list,
-            "analysis_results": analysis_results
-        }
-
+    except HTTPException:
+        raise
+    except LookupError as e:
+        raise HTTPException(status_code=404, detail=str(e)) from e
     except Exception as e:
         logger.error(f"Curation Error: {e}")
         raise HTTPException(status_code=500, detail=str(e)) from e
diff --git a/photomap/backend/routers/index.py b/photomap/backend/routers/index.py
index 6f9e479b..30db6875 100644
--- a/photomap/backend/routers/index.py
+++ b/photomap/backend/routers/index.py
@@ -16,7 +16,12 @@
 from ..config import get_config_manager
 from ..embeddings import Embeddings, peek_encoder_spec
 from ..progress import progress_tracker
-from .album import check_album_lock, validate_album_exists, validate_image_access
+from .album import (
+    check_album_lock,
+    get_embeddings_for_album,
+    validate_album_exists,
+    validate_image_access,
+)
 
 index_router = APIRouter()
 
@@ -253,10 +258,7 @@ async def delete_image(album_key: str, index: int) -> JSONResponse:
     check_album_lock()  # May raise a 403 exception
     try:
         album_config = validate_album_exists(album_key)
-        embeddings = Embeddings(
-            embeddings_path=Path(album_config.index),
-            encoder_spec=album_config.encoder_spec,
-        )
+        embeddings = get_embeddings_for_album(album_key)
         image_path = embeddings.get_image_path(index)
 
         if not validate_image_access(album_config, image_path):
@@ -289,10 +291,7 @@ async def move_images(album_key: str, req: MoveImagesRequest) -> JSONResponse:
     check_album_lock()  # May raise a 403 exception
     try:
         album_config = validate_album_exists(album_key)
-        embeddings = Embeddings(
-            embeddings_path=Path(album_config.index),
-            encoder_spec=album_config.encoder_spec,
-        )
+        embeddings = get_embeddings_for_album(album_key)
 
         target_dir = Path(req.target_directory)
 
@@ -380,10 +379,7 @@ async def copy_images(album_key: str, req: CopyImagesRequest) -> JSONResponse:
     # Note: No album lock check - copying doesn't modify the album
     try:
         album_config = validate_album_exists(album_key)
-        embeddings = Embeddings(
-            embeddings_path=Path(album_config.index),
-            encoder_spec=album_config.encoder_spec,
-        )
+        embeddings = get_embeddings_for_album(album_key)
 
         target_dir = Path(req.target_directory)
 
diff --git a/photomap/frontend/static/javascript/album-manager.js b/photomap/frontend/static/javascript/album-manager.js
index 6eb37713..338046c6 100644
--- a/photomap/frontend/static/javascript/album-manager.js
+++ b/photomap/frontend/static/javascript/album-manager.js
@@ -931,29 +931,6 @@ export class AlbumManager {
   }
 
   // Path field methods
-  createPathField(path = "", cardElement) {
-    const container = cardElement.querySelector(".edit-album-paths-container");
-    return this._createAlbumPathRow({
-      path,
-      container,
-      onAddRow: () => this.addPathField("", cardElement),
-      onRemoveRow: () => {
-        if (container && container.children.length === 0) {
-          this.addPathField("", cardElement);
-        }
-      },
-      onFolderPick: (currentPath, setPath) => {
-        createSimpleDirectoryPicker(
-          (selectedPath) => {
-            setPath(selectedPath);
-          },
-          currentPath,
-          { showCreateFolder: true }
-        );
-      },
-    });
-  }
-
   addPathField(path = "", cardElement) {
     const container = cardElement.querySelector(".edit-album-paths-container");
     if (container) {
diff --git a/photomap/frontend/static/javascript/seek-slider.js b/photomap/frontend/static/javascript/seek-slider.js
index f334c156..29f269ab 100644
--- a/photomap/frontend/static/javascript/seek-slider.js
+++ b/photomap/frontend/static/javascript/seek-slider.js
@@ -1,6 +1,6 @@
 import { backStack } from "./back-stack.js";
 import { bookmarkManager } from "./bookmarks.js";
-import { ScoreDisplay } from "./score-display.js";
+import { scoreDisplay } from "./score-display.js";
 import { getCurrentSlideIndex, slideState } from "./slide-state.js";
 import { state } from "./state.js";
 import { debounce } from "./utils.js";
@@ -35,7 +35,10 @@ class SeekSlider {
     this.sliderContainer = document.getElementById("sliderWithTicksContainer");
     this.scoreDisplayElement = document.getElementById("fixedScoreDisplay");
     this.scoreSliderRow = document.getElementById("scoreSliderRow");
-    this.scoreDisplayObj = new ScoreDisplay();
+    // Share the singleton from score-display.js. Constructing a second
+    // instance double-binds the star click handler on #scoreText, so a
+    // bookmark toggle would fire twice and immediately cancel itself.
+    this.scoreDisplayObj = scoreDisplay;
     this.hoverStrip = document.getElementById("sliderHoverStrip");
 
     this.scoreText = document.getElementById("scoreText");