Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
4 changes: 4 additions & 0 deletions CHANGELOG.md
Original file line number Diff line number Diff line change
Expand Up @@ -35,6 +35,10 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0

### Added

- feat(cli): support for JSONC (JSON with comments) in VSCode settings.json merging
- feat(cli): polite deep merge for JSON files that preserves existing user settings
- feat(cli): basic validation for JSON object structures during initialization
- feat(cli): strip_json_comments utility for handling VSCode configuration files
Comment on lines +38 to +41
- feat(presets): Pluggable preset system with preset catalog and template resolver
- Preset manifest (`preset.yml`) with validation for artifact, command, and script types
- `PresetManifest`, `PresetRegistry`, `PresetManager`, `PresetCatalog`, `PresetResolver` classes in `src/specify_cli/presets.py`
Expand Down
151 changes: 135 additions & 16 deletions src/specify_cli/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -661,7 +661,10 @@ def log(message, color="green"):

try:
with open(sub_item, 'r', encoding='utf-8') as f:
new_settings = json.load(f)
raw_new = f.read()
# Handle comments even in template files for consistency
clean_new = strip_json_comments(raw_new)
new_settings = json.loads(clean_new)

if dest_file.exists():
merged = merge_json_files(dest_file, new_settings, verbose=verbose and not tracker)
Expand All @@ -677,14 +680,102 @@ def log(message, color="green"):
log(f"Warning: Could not merge, copying instead: {e}", "yellow")
shutil.copy2(sub_item, dest_file)

def strip_json_comments(content: str) -> str:
"""Remove // and /* */ comments from a JSON string, respecting string literals.

This implementation handles VSCode settings.json files by:
1. Skipping comments inside "string literals"
2. Correctly handling escaped quotes by counting backslashes
3. Removing single-line (//) and multi-line (/* */) comments
4. Handling trailing commas safely (only outside strings/comments)
"""
import re

result = []
i = 0
length = len(content)
in_string = False
string_char = None

while i < length:
char = content[i]

# Handle string literals
if char in ('"', "'"):
# Check if this quote is escaped by counting backslashes before it
bs_count = 0
j = i - 1
while j >= 0 and content[j] == '\\':
bs_count += 1
j -= 1

is_escaped = (bs_count % 2 != 0)

if not is_escaped:
if not in_string:
in_string = True
string_char = char
elif char == string_char:
in_string = False

result.append(char)
i += 1
continue

if in_string:
result.append(char)
i += 1
continue

# Handle single-line comments //
if i + 1 < length and content[i:i+2] == '//':
i += 2
while i < length and content[i] != '\n':
i += 1
continue

# Handle multi-line comments /* */
if i + 1 < length and content[i:i+2] == '/*':
i += 2
while i + 1 < length and content[i:i+2] != '*/':
i += 1
if i + 1 < length:
i += 2
else:
i = length # End of content
continue

# Handle trailing commas (safe because we're outside strings and comments)
if char == ',':
# Peek ahead to find the next non-whitespace character
next_i = i + 1
is_trailing = False
while next_i < length:
next_char = content[next_i]
if next_char in (' ', '\t', '\n', '\r'):
next_i += 1
continue
if next_char in (']', '}'):
is_trailing = True
break

if is_trailing:
i += 1 # Skip the comma
continue

result.append(char)
i += 1

return "".join(result)

Comment on lines +692 to +770
def merge_json_files(existing_path: Path, new_content: dict, verbose: bool = False) -> dict:
"""Merge new JSON content into existing JSON file.

Performs a deep merge where:
Performs a polite deep merge where:
- New keys are added
- Existing keys are preserved unless overwritten by new content
- Existing keys are PRESERVED (not overwritten) unless they are dictionaries
- Nested dictionaries are merged recursively
- Lists and other values are replaced (not merged)
- Lists and other values are preserved from base if they exist

Args:
existing_path: Path to existing JSON file
Expand All @@ -694,26 +785,54 @@ def merge_json_files(existing_path: Path, new_content: dict, verbose: bool = Fal
Returns:
Merged JSON content as dict
"""
try:
with open(existing_path, 'r', encoding='utf-8') as f:
existing_content = json.load(f)
except (FileNotFoundError, json.JSONDecodeError):
# If file doesn't exist or is invalid, just use new content
# Load existing content first to have a safe fallback
existing_content = {}
exists = existing_path.exists()

if exists:
try:
with open(existing_path, 'r', encoding='utf-8') as f:
raw_content = f.read().lstrip('\ufeff')
# Handle comments (JSONC)
clean_content = strip_json_comments(raw_content)
existing_content = json.loads(clean_content)
except (FileNotFoundError, json.JSONDecodeError) as e:
if verbose and not isinstance(e, FileNotFoundError):
console.print(f"[yellow]Warning: Could not parse existing JSON in {existing_path.name} ({e}).[/yellow]")
# fallback to empty dict for merging

# Validate template content
if not isinstance(new_content, dict):
if verbose:
console.print(f"[yellow]Warning: Template content for {existing_path.name} is not a dictionary. Preserving existing settings.[/yellow]")
return existing_content if isinstance(existing_content, dict) else {}

if not exists:
return new_content

def deep_merge(base: dict, update: dict) -> dict:
"""Recursively merge update dict into base dict."""
# If existing content parsed but is not a dict, skip merge to avoid data loss
if not isinstance(existing_content, dict):
if verbose:
console.print(f"[yellow]Warning: Existing JSON in {existing_path.name} is not an object. Skipping merge to avoid data loss.[/yellow]")
return existing_content

def deep_merge_polite(base: dict, update: dict) -> dict:
"""Recursively merge update dict into base dict, preserving base values."""
result = base.copy()
for key, value in update.items():
if key in result and isinstance(result[key], dict) and isinstance(value, dict):
if key not in result:
# Add new key
result[key] = value
elif isinstance(result[key], dict) and isinstance(value, dict):
# Recursively merge nested dictionaries
result[key] = deep_merge(result[key], value)
result[key] = deep_merge_polite(result[key], value)
else:
# Add new key or replace existing value
result[key] = value
# Key already exists and is not a dict, PRESERVE existing value
# This ensures user settings aren't overwritten by template defaults
pass
return result

merged = deep_merge(existing_content, new_content)
merged = deep_merge_polite(existing_content, new_content)

if verbose:
console.print(f"[cyan]Merged JSON file:[/cyan] {existing_path.name}")
Expand Down
184 changes: 184 additions & 0 deletions tests/test_merge.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,184 @@
import json
from pathlib import Path
from specify_cli import strip_json_comments, merge_json_files

# --- Dimension 1: JSONC Parsing & Tokenization ---

def test_strip_json_comments_edge_cases():
    """Comment markers and escape sequences inside strings must be protected."""
    source = """
    {
        "url": "https://github.com/spec-kit", // URL with //
        "path": "C:\\\\Users\\\\Admin\\\\Documents", /* Path with backslashes */
        "escaped": "String with \\\"quote\\\" inside",
        "nested_comments": "This /* is not */ a comment",
        "mixed": {
            "a": [1, /* mid-array comment */ 2],
            "b": 3 // end of line
        },
        "even_backslashes": "\\\\\\\\",
        "odd_backslashes": "\\\\\\\""
    }
    """
    data = json.loads(strip_json_comments(source))

    expected = {
        "url": "https://github.com/spec-kit",
        "path": "C:\\Users\\Admin\\Documents",
        "escaped": 'String with "quote" inside',
        "nested_comments": "This /* is not */ a comment",
        "even_backslashes": "\\\\",
        "odd_backslashes": '\\"',
    }
    for key, value in expected.items():
        assert data[key] == value
    assert data["mixed"]["a"] == [1, 2]

def test_strip_json_comments_trailing_commas_advanced():
    """Trailing commas in nested objects, arrays, and at the top level."""
    document = """
    {
        "list": [
            {"id": 1,},
            {"id": 2,},
        ],
        "metadata": {
            "tags": ["a", "b", ],
        },
    }
    """
    cleaned = json.loads(strip_json_comments(document))
    assert len(cleaned["list"]) == 2
    assert cleaned["list"][0]["id"] == 1
    assert cleaned["metadata"]["tags"] == ["a", "b"]

def test_strip_json_comments_safe_commas_in_strings():
    """Commas followed by ] or } INSIDE string values must survive stripping."""
    document = """
    {
        "tricky_string": "value, ]",
        "another_one": "nested, }",
        "normal": [1, 2, ]
    }
    """
    result = json.loads(strip_json_comments(document))
    assert result["tricky_string"] == "value, ]"
    assert result["another_one"] == "nested, }"
    assert result["normal"] == [1, 2]

def test_strip_json_comments_whitespace():
    """Stripping tolerates blank lines, odd comma placement, and comments."""
    raw = '{ \n\n "a": 1\n, \n "b": 2 // comment \n }'
    assert json.loads(strip_json_comments(raw)) == {"a": 1, "b": 2}

# --- Dimension 2: Polite Deep Merge Strategy ---

def test_merge_json_files_type_mismatch_preservation(tmp_path):
    """A user's scalar value wins when the template wants a dict for the key."""
    settings_path = tmp_path / "settings.json"
    # A user may have overridden a setting with a plain string.
    settings_path.write_text('{"chat.editor.fontFamily": "CustomFont"}')

    # The template (hypothetically) supplies a dict under the same key.
    template = {"chat.editor.fontFamily": {"font": "TemplateFont"}}

    result = merge_json_files(settings_path, template)
    assert result["chat.editor.fontFamily"] == "CustomFont"

def test_merge_json_files_deep_nesting(tmp_path):
    """New keys are added at every depth while existing nested values survive."""
    settings_path = tmp_path / "settings.json"
    settings_path.write_text("""
    {
        "a": {
            "b": {
                "c": 1
            }
        }
    }
    """)

    template = {
        "a": {
            "b": {"d": 2},  # new nested key
            "e": 3,         # new mid-level key
        },
    }

    result = merge_json_files(settings_path, template)
    assert result["a"]["b"]["c"] == 1
    assert result["a"]["b"]["d"] == 2
    assert result["a"]["e"] == 3

def test_merge_json_files_empty_existing(tmp_path):
    """Merging template settings into an empty JSON object just adopts them."""
    target = tmp_path / "empty.json"
    target.write_text("{}")
    assert merge_json_files(target, {"a": 1}) == {"a": 1}

# --- Dimension 3: Real-world Simulation ---

def test_merge_vscode_realistic_scenario(tmp_path):
    """Realistic VSCode settings.json: user preferences kept, template keys added."""
    settings_path = tmp_path / "vscode_settings.json"
    settings_path.write_text("""
    {
        "editor.fontSize": 12,
        "editor.formatOnSave": true,
        "files.exclude": {
            "**/.git": true,
            "**/node_modules": true
        },
        "chat.promptFilesRecommendations": {
            "existing.tool": true
        } // User comment
    }
    """)

    template = {
        "chat.promptFilesRecommendations": {
            "speckit.specify": True,
            "speckit.plan": True,
        },
        "chat.tools.terminal.autoApprove": {
            ".specify/scripts/bash/": True,
        },
    }

    result = merge_json_files(settings_path, template)

    # Existing user preferences are preserved...
    assert result["editor.fontSize"] == 12
    assert result["files.exclude"]["**/.git"] is True
    assert result["chat.promptFilesRecommendations"]["existing.tool"] is True

    # ...and template-only keys are merged in.
    assert result["chat.promptFilesRecommendations"]["speckit.specify"] is True
    assert result["chat.tools.terminal.autoApprove"][".specify/scripts/bash/"] is True

# --- Dimension 4: Error Handling & Robustness ---

def test_merge_json_files_with_bom(tmp_path):
    """Files beginning with a UTF-8 BOM (common on Windows) still merge."""
    target = tmp_path / "bom.json"
    # b'\xef\xbb\xbf' is the UTF-8 byte-order mark.
    target.write_bytes(b'\xef\xbb\xbf' + b'{"a": 1}')
    assert merge_json_files(target, {"b": 2}) == {"a": 1, "b": 2}

def test_merge_json_files_not_a_dictionary_template(tmp_path):
    """A non-dict template must not clobber existing settings."""
    target = tmp_path / "ok.json"
    target.write_text('{"a": 1}')
    # Safe fallback: the existing settings come back untouched.
    assert merge_json_files(target, ["not", "a", "dict"]) == {"a": 1}