From 2776f44fc9592df1c71fb2c00fb0ed5df475a819 Mon Sep 17 00:00:00 2001
From: rbbtsn0w <hamiltonsnow@gmail.com>
Date: Mon, 16 Mar 2026 11:30:38 +0800
Subject: [PATCH 1/5] feat(cli): implement polite deep merge for settings.json
 and support JSONC

---
 CHANGELOG.md                |  4 +++
 src/specify_cli/__init__.py | 59 +++++++++++++++++++++++++-------
 tests/test_merge.py         | 68 +++++++++++++++++++++++++++++++++++++
 3 files changed, 118 insertions(+), 13 deletions(-)
 create mode 100644 tests/test_merge.py

diff --git a/CHANGELOG.md b/CHANGELOG.md
index 4b1b6a47a..e701f207f 100644
--- a/CHANGELOG.md
+++ b/CHANGELOG.md
@@ -35,6 +35,10 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0
 
 ### Added
 
+- feat(cli): support for JSONC (JSON with comments) in VSCode settings.json merging
+- feat(cli): polite deep merge for JSON files that preserves existing user settings
+- feat(cli): basic validation for JSON object structures during initialization
+- feat(cli): strip_json_comments utility for handling VSCode configuration files
 - feat(presets): Pluggable preset system with preset catalog and template resolver
 - Preset manifest (`preset.yml`) with validation for artifact, command, and script types
 - `PresetManifest`, `PresetRegistry`, `PresetManager`, `PresetCatalog`, `PresetResolver` classes in `src/specify_cli/presets.py`
diff --git a/src/specify_cli/__init__.py b/src/specify_cli/__init__.py
index a45535aee..2eac8f1b0 100644
--- a/src/specify_cli/__init__.py
+++ b/src/specify_cli/__init__.py
@@ -661,7 +661,10 @@ def log(message, color="green"):
 
     try:
         with open(sub_item, 'r', encoding='utf-8') as f:
-            new_settings = json.load(f)
+            raw_new = f.read()
+            # Handle comments even in template files for consistency
+            clean_new = strip_json_comments(raw_new)
+            new_settings = json.loads(clean_new)
 
         if dest_file.exists():
             merged = merge_json_files(dest_file, new_settings, verbose=verbose and not tracker)
@@ -677,14 +680,26 @@ def log(message, color="green"):
         log(f"Warning: Could not merge, copying instead: {e}", "yellow")
         shutil.copy2(sub_item, dest_file)
 
+def strip_json_comments(content: str) -> str:
+    """Remove // and /* */ comments from a JSON string.
+    
+    This is a basic implementation to handle VSCode settings.json files.
+    """
+    import re
+    # Remove single-line comments
+    content = re.sub(r'//.*$', '', content, flags=re.MULTILINE)
+    # Remove multi-line comments
+    content = re.sub(r'/\*.*?\*/', '', content, flags=re.DOTALL)
+    return content
+
 def merge_json_files(existing_path: Path, new_content: dict, verbose: bool = False) -> dict:
     """Merge new JSON content into existing JSON file.
 
-    Performs a deep merge where:
+    Performs a polite deep merge where:
     - New keys are added
-    - Existing keys are preserved unless overwritten by new content
+    - Existing keys are PRESERVED (not overwritten) unless they are dictionaries
     - Nested dictionaries are merged recursively
-    - Lists and other values are replaced (not merged)
+    - Lists and other values are preserved from base if they exist
 
     Args:
         existing_path: Path to existing JSON file
@@ -696,24 +711,42 @@ def merge_json_files(existing_path: Path, new_content: dict, verbose: bool = Fal
     """
     try:
         with open(existing_path, 'r', encoding='utf-8') as f:
-            existing_content = json.load(f)
-    except (FileNotFoundError, json.JSONDecodeError):
+            raw_content = f.read()
+            # Handle comments (JSONC)
+            clean_content = strip_json_comments(raw_content)
+            existing_content = json.loads(clean_content)
+            
+        if not isinstance(existing_content, dict):
+             if verbose:
+                 console.print(f"[yellow]Warning: Existing JSON in {existing_path.name} is not an object, using template instead[/yellow]")
+             return new_content
+             
+    except (FileNotFoundError, json.JSONDecodeError) as e:
         # If file doesn't exist or is invalid, just use new content
+        if verbose and not isinstance(e, FileNotFoundError):
+             console.print(f"[yellow]Warning: Could not parse existing JSON in {existing_path.name} ({e}), using template instead[/yellow]")
         return new_content
 
-    def deep_merge(base: dict, update: dict) -> dict:
-        """Recursively merge update dict into base dict."""
+    if not isinstance(new_content, dict):
+        return existing_content
+
+    def deep_merge_polite(base: dict, update: dict) -> dict:
+        """Recursively merge update dict into base dict, preserving base values."""
         result = base.copy()
         for key, value in update.items():
-            if key in result and isinstance(result[key], dict) and isinstance(value, dict):
+            if key not in result:
+                # Add new key
+                result[key] = value
+            elif isinstance(result[key], dict) and isinstance(value, dict):
                 # Recursively merge nested dictionaries
-                result[key] = deep_merge(result[key], value)
+                result[key] = deep_merge_polite(result[key], value)
             else:
-                # Add new key or replace existing value
-                result[key] = value
+                # Key already exists and is not a dict, PRESERVE existing value
+                # This ensures user settings aren't overwritten by template defaults
+                pass
         return result
 
-    merged = deep_merge(existing_content, new_content)
+    merged = deep_merge_polite(existing_content, new_content)
 
     if verbose:
         console.print(f"[cyan]Merged JSON file:[/cyan] {existing_path.name}")
diff --git a/tests/test_merge.py b/tests/test_merge.py
new file mode 100644
index 000000000..4baa03ef1
--- /dev/null
+++ b/tests/test_merge.py
@@ -0,0 +1,68 @@
+import json
+from pathlib import Path
+import pytest
+from specify_cli import strip_json_comments, merge_json_files
+
+def test_strip_json_comments():
+    jsonc = """
+    {
+        // Single line comment
+        "a": 1,
+        "b": "value", /* Multi-line
+                         comment */
+        "c": 3 // Another one
+    }
+    """
+    stripped = strip_json_comments(jsonc)
+    parsed = json.loads(stripped)
+    assert parsed["a"] == 1
+    assert parsed["b"] == "value"
+    assert parsed["c"] == 3
+
+def test_merge_json_files_polite(tmp_path):
+    existing_file = tmp_path / "settings.json"
+    existing_content = """
+    {
+        "editor.fontSize": 14,
+        "editor.tabSize": 4,
+        "chat.promptFilesRecommendations": {
+            "my.cmd": true
+        }
+    }
+    """
+    existing_file.write_text(existing_content)
+    
+    new_settings = {
+        "editor.tabSize": 2, # Should be preserved as 4
+        "chat.promptFilesRecommendations": {
+            "speckit.specify": True,
+            "my.cmd": False # Should be preserved as True
+        },
+        "new.key": "new.value" # Should be added
+    }
+    
+    merged = merge_json_files(existing_file, new_settings)
+    
+    assert merged["editor.fontSize"] == 14
+    assert merged["editor.tabSize"] == 4
+    assert merged["chat.promptFilesRecommendations"]["my.cmd"] is True
+    assert merged["chat.promptFilesRecommendations"]["speckit.specify"] is True
+    assert merged["new.key"] == "new.value"
+
+def test_merge_json_files_invalid_existing(tmp_path):
+    existing_file = tmp_path / "invalid.json"
+    existing_file.write_text("{ invalid json")
+    
+    new_settings = {"a": 1}
+    # Should fallback to new_settings
+    merged = merge_json_files(existing_file, new_settings)
+    assert merged == {"a": 1}
+
+def test_merge_json_files_non_dict_existing(tmp_path):
+    existing_file = tmp_path / "array.json"
+    existing_file.write_text("[1, 2, 3]")
+    
+    new_settings = {"a": 1}
+    # Should fallback to new_settings
+    merged = merge_json_files(existing_file, new_settings)
+    assert merged == {"a": 1}

From eaf7fb21c4995ea7ed58684aaae18db755a51d90 Mon Sep 17 00:00:00 2001
From: rbbtsn0w <hamiltonsnow@gmail.com>
Date: Mon, 16 Mar 2026 11:37:33 +0800
Subject: [PATCH 2/5] refactor(cli): improve JSONC parsing to handle strings
 and trailing commas, fix indentation

---
 src/specify_cli/__init__.py | 71 +++++++++++++++++++++++++++++++------
 tests/test_merge.py         | 32 ++++++++++++++++-
 2 files changed, 91 insertions(+), 12 deletions(-)

diff --git a/src/specify_cli/__init__.py b/src/specify_cli/__init__.py
index 2eac8f1b0..91d2c052b 100644
--- a/src/specify_cli/__init__.py
+++ b/src/specify_cli/__init__.py
@@ -681,16 +681,65 @@ def log(message, color="green"):
         shutil.copy2(sub_item, dest_file)
 
 def strip_json_comments(content: str) -> str:
-    """Remove // and /* */ comments from a JSON string.
+    """Remove // and /* */ comments from a JSON string, respecting string literals.
     
-    This is a basic implementation to handle VSCode settings.json files.
+    This implementation handles VSCode settings.json files by:
+    1. Skipping comments inside "string literals"
+    2. Removing single-line (//) and multi-line (/* */) comments
+    3. Handling trailing commas that JSONC supports but standard JSON does not
     """
     import re
-    # Remove single-line comments
-    content = re.sub(r'//.*$', '', content, flags=re.MULTILINE)
-    # Remove multi-line comments
-    content = re.sub(r'/\*.*?\*/', '', content, flags=re.DOTALL)
-    return content
+    
+    result = []
+    i = 0
+    length = len(content)
+    in_string = False
+    string_char = None
+    
+    while i < length:
+        char = content[i]
+        
+        # Handle string literals
+        if char in ('"', "'") and (i == 0 or content[i-1] != '\\'):
+            if not in_string:
+                in_string = True
+                string_char = char
+            elif char == string_char:
+                in_string = False
+            result.append(char)
+            i += 1
+            continue
+            
+        if in_string:
+            result.append(char)
+            i += 1
+            continue
+            
+        # Handle single-line comments //
+        if i + 1 < length and content[i:i+2] == '//':
+            i += 2
+            while i < length and content[i] != '\n':
+                i += 1
+            continue
+            
+        # Handle multi-line comments /* */
+        if i + 1 < length and content[i:i+2] == '/*':
+            i += 2
+            while i + 1 < length and content[i:i+2] != '*/':
+                i += 1
+            i += 2
+            continue
+            
+        result.append(char)
+        i += 1
+        
+    clean_content = "".join(result)
+    
+    # Handle trailing commas: find a comma followed by closing brace/bracket
+    # This regex handles cases like [1, 2, ] or {"a": 1, }
+    clean_content = re.sub(r',\s*([\]}])', r'\1', clean_content)
+    
+    return clean_content
 
 def merge_json_files(existing_path: Path, new_content: dict, verbose: bool = False) -> dict:
     """Merge new JSON content into existing JSON file.
@@ -717,14 +766,14 @@ def merge_json_files(existing_path: Path, new_content: dict, verbose: bool = Fal
             existing_content = json.loads(clean_content)
             
         if not isinstance(existing_content, dict):
-             if verbose:
-                 console.print(f"[yellow]Warning: Existing JSON in {existing_path.name} is not an object, using template instead[/yellow]")
-             return new_content
+            if verbose:
+                console.print(f"[yellow]Warning: Existing JSON in {existing_path.name} is not an object, using template instead[/yellow]")
+            return new_content
              
     except (FileNotFoundError, json.JSONDecodeError) as e:
         # If file doesn't exist or is invalid, just use new content
         if verbose and not isinstance(e, FileNotFoundError):
-             console.print(f"[yellow]Warning: Could not parse existing JSON in {existing_path.name} ({e}), using template instead[/yellow]")
+            console.print(f"[yellow]Warning: Could not parse existing JSON in {existing_path.name} ({e}), using template instead[/yellow]")
         return new_content
 
     if not isinstance(new_content, dict):
diff --git a/tests/test_merge.py b/tests/test_merge.py
index 4baa03ef1..3e4f92887 100644
--- a/tests/test_merge.py
+++ b/tests/test_merge.py
@@ -3,7 +3,7 @@
 import pytest
 from specify_cli import strip_json_comments, merge_json_files
 
-def test_strip_json_comments():
+def test_strip_json_comments_basic():
     jsonc = """
     {
         // Single line comment
@@ -19,6 +19,36 @@ def test_strip_json_comments():
     assert parsed["b"] == "value"
     assert parsed["c"] == 3
 
+def test_strip_json_comments_with_urls():
+    # URL in string should NOT be stripped
+    jsonc = """
+    {
+        "url": "https://github.com/spec-kit",
+        "path": "//local/path",
+        "commented": 1 // This is a comment
+    }
+    """
+    stripped = strip_json_comments(jsonc)
+    parsed = json.loads(stripped)
+    assert parsed["url"] == "https://github.com/spec-kit"
+    assert parsed["path"] == "//local/path"
+    assert parsed["commented"] == 1
+
+def test_strip_json_comments_trailing_commas():
+    # Trailing commas in objects and arrays
+    jsonc = """
+    {
+        "array": [1, 2, 3, ],
+        "obj": {
+            "a": 1,
+        },
+    }
+    """
+    stripped = strip_json_comments(jsonc)
+    parsed = json.loads(stripped)
+    assert parsed["array"] == [1, 2, 3]
+    assert parsed["obj"]["a"] == 1
+
 def test_merge_json_files_polite(tmp_path):
     existing_file = tmp_path / "settings.json"
     existing_content = """

From f28e9c11465c5d0c28b6bc06d7180ccdfd44abd1 Mon Sep 17 00:00:00 2001
From: rbbtsn0w <hamiltonsnow@gmail.com>
Date: Mon, 16 Mar 2026 11:44:14 +0800
Subject: [PATCH 3/5] test(cli): add multi-dimensional tests for JSONC and BOM
 handling, improve merge robustness

---
 src/specify_cli/__init__.py |   2 +-
 tests/test_merge.py         | 183 ++++++++++++++++++++++++------------
 2 files changed, 126 insertions(+), 59 deletions(-)

diff --git a/src/specify_cli/__init__.py b/src/specify_cli/__init__.py
index 91d2c052b..62bb79a5f 100644
--- a/src/specify_cli/__init__.py
+++ b/src/specify_cli/__init__.py
@@ -760,7 +760,7 @@ def merge_json_files(existing_path: Path, new_content: dict, verbose: bool = Fal
     """
     try:
         with open(existing_path, 'r', encoding='utf-8') as f:
-            raw_content = f.read()
+            raw_content = f.read().lstrip('\ufeff')
             # Handle comments (JSONC)
             clean_content = strip_json_comments(raw_content)
             existing_content = json.loads(clean_content)
diff --git a/tests/test_merge.py b/tests/test_merge.py
index 3e4f92887..81605bf91 100644
--- a/tests/test_merge.py
+++ b/tests/test_merge.py
@@ -3,96 +3,163 @@
 import pytest
 from specify_cli import strip_json_comments, merge_json_files
 
-def test_strip_json_comments_basic():
-    jsonc = """
-    {
-        // Single line comment
-        "a": 1,
-        "b": "value", /* Multi-line
-                         comment */
-        "c": 3 // Another one
-    }
-    """
-    stripped = strip_json_comments(jsonc)
-    parsed = json.loads(stripped)
-    assert parsed["a"] == 1
-    assert parsed["b"] == "value"
-    assert parsed["c"] == 3
+# --- Dimension 1: JSONC Parsing & Tokenization ---
 
-def test_strip_json_comments_with_urls():
-    # URL in string should NOT be stripped
+def test_strip_json_comments_edge_cases():
+    """Test complex comment scenarios and string protections."""
     jsonc = """
     {
-        "url": "https://github.com/spec-kit",
-        "path": "//local/path",
-        "commented": 1 // This is a comment
+        "url": "https://github.com/spec-kit", // URL with //
+        "path": "C:\\\\Users\\\\Admin\\\\Documents", /* Path with backslashes */
+        "escaped": "String with \\\"quote\\\" inside",
+        "nested_comments": "This /* is not */ a comment",
+        "mixed": {
+            "a": [1, /* mid-array comment */ 2],
+            "b": 3 // end of line
+        }
     }
     """
     stripped = strip_json_comments(jsonc)
     parsed = json.loads(stripped)
     assert parsed["url"] == "https://github.com/spec-kit"
-    assert parsed["path"] == "//local/path"
-    assert parsed["commented"] == 1
+    assert parsed["path"] == "C:\\Users\\Admin\\Documents"
+    assert parsed["escaped"] == 'String with "quote" inside'
+    assert parsed["nested_comments"] == "This /* is not */ a comment"
+    assert parsed["mixed"]["a"] == [1, 2]
 
-def test_strip_json_comments_trailing_commas():
-    # Trailing commas in objects and arrays
+def test_strip_json_comments_trailing_commas_advanced():
+    """Test various trailing comma positions (nested, arrays, objects)."""
     jsonc = """
     {
-        "array": [1, 2, 3, ],
-        "obj": {
-            "a": 1,
+        "list": [
+            {"id": 1,},
+            {"id": 2,},
+        ],
+        "metadata": {
+            "tags": ["a", "b", ],
         },
     }
     """
     stripped = strip_json_comments(jsonc)
     parsed = json.loads(stripped)
-    assert parsed["array"] == [1, 2, 3]
-    assert parsed["obj"]["a"] == 1
+    assert len(parsed["list"]) == 2
+    assert parsed["list"][0]["id"] == 1
+    assert parsed["metadata"]["tags"] == ["a", "b"]
+
+def test_strip_json_comments_whitespace():
+    """Ensure stripping works with various whitespace and empty values."""
+    jsonc = "{   \n\n   \"a\": 1\n, \n \"b\": 2 // comment \n }"
+    parsed = json.loads(strip_json_comments(jsonc))
+    assert parsed == {"a": 1, "b": 2}
+
+# --- Dimension 2: Polite Deep Merge Strategy ---
+
+def test_merge_json_files_type_mismatch_preservation(tmp_path):
+    """If user has a string but template wants a dict, PRESERVE user's string."""
+    existing_file = tmp_path / "settings.json"
+    # User might have overridden a setting with a simple string or different type
+    existing_file.write_text('{"chat.editor.fontFamily": "CustomFont"}')
+    
+    # Template might expect a dict for the same key (hypothetically)
+    new_settings = {
+        "chat.editor.fontFamily": {"font": "TemplateFont"}
+    }
+    
+    merged = merge_json_files(existing_file, new_settings)
+    assert merged["chat.editor.fontFamily"] == "CustomFont"
 
-def test_merge_json_files_polite(tmp_path):
+def test_merge_json_files_deep_nesting(tmp_path):
+    """Verify deep recursive merging of new keys."""
     existing_file = tmp_path / "settings.json"
-    existing_content = """
+    existing_file.write_text("""
     {
-        "editor.fontSize": 14,
-        "editor.tabSize": 4,
-        "chat.promptFilesRecommendations": {
-            "my.cmd": true
+        "a": {
+            "b": {
+                "c": 1
+            }
         }
     }
-    """
-    existing_file.write_text(existing_content)
+    """)
     
     new_settings = {
-        "editor.tabSize": 2, # Should be preserved as 4
+        "a": {
+            "b": {
+                "d": 2  # New nested key
+            },
+            "e": 3      # New mid-level key
+        }
+    }
+    
+    merged = merge_json_files(existing_file, new_settings)
+    assert merged["a"]["b"]["c"] == 1
+    assert merged["a"]["b"]["d"] == 2
+    assert merged["a"]["e"] == 3
+
+def test_merge_json_files_empty_existing(tmp_path):
+    """Merging into an empty/new file."""
+    existing_file = tmp_path / "empty.json"
+    existing_file.write_text("{}")
+    
+    new_settings = {"a": 1}
+    merged = merge_json_files(existing_file, new_settings)
+    assert merged == {"a": 1}
+
+# --- Dimension 3: Real-world Simulation ---
+
+def test_merge_vscode_realistic_scenario(tmp_path):
+    """A realistic VSCode settings.json with many existing preferences."""
+    existing_file = tmp_path / "vscode_settings.json"
+    existing_file.write_text("""
+    {
+        "editor.fontSize": 12,
+        "editor.formatOnSave": true,
+        "files.exclude": {
+            "**/.git": true,
+            "**/node_modules": true
+        },
+        "chat.promptFilesRecommendations": {
+            "existing.tool": true
+        } // User comment
+    }
+    """)
+    
+    template_settings = {
         "chat.promptFilesRecommendations": {
             "speckit.specify": True,
-            "my.cmd": False # Should be preserved as True
+            "speckit.plan": True
         },
-        "new.key": "new.value" # Should be added
+        "chat.tools.terminal.autoApprove": {
+            ".specify/scripts/bash/": True
+        }
     }
     
-    merged = merge_json_files(existing_file, new_settings)
+    merged = merge_json_files(existing_file, template_settings)
     
-    assert merged["editor.fontSize"] == 14
-    assert merged["editor.tabSize"] == 4
-    assert merged["chat.promptFilesRecommendations"]["my.cmd"] is True
+    # Check preservation
+    assert merged["editor.fontSize"] == 12
+    assert merged["files.exclude"]["**/.git"] is True
+    assert merged["chat.promptFilesRecommendations"]["existing.tool"] is True
+    
+    # Check additions
     assert merged["chat.promptFilesRecommendations"]["speckit.specify"] is True
-    assert merged["new.key"] == "new.value"
+    assert merged["chat.tools.terminal.autoApprove"][".specify/scripts/bash/"] is True
+
+# --- Dimension 4: Error Handling & Robustness ---
 
-def test_merge_json_files_invalid_existing(tmp_path):
-    existing_file = tmp_path / "invalid.json"
-    existing_file.write_text("{ invalid json")
+def test_merge_json_files_with_bom(tmp_path):
+    """Test files with UTF-8 BOM (sometimes created on Windows)."""
+    existing_file = tmp_path / "bom.json"
+    content = '{"a": 1}'
+    # Prepend UTF-8 BOM
+    existing_file.write_bytes(b'\xef\xbb\xbf' + content.encode('utf-8'))
     
-    new_settings = {"a": 1}
-    # Should fallback to new_settings
+    new_settings = {"b": 2}
     merged = merge_json_files(existing_file, new_settings)
-    assert merged == {"a": 1}
+    assert merged == {"a": 1, "b": 2}
 
-def test_merge_json_files_non_dict_existing(tmp_path):
-    existing_file = tmp_path / "array.json"
-    existing_file.write_text("[1, 2, 3]")
+def test_merge_json_files_not_a_dictionary_template(tmp_path):
+    """If for some reason new_content is not a dict, return existing."""
+    existing_file = tmp_path / "ok.json"
+    existing_file.write_text('{"a": 1}')
     
-    new_settings = {"a": 1}
-    # Should fallback to new_settings
-    merged = merge_json_files(existing_file, new_settings)
-    assert merged == {"a": 1}
+    assert merge_json_files(existing_file, ["not", "a", "dict"]) == {"a": 1}

From 845b9f3d0866da71424b00aefe3611d0c871d647 Mon Sep 17 00:00:00 2001
From: rbbtsn0w <hamiltonsnow@gmail.com>
Date: Mon, 16 Mar 2026 11:50:03 +0800
Subject: [PATCH 4/5] fix(cli): implement robust backslash escape counting and
 improve JSONC/merge validation

---
 src/specify_cli/__init__.py | 72 +++++++++++++++++++++++++------------
 tests/test_merge.py         | 12 ++++---
 2 files changed, 57 insertions(+), 27 deletions(-)

diff --git a/src/specify_cli/__init__.py b/src/specify_cli/__init__.py
index 62bb79a5f..0166dc78e 100644
--- a/src/specify_cli/__init__.py
+++ b/src/specify_cli/__init__.py
@@ -685,8 +685,9 @@ def strip_json_comments(content: str) -> str:
     
     This implementation handles VSCode settings.json files by:
     1. Skipping comments inside "string literals"
-    2. Removing single-line (//) and multi-line (/* */) comments
-    3. Handling trailing commas that JSONC supports but standard JSON does not
+    2. Correctlly handling escaped quotes by counting backslashes
+    3. Removing single-line (//) and multi-line (/* */) comments
+    4. Handling trailing commas safely (only outside strings)
     """
     import re
     
@@ -700,12 +701,23 @@ def strip_json_comments(content: str) -> str:
         char = content[i]
         
         # Handle string literals
-        if char in ('"', "'") and (i == 0 or content[i-1] != '\\'):
-            if not in_string:
-                in_string = True
-                string_char = char
-            elif char == string_char:
-                in_string = False
+        if char in ('"', "'"):
+            # Check if this quote is escaped by counting backslashes before it
+            bs_count = 0
+            j = i - 1
+            while j >= 0 and content[j] == '\\':
+                bs_count += 1
+                j -= 1
+            
+            is_escaped = (bs_count % 2 != 0)
+            
+            if not is_escaped:
+                if not in_string:
+                    in_string = True
+                    string_char = char
+                elif char == string_char:
+                    in_string = False
+            
             result.append(char)
             i += 1
             continue
@@ -727,7 +739,10 @@ def strip_json_comments(content: str) -> str:
             i += 2
             while i + 1 < length and content[i:i+2] != '*/':
                 i += 1
-            i += 2
+            if i + 1 < length:
+                i += 2
+            else:
+                i = length # End of content
             continue
             
         result.append(char)
@@ -736,7 +751,13 @@ def strip_json_comments(content: str) -> str:
     clean_content = "".join(result)
     
     # Handle trailing commas: find a comma followed by closing brace/bracket
-    # This regex handles cases like [1, 2, ] or {"a": 1, }
+    # This is now safer as we've already stripped comments and identified strings
+    # However, to be 100% safe against commas in strings, we should have handled this 
+    # during the character loop. Let's refine the regex to be very specific to 
+    # JSON structure (comma followed by whitespace and closing char).
+    # Since we've already joined the result, we can apply a final cleanup.
+    # To truly avoid commas in strings, we'd need to mark string ranges.
+    # For now, this regex is standard for basic JSONC to JSON conversion.
     clean_content = re.sub(r',\s*([\]}])', r'\1', clean_content)
     
     return clean_content
@@ -758,27 +779,32 @@ def merge_json_files(existing_path: Path, new_content: dict, verbose: bool = Fal
     Returns:
         Merged JSON content as dict
     """
+    if not isinstance(new_content, dict):
+        if verbose:
+            console.print(f"[yellow]Warning: Template content for {existing_path.name} is not a dictionary. Skipping merge.[/yellow]")
+        return {}
+
     try:
-        with open(existing_path, 'r', encoding='utf-8') as f:
-            raw_content = f.read().lstrip('\ufeff')
-            # Handle comments (JSONC)
-            clean_content = strip_json_comments(raw_content)
-            existing_content = json.loads(clean_content)
-            
-        if not isinstance(existing_content, dict):
-            if verbose:
-                console.print(f"[yellow]Warning: Existing JSON in {existing_path.name} is not an object, using template instead[/yellow]")
+        if existing_path.exists():
+            with open(existing_path, 'r', encoding='utf-8') as f:
+                raw_content = f.read().lstrip('\ufeff')
+                # Handle comments (JSONC)
+                clean_content = strip_json_comments(raw_content)
+                existing_content = json.loads(clean_content)
+                
+            if not isinstance(existing_content, dict):
+                if verbose:
+                    console.print(f"[yellow]Warning: Existing JSON in {existing_path.name} is not an object, using template instead[/yellow]")
+                return new_content
+        else:
             return new_content
              
     except (FileNotFoundError, json.JSONDecodeError) as e:
-        # If file doesn't exist or is invalid, just use new content
+        # If file is invalid, just use new content
         if verbose and not isinstance(e, FileNotFoundError):
             console.print(f"[yellow]Warning: Could not parse existing JSON in {existing_path.name} ({e}), using template instead[/yellow]")
         return new_content
 
-    if not isinstance(new_content, dict):
-        return existing_content
-
     def deep_merge_polite(base: dict, update: dict) -> dict:
         """Recursively merge update dict into base dict, preserving base values."""
         result = base.copy()
diff --git a/tests/test_merge.py b/tests/test_merge.py
index 81605bf91..e72a6ec64 100644
--- a/tests/test_merge.py
+++ b/tests/test_merge.py
@@ -1,6 +1,5 @@
 import json
 from pathlib import Path
-import pytest
 from specify_cli import strip_json_comments, merge_json_files
 
 # --- Dimension 1: JSONC Parsing & Tokenization ---
@@ -16,7 +15,9 @@ def test_strip_json_comments_edge_cases():
         "mixed": {
             "a": [1, /* mid-array comment */ 2],
             "b": 3 // end of line
-        }
+        },
+        "even_backslashes": "\\\\\\\\",
+        "odd_backslashes": "\\\\\\\""
     }
     """
     stripped = strip_json_comments(jsonc)
@@ -26,6 +27,8 @@ def test_strip_json_comments_edge_cases():
     assert parsed["escaped"] == 'String with "quote" inside'
     assert parsed["nested_comments"] == "This /* is not */ a comment"
     assert parsed["mixed"]["a"] == [1, 2]
+    assert parsed["even_backslashes"] == "\\\\"
+    assert parsed["odd_backslashes"] == "\\\""
 
 def test_strip_json_comments_trailing_commas_advanced():
     """Test various trailing comma positions (nested, arrays, objects)."""
@@ -158,8 +161,9 @@ def test_merge_json_files_with_bom(tmp_path):
     assert merged == {"a": 1, "b": 2}
 
 def test_merge_json_files_not_a_dictionary_template(tmp_path):
-    """If for some reason new_content is not a dict, return existing."""
+    """If for some reason new_content is not a dict, return empty dict as safety."""
     existing_file = tmp_path / "ok.json"
     existing_file.write_text('{"a": 1}')
     
-    assert merge_json_files(existing_file, ["not", "a", "dict"]) == {"a": 1}
+    # In the updated implementation, if new_content is not a dict, we return {}
+    assert merge_json_files(existing_file, ["not", "a", "dict"]) == {}

From 67f56c8fd0fc6d31d23cc4e99ec1c23f15ac385c Mon Sep 17 00:00:00 2001
From: rbbtsn0w <hamiltonsnow@gmail.com>
Date: Mon, 16 Mar 2026 12:00:24 +0800
Subject: [PATCH 5/5] fix(cli): finalize JSONC parsing with safe trailing
 commas and secure fallback logic

---
 src/specify_cli/__init__.py | 79 +++++++++++++++++++++----------------
 tests/test_merge.py         | 21 ++++++++--
 2 files changed, 63 insertions(+), 37 deletions(-)

diff --git a/src/specify_cli/__init__.py b/src/specify_cli/__init__.py
index 0166dc78e..04cec3774 100644
--- a/src/specify_cli/__init__.py
+++ b/src/specify_cli/__init__.py
@@ -685,9 +685,9 @@ def strip_json_comments(content: str) -> str:
     
     This implementation handles VSCode settings.json files by:
     1. Skipping comments inside "string literals"
-    2. Correctlly handling escaped quotes by counting backslashes
+    2. Correctly handling escaped quotes by counting backslashes
     3. Removing single-line (//) and multi-line (/* */) comments
-    4. Handling trailing commas safely (only outside strings)
+    4. Handling trailing commas safely (only outside strings/comments)
     """
     import re
     
@@ -745,22 +745,28 @@ def strip_json_comments(content: str) -> str:
                 i = length # End of content
             continue
             
+        # Handle trailing commas (safe because we're outside strings and comments)
+        if char == ',':
+            # Peek ahead to find the next non-whitespace character
+            next_i = i + 1
+            is_trailing = False
+            while next_i < length:
+                next_char = content[next_i]
+                if next_char in (' ', '\t', '\n', '\r'):
+                    next_i += 1
+                    continue
+                if next_char in (']', '}'):
+                    is_trailing = True
+                break
+            
+            if is_trailing:
+                i += 1 # Skip the comma
+                continue
+
         result.append(char)
         i += 1
         
-    clean_content = "".join(result)
-    
-    # Handle trailing commas: find a comma followed by closing brace/bracket
-    # This is now safer as we've already stripped comments and identified strings
-    # However, to be 100% safe against commas in strings, we should have handled this 
-    # during the character loop. Let's refine the regex to be very specific to 
-    # JSON structure (comma followed by whitespace and closing char).
-    # Since we've already joined the result, we can apply a final cleanup.
-    # To truly avoid commas in strings, we'd need to mark string ranges.
-    # For now, this regex is standard for basic JSONC to JSON conversion.
-    clean_content = re.sub(r',\s*([\]}])', r'\1', clean_content)
-    
-    return clean_content
+    return "".join(result)
 
 def merge_json_files(existing_path: Path, new_content: dict, verbose: bool = False) -> dict:
     """Merge new JSON content into existing JSON file.
@@ -779,32 +785,37 @@ def merge_json_files(existing_path: Path, new_content: dict, verbose: bool = Fal
     Returns:
         Merged JSON content as dict
     """
-    if not isinstance(new_content, dict):
-        if verbose:
-            console.print(f"[yellow]Warning: Template content for {existing_path.name} is not a dictionary. Skipping merge.[/yellow]")
-        return {}
-
-    try:
-        if existing_path.exists():
+    # Load existing content first to have a safe fallback
+    existing_content = {}
+    exists = existing_path.exists()
+    
+    if exists:
+        try:
             with open(existing_path, 'r', encoding='utf-8') as f:
                 raw_content = f.read().lstrip('\ufeff')
                 # Handle comments (JSONC)
                 clean_content = strip_json_comments(raw_content)
                 existing_content = json.loads(clean_content)
-                
-            if not isinstance(existing_content, dict):
-                if verbose:
-                    console.print(f"[yellow]Warning: Existing JSON in {existing_path.name} is not an object, using template instead[/yellow]")
-                return new_content
-        else:
-            return new_content
-             
-    except (FileNotFoundError, json.JSONDecodeError) as e:
-        # If file is invalid, just use new content
-        if verbose and not isinstance(e, FileNotFoundError):
-            console.print(f"[yellow]Warning: Could not parse existing JSON in {existing_path.name} ({e}), using template instead[/yellow]")
+        except (FileNotFoundError, json.JSONDecodeError) as e:
+            if verbose and not isinstance(e, FileNotFoundError):
+                console.print(f"[yellow]Warning: Could not parse existing JSON in {existing_path.name} ({e}).[/yellow]")
+            # fallback to empty dict for merging
+
+    # Validate template content
+    if not isinstance(new_content, dict):
+        if verbose:
+            console.print(f"[yellow]Warning: Template content for {existing_path.name} is not a dictionary. Preserving existing settings.[/yellow]")
+        return existing_content if isinstance(existing_content, dict) else {}
+
+    if not exists:
         return new_content
 
+    # If existing content parsed but is not a dict, skip merge to avoid data loss
+    if not isinstance(existing_content, dict):
+        if verbose:
+            console.print(f"[yellow]Warning: Existing JSON in {existing_path.name} is not an object. Skipping merge to avoid data loss.[/yellow]")
+        return existing_content
+
     def deep_merge_polite(base: dict, update: dict) -> dict:
         """Recursively merge update dict into base dict, preserving base values."""
         result = base.copy()
diff --git a/tests/test_merge.py b/tests/test_merge.py
index e72a6ec64..591542f6f 100644
--- a/tests/test_merge.py
+++ b/tests/test_merge.py
@@ -49,6 +49,21 @@ def test_strip_json_comments_trailing_commas_advanced():
     assert parsed["list"][0]["id"] == 1
     assert parsed["metadata"]["tags"] == ["a", "b"]
 
+def test_strip_json_comments_safe_commas_in_strings():
+    """Verify that commas followed by closing characters INSIDE strings are not removed."""
+    jsonc = """
+    {
+        "tricky_string": "value, ]",
+        "another_one": "nested, }",
+        "normal": [1, 2, ]
+    }
+    """
+    stripped = strip_json_comments(jsonc)
+    parsed = json.loads(stripped)
+    assert parsed["tricky_string"] == "value, ]"
+    assert parsed["another_one"] == "nested, }"
+    assert parsed["normal"] == [1, 2]
+
 def test_strip_json_comments_whitespace():
     """Ensure stripping works with various whitespace and empty values."""
     jsonc = "{   \n\n   \"a\": 1\n, \n \"b\": 2 // comment \n }"
@@ -161,9 +176,9 @@ def test_merge_json_files_with_bom(tmp_path):
     assert merged == {"a": 1, "b": 2}
 
 def test_merge_json_files_not_a_dictionary_template(tmp_path):
-    """If for some reason new_content is not a dict, return empty dict as safety."""
+    """If for some reason new_content is not a dict, PRESERVE existing settings instead of wiping."""
     existing_file = tmp_path / "ok.json"
     existing_file.write_text('{"a": 1}')
     
-    # In the updated implementation, if new_content is not a dict, we return {}
-    assert merge_json_files(existing_file, ["not", "a", "dict"]) == {}
+    # Secure fallback: return existing settings to avoid clobbering
+    assert merge_json_files(existing_file, ["not", "a", "dict"]) == {"a": 1}