From 2776f44fc9592df1c71fb2c00fb0ed5df475a819 Mon Sep 17 00:00:00 2001 From: rbbtsn0w Date: Mon, 16 Mar 2026 11:30:38 +0800 Subject: [PATCH 1/5] feat(cli): implement polite deep merge for settings.json and support JSONC --- CHANGELOG.md | 4 +++ src/specify_cli/__init__.py | 59 +++++++++++++++++++++++++------- tests/test_merge.py | 68 +++++++++++++++++++++++++++++++++++++ 3 files changed, 118 insertions(+), 13 deletions(-) create mode 100644 tests/test_merge.py diff --git a/CHANGELOG.md b/CHANGELOG.md index 4b1b6a47a..e701f207f 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -35,6 +35,10 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0 ### Added +- feat(cli): support for JSONC (JSON with comments) in VSCode settings.json merging +- feat(cli): polite deep merge for JSON files that preserves existing user settings +- feat(cli): basic validation for JSON object structures during initialization +- feat(cli): strip_json_comments utility for handling VSCode configuration files - feat(presets): Pluggable preset system with preset catalog and template resolver - Preset manifest (`preset.yml`) with validation for artifact, command, and script types - `PresetManifest`, `PresetRegistry`, `PresetManager`, `PresetCatalog`, `PresetResolver` classes in `src/specify_cli/presets.py` diff --git a/src/specify_cli/__init__.py b/src/specify_cli/__init__.py index a45535aee..2eac8f1b0 100644 --- a/src/specify_cli/__init__.py +++ b/src/specify_cli/__init__.py @@ -661,7 +661,10 @@ def log(message, color="green"): try: with open(sub_item, 'r', encoding='utf-8') as f: - new_settings = json.load(f) + raw_new = f.read() + # Handle comments even in template files for consistency + clean_new = strip_json_comments(raw_new) + new_settings = json.loads(clean_new) if dest_file.exists(): merged = merge_json_files(dest_file, new_settings, verbose=verbose and not tracker) @@ -677,14 +680,26 @@ def log(message, color="green"): log(f"Warning: Could not merge, copying instead: {e}", "yellow") shutil.copy2(sub_item, dest_file) +def strip_json_comments(content: str) -> str: + """Remove // and /* */ comments from a JSON string. + + This is a basic implementation to handle VSCode settings.json files. + """ + import re + # Remove single-line comments + content = re.sub(r'//.*$', '', content, flags=re.MULTILINE) + # Remove multi-line comments + content = re.sub(r'/\*.*?\*/', '', content, flags=re.DOTALL) + return content + def merge_json_files(existing_path: Path, new_content: dict, verbose: bool = False) -> dict: """Merge new JSON content into existing JSON file. - Performs a deep merge where: + Performs a polite deep merge where: - New keys are added - - Existing keys are preserved unless overwritten by new content + - Existing keys are PRESERVED (not overwritten) unless they are dictionaries - Nested dictionaries are merged recursively - - Lists and other values are replaced (not merged) + - Lists and other values are preserved from base if they exist Args: existing_path: Path to existing JSON file @@ -696,24 +711,42 @@ def merge_json_files(existing_path: Path, new_content: dict, verbose: bool = Fal """ try: with open(existing_path, 'r', encoding='utf-8') as f: - existing_content = json.load(f) - except (FileNotFoundError, json.JSONDecodeError): + raw_content = f.read() + # Handle comments (JSONC) + clean_content = strip_json_comments(raw_content) + existing_content = json.loads(clean_content) + + if not isinstance(existing_content, dict): + if verbose: + console.print(f"[yellow]Warning: Existing JSON in {existing_path.name} is not an object, using template instead[/yellow]") + return new_content + + except (FileNotFoundError, json.JSONDecodeError) as e: # If file doesn't exist or is invalid, just use new content + if verbose and not isinstance(e, FileNotFoundError): + console.print(f"[yellow]Warning: Could not parse existing JSON in {existing_path.name} ({e}), using template instead[/yellow]") return new_content - def deep_merge(base: dict, update: dict) -> dict: - """Recursively merge update dict into base dict.""" + if not isinstance(new_content, dict): + return existing_content + + def deep_merge_polite(base: dict, update: dict) -> dict: + """Recursively merge update dict into base dict, preserving base values.""" result = base.copy() for key, value in update.items(): - if key in result and isinstance(result[key], dict) and isinstance(value, dict): + if key not in result: + # Add new key + result[key] = value + elif isinstance(result[key], dict) and isinstance(value, dict): # Recursively merge nested dictionaries - result[key] = deep_merge(result[key], value) + result[key] = deep_merge_polite(result[key], value) else: - # Add new key or replace existing value - result[key] = value + # Key already exists and is not a dict, PRESERVE existing value + # This ensures user settings aren't overwritten by template defaults + pass return result - merged = deep_merge(existing_content, new_content) + merged = deep_merge_polite(existing_content, new_content) if verbose: console.print(f"[cyan]Merged JSON file:[/cyan] {existing_path.name}") diff --git a/tests/test_merge.py b/tests/test_merge.py new file mode 100644 index 000000000..4baa03ef1 --- /dev/null +++ b/tests/test_merge.py @@ -0,0 +1,68 @@ +import json +from pathlib import Path +import pytest +from specify_cli import strip_json_comments, merge_json_files + +def test_strip_json_comments(): + jsonc = """ + { + // Single line comment + "a": 1, + "b": "value", /* Multi-line + comment */ + "c": 3 // Another one + } + """ + stripped = strip_json_comments(jsonc) + parsed = json.loads(stripped) + assert parsed["a"] == 1 + assert parsed["b"] == "value" + assert parsed["c"] == 3 + +def test_merge_json_files_polite(tmp_path): + existing_file = tmp_path / "settings.json" + existing_content = """ + { + "editor.fontSize": 14, + "editor.tabSize": 4, + "chat.promptFilesRecommendations": { + "my.cmd": true + } + } + """ + existing_file.write_text(existing_content) + + new_settings = { + "editor.tabSize": 2, # Should be preserved as 4 + "chat.promptFilesRecommendations": { + "speckit.specify": True, + "my.cmd": False # Should be preserved as True + }, + "new.key": "new.value" # Should be added + } + + merged = merge_json_files(existing_file, new_settings) + + assert merged["editor.fontSize"] == 14 + assert merged["editor.tabSize"] == 4 + assert merged["chat.promptFilesRecommendations"]["my.cmd"] is True + assert merged["chat.promptFilesRecommendations"]["speckit.specify"] is True + assert merged["new.key"] == "new.value" + +def test_merge_json_files_invalid_existing(tmp_path): + existing_file = tmp_path / "invalid.json" + existing_file.write_text("{ invalid json") + + new_settings = {"a": 1} + # Should fallback to new_settings + merged = merge_json_files(existing_file, new_settings) + assert merged == {"a": 1} + +def test_merge_json_files_non_dict_existing(tmp_path): + existing_file = tmp_path / "array.json" + existing_file.write_text("[1, 2, 3]") + + new_settings = {"a": 1} + # Should fallback to new_settings + merged = merge_json_files(existing_file, new_settings) + assert merged == {"a": 1} From eaf7fb21c4995ea7ed58684aaae18db755a51d90 Mon Sep 17 00:00:00 2001 From: rbbtsn0w Date: Mon, 16 Mar 2026 11:37:33 +0800 Subject: [PATCH 2/5] refactor(cli): improve JSONC parsing to handle strings and trailing commas, fix indentation --- src/specify_cli/__init__.py | 71 +++++++++++++++++++++++++++++++------ tests/test_merge.py | 32 ++++++++++++++++- 2 files changed, 91 insertions(+), 12 deletions(-) diff --git a/src/specify_cli/__init__.py b/src/specify_cli/__init__.py index 2eac8f1b0..91d2c052b 100644 --- a/src/specify_cli/__init__.py +++ b/src/specify_cli/__init__.py @@ -681,16 +681,65 @@ def log(message, color="green"): shutil.copy2(sub_item, dest_file) def strip_json_comments(content: str) -> str: - """Remove // and /* */ comments from a JSON string. + """Remove // and /* */ comments from a JSON string, respecting string literals. - This is a basic implementation to handle VSCode settings.json files. + This implementation handles VSCode settings.json files by: + 1. Skipping comments inside "string literals" + 2. Removing single-line (//) and multi-line (/* */) comments + 3. Handling trailing commas that JSONC supports but standard JSON does not """ import re - # Remove single-line comments - content = re.sub(r'//.*$', '', content, flags=re.MULTILINE) - # Remove multi-line comments - content = re.sub(r'/\*.*?\*/', '', content, flags=re.DOTALL) - return content + + result = [] + i = 0 + length = len(content) + in_string = False + string_char = None + + while i < length: + char = content[i] + + # Handle string literals + if char in ('"', "'") and (i == 0 or content[i-1] != '\\'): + if not in_string: + in_string = True + string_char = char + elif char == string_char: + in_string = False + result.append(char) + i += 1 + continue + + if in_string: + result.append(char) + i += 1 + continue + + # Handle single-line comments // + if i + 1 < length and content[i:i+2] == '//': + i += 2 + while i < length and content[i] != '\n': + i += 1 + continue + + # Handle multi-line comments /* */ + if i + 1 < length and content[i:i+2] == '/*': + i += 2 + while i + 1 < length and content[i:i+2] != '*/': + i += 1 + i += 2 + continue + + result.append(char) + i += 1 + + clean_content = "".join(result) + + # Handle trailing commas: find a comma followed by closing brace/bracket + # This regex handles cases like [1, 2, ] or {"a": 1, } + clean_content = re.sub(r',\s*([\]}])', r'\1', clean_content) + + return clean_content def merge_json_files(existing_path: Path, new_content: dict, verbose: bool = False) -> dict: """Merge new JSON content into existing JSON file. @@ -717,14 +766,14 @@ def merge_json_files(existing_path: Path, new_content: dict, verbose: bool = Fal existing_content = json.loads(clean_content) if not isinstance(existing_content, dict): - if verbose: - console.print(f"[yellow]Warning: Existing JSON in {existing_path.name} is not an object, using template instead[/yellow]") - return new_content + if verbose: + console.print(f"[yellow]Warning: Existing JSON in {existing_path.name} is not an object, using template instead[/yellow]") + return new_content except (FileNotFoundError, json.JSONDecodeError) as e: # If file doesn't exist or is invalid, just use new content if verbose and not isinstance(e, FileNotFoundError): - console.print(f"[yellow]Warning: Could not parse existing JSON in {existing_path.name} ({e}), using template instead[/yellow]") + console.print(f"[yellow]Warning: Could not parse existing JSON in {existing_path.name} ({e}), using template instead[/yellow]") return new_content if not isinstance(new_content, dict): diff --git a/tests/test_merge.py b/tests/test_merge.py index 4baa03ef1..3e4f92887 100644 --- a/tests/test_merge.py +++ b/tests/test_merge.py @@ -3,7 +3,7 @@ import pytest from specify_cli import strip_json_comments, merge_json_files -def test_strip_json_comments(): +def test_strip_json_comments_basic(): jsonc = """ { // Single line comment @@ -19,6 +19,36 @@ def test_strip_json_comments(): assert parsed["b"] == "value" assert parsed["c"] == 3 +def test_strip_json_comments_with_urls(): + # URL in string should NOT be stripped + jsonc = """ + { + "url": "https://github.com/spec-kit", + "path": "//local/path", + "commented": 1 // This is a comment + } + """ + stripped = strip_json_comments(jsonc) + parsed = json.loads(stripped) + assert parsed["url"] == "https://github.com/spec-kit" + assert parsed["path"] == "//local/path" + assert parsed["commented"] == 1 + +def test_strip_json_comments_trailing_commas(): + # Trailing commas in objects and arrays + jsonc = """ + { + "array": [1, 2, 3, ], + "obj": { + "a": 1, + }, + } + """ + stripped = strip_json_comments(jsonc) + parsed = json.loads(stripped) + assert parsed["array"] == [1, 2, 3] + assert parsed["obj"]["a"] == 1 + def test_merge_json_files_polite(tmp_path): existing_file = tmp_path / "settings.json" existing_content = """ From f28e9c11465c5d0c28b6bc06d7180ccdfd44abd1 Mon Sep 17 00:00:00 2001 From: rbbtsn0w Date: Mon, 16 Mar 2026 11:44:14 +0800 Subject: [PATCH 3/5] test(cli): add multi-dimensional tests for JSONC and BOM handling, improve merge robustness --- src/specify_cli/__init__.py | 2 +- tests/test_merge.py | 183 ++++++++++++++++++++++++------------ 2 files changed, 126 insertions(+), 59 deletions(-) diff --git a/src/specify_cli/__init__.py b/src/specify_cli/__init__.py index 91d2c052b..62bb79a5f 100644 --- a/src/specify_cli/__init__.py +++ b/src/specify_cli/__init__.py @@ -760,7 +760,7 @@ def merge_json_files(existing_path: Path, new_content: dict, verbose: bool = Fal """ try: with open(existing_path, 'r', encoding='utf-8') as f: - raw_content = f.read() + raw_content = f.read().lstrip('\ufeff') # Handle comments (JSONC) clean_content = strip_json_comments(raw_content) existing_content = json.loads(clean_content) diff --git a/tests/test_merge.py b/tests/test_merge.py index 3e4f92887..81605bf91 100644 --- a/tests/test_merge.py +++ b/tests/test_merge.py @@ -3,96 +3,163 @@ import pytest from specify_cli import strip_json_comments, merge_json_files -def test_strip_json_comments_basic(): - jsonc = """ - { - // Single line comment - "a": 1, - "b": "value", /* Multi-line - comment */ - "c": 3 // Another one - } - """ - stripped = strip_json_comments(jsonc) - parsed = json.loads(stripped) - assert parsed["a"] == 1 - assert parsed["b"] == "value" - assert parsed["c"] == 3 +# --- Dimension 1: JSONC Parsing & Tokenization --- -def test_strip_json_comments_with_urls(): - # URL in string should NOT be stripped +def test_strip_json_comments_edge_cases(): + """Test complex comment scenarios and string protections.""" jsonc = """ { - "url": "https://github.com/spec-kit", - "path": "//local/path", - "commented": 1 // This is a comment + "url": "https://github.com/spec-kit", // URL with // + "path": "C:\\\\Users\\\\Admin\\\\Documents", /* Path with backslashes */ + "escaped": "String with \\\"quote\\\" inside", + "nested_comments": "This /* is not */ a comment", + "mixed": { + "a": [1, /* mid-array comment */ 2], + "b": 3 // end of line + } } """ stripped = strip_json_comments(jsonc) parsed = json.loads(stripped) assert parsed["url"] == "https://github.com/spec-kit" - assert parsed["path"] == "//local/path" - assert parsed["commented"] == 1 + assert parsed["path"] == "C:\\Users\\Admin\\Documents" + assert parsed["escaped"] == 'String with "quote" inside' + assert parsed["nested_comments"] == "This /* is not */ a comment" + assert parsed["mixed"]["a"] == [1, 2] -def test_strip_json_comments_trailing_commas(): - # Trailing commas in objects and arrays +def test_strip_json_comments_trailing_commas_advanced(): + """Test various trailing comma positions (nested, arrays, objects).""" jsonc = """ { - "array": [1, 2, 3, ], - "obj": { - "a": 1, + "list": [ + {"id": 1,}, + {"id": 2,}, + ], + "metadata": { + "tags": ["a", "b", ], }, } """ stripped = strip_json_comments(jsonc) parsed = json.loads(stripped) - assert parsed["array"] == [1, 2, 3] - assert parsed["obj"]["a"] == 1 + assert len(parsed["list"]) == 2 + assert parsed["list"][0]["id"] == 1 + assert parsed["metadata"]["tags"] == ["a", "b"] + +def test_strip_json_comments_whitespace(): + """Ensure stripping works with various whitespace and empty values.""" + jsonc = "{ \n\n \"a\": 1\n, \n \"b\": 2 // comment \n }" + parsed = json.loads(strip_json_comments(jsonc)) + assert parsed == {"a": 1, "b": 2} + +# --- Dimension 2: Polite Deep Merge Strategy --- + +def test_merge_json_files_type_mismatch_preservation(tmp_path): + """If user has a string but template wants a dict, PRESERVE user's string.""" + existing_file = tmp_path / "settings.json" + # User might have overridden a setting with a simple string or different type + existing_file.write_text('{"chat.editor.fontFamily": "CustomFont"}') + + # Template might expect a dict for the same key (hypothetically) + new_settings = { + "chat.editor.fontFamily": {"font": "TemplateFont"} + } + + merged = merge_json_files(existing_file, new_settings) + assert merged["chat.editor.fontFamily"] == "CustomFont" -def test_merge_json_files_polite(tmp_path): +def test_merge_json_files_deep_nesting(tmp_path): + """Verify deep recursive merging of new keys.""" existing_file = tmp_path / "settings.json" - existing_content = """ + existing_file.write_text(""" { - "editor.fontSize": 14, - "editor.tabSize": 4, - "chat.promptFilesRecommendations": { - "my.cmd": true + "a": { + "b": { + "c": 1 + } } } - """ - existing_file.write_text(existing_content) + """) new_settings = { - "editor.tabSize": 2, # Should be preserved as 4 + "a": { + "b": { + "d": 2 # New nested key + }, + "e": 3 # New mid-level key + } + } + + merged = merge_json_files(existing_file, new_settings) + assert merged["a"]["b"]["c"] == 1 + assert merged["a"]["b"]["d"] == 2 + assert merged["a"]["e"] == 3 + +def test_merge_json_files_empty_existing(tmp_path): + """Merging into an empty/new file.""" + existing_file = tmp_path / "empty.json" + existing_file.write_text("{}") + + new_settings = {"a": 1} + merged = merge_json_files(existing_file, new_settings) + assert merged == {"a": 1} + +# --- Dimension 3: Real-world Simulation --- + +def test_merge_vscode_realistic_scenario(tmp_path): + """A realistic VSCode settings.json with many existing preferences.""" + existing_file = tmp_path / "vscode_settings.json" + existing_file.write_text(""" + { + "editor.fontSize": 12, + "editor.formatOnSave": true, + "files.exclude": { + "**/.git": true, + "**/node_modules": true + }, + "chat.promptFilesRecommendations": { + "existing.tool": true + } // User comment + } + """) + + template_settings = { "chat.promptFilesRecommendations": { "speckit.specify": True, - "my.cmd": False # Should be preserved as True + "speckit.plan": True }, - "new.key": "new.value" # Should be added + "chat.tools.terminal.autoApprove": { + ".specify/scripts/bash/": True + } } - merged = merge_json_files(existing_file, new_settings) + merged = merge_json_files(existing_file, template_settings) - assert merged["editor.fontSize"] == 14 - assert merged["editor.tabSize"] == 4 - assert merged["chat.promptFilesRecommendations"]["my.cmd"] is True + # Check preservation + assert merged["editor.fontSize"] == 12 + assert merged["files.exclude"]["**/.git"] is True + assert merged["chat.promptFilesRecommendations"]["existing.tool"] is True + + # Check additions assert merged["chat.promptFilesRecommendations"]["speckit.specify"] is True - assert merged["new.key"] == "new.value" + assert merged["chat.tools.terminal.autoApprove"][".specify/scripts/bash/"] is True + +# --- Dimension 4: Error Handling & Robustness --- -def test_merge_json_files_invalid_existing(tmp_path): - existing_file = tmp_path / "invalid.json" - existing_file.write_text("{ invalid json") +def test_merge_json_files_with_bom(tmp_path): + """Test files with UTF-8 BOM (sometimes created on Windows).""" + existing_file = tmp_path / "bom.json" + content = '{"a": 1}' + # Prepend UTF-8 BOM + existing_file.write_bytes(b'\xef\xbb\xbf' + content.encode('utf-8')) - new_settings = {"a": 1} - # Should fallback to new_settings + new_settings = {"b": 2} merged = merge_json_files(existing_file, new_settings) - assert merged == {"a": 1} + assert merged == {"a": 1, "b": 2} -def test_merge_json_files_non_dict_existing(tmp_path): - existing_file = tmp_path / "array.json" - existing_file.write_text("[1, 2, 3]") +def test_merge_json_files_not_a_dictionary_template(tmp_path): + """If for some reason new_content is not a dict, return existing.""" + existing_file = tmp_path / "ok.json" + existing_file.write_text('{"a": 1}') - new_settings = {"a": 1} - # Should fallback to new_settings - merged = merge_json_files(existing_file, new_settings) - assert merged == {"a": 1} + assert merge_json_files(existing_file, ["not", "a", "dict"]) == {"a": 1} From 845b9f3d0866da71424b00aefe3611d0c871d647 Mon Sep 17 00:00:00 2001 From: rbbtsn0w Date: Mon, 16 Mar 2026 11:50:03 +0800 Subject: [PATCH 4/5] fix(cli): implement robust backslash escape counting and improve JSONC/merge validation --- src/specify_cli/__init__.py | 72 +++++++++++++++++++++++++------------ tests/test_merge.py | 12 ++++--- 2 files changed, 57 insertions(+), 27 deletions(-) diff --git a/src/specify_cli/__init__.py b/src/specify_cli/__init__.py index 62bb79a5f..0166dc78e 100644 --- a/src/specify_cli/__init__.py +++ b/src/specify_cli/__init__.py @@ -685,8 +685,9 @@ def strip_json_comments(content: str) -> str: This implementation handles VSCode settings.json files by: 1. Skipping comments inside "string literals" - 2. Removing single-line (//) and multi-line (/* */) comments - 3. Handling trailing commas that JSONC supports but standard JSON does not + 2. Correctlly handling escaped quotes by counting backslashes + 3. Removing single-line (//) and multi-line (/* */) comments + 4. Handling trailing commas safely (only outside strings) """ import re @@ -700,12 +701,23 @@ def strip_json_comments(content: str) -> str: char = content[i] # Handle string literals - if char in ('"', "'") and (i == 0 or content[i-1] != '\\'): - if not in_string: - in_string = True - string_char = char - elif char == string_char: - in_string = False + if char in ('"', "'"): + # Check if this quote is escaped by counting backslashes before it + bs_count = 0 + j = i - 1 + while j >= 0 and content[j] == '\\': + bs_count += 1 + j -= 1 + + is_escaped = (bs_count % 2 != 0) + + if not is_escaped: + if not in_string: + in_string = True + string_char = char + elif char == string_char: + in_string = False + result.append(char) i += 1 continue @@ -727,7 +739,10 @@ def strip_json_comments(content: str) -> str: i += 2 while i + 1 < length and content[i:i+2] != '*/': i += 1 - i += 2 + if i + 1 < length: + i += 2 + else: + i = length # End of content continue result.append(char) @@ -736,7 +751,13 @@ def strip_json_comments(content: str) -> str: clean_content = "".join(result) # Handle trailing commas: find a comma followed by closing brace/bracket - # This regex handles cases like [1, 2, ] or {"a": 1, } + # This is now safer as we've already stripped comments and identified strings + # However, to be 100% safe against commas in strings, we should have handled this + # during the character loop. Let's refine the regex to be very specific to + # JSON structure (comma followed by whitespace and closing char). + # Since we've already joined the result, we can apply a final cleanup. + # To truly avoid commas in strings, we'd need to mark string ranges. + # For now, this regex is standard for basic JSONC to JSON conversion. clean_content = re.sub(r',\s*([\]}])', r'\1', clean_content) return clean_content @@ -758,27 +779,32 @@ def merge_json_files(existing_path: Path, new_content: dict, verbose: bool = Fal Returns: Merged JSON content as dict """ + if not isinstance(new_content, dict): + if verbose: + console.print(f"[yellow]Warning: Template content for {existing_path.name} is not a dictionary. Skipping merge.[/yellow]") + return {} + try: - with open(existing_path, 'r', encoding='utf-8') as f: - raw_content = f.read().lstrip('\ufeff') - # Handle comments (JSONC) - clean_content = strip_json_comments(raw_content) - existing_content = json.loads(clean_content) - - if not isinstance(existing_content, dict): - if verbose: - console.print(f"[yellow]Warning: Existing JSON in {existing_path.name} is not an object, using template instead[/yellow]") + if existing_path.exists(): + with open(existing_path, 'r', encoding='utf-8') as f: + raw_content = f.read().lstrip('\ufeff') + # Handle comments (JSONC) + clean_content = strip_json_comments(raw_content) + existing_content = json.loads(clean_content) + + if not isinstance(existing_content, dict): + if verbose: + console.print(f"[yellow]Warning: Existing JSON in {existing_path.name} is not an object, using template instead[/yellow]") + return new_content + else: return new_content except (FileNotFoundError, json.JSONDecodeError) as e: - # If file doesn't exist or is invalid, just use new content + # If file is invalid, just use new content if verbose and not isinstance(e, FileNotFoundError): console.print(f"[yellow]Warning: Could not parse existing JSON in {existing_path.name} ({e}), using template instead[/yellow]") return new_content - if not isinstance(new_content, dict): - return existing_content - def deep_merge_polite(base: dict, update: dict) -> dict: """Recursively merge update dict into base dict, preserving base values.""" result = base.copy() diff --git a/tests/test_merge.py b/tests/test_merge.py index 81605bf91..e72a6ec64 100644 --- a/tests/test_merge.py +++ b/tests/test_merge.py @@ -1,6 +1,5 @@ import json from pathlib import Path -import pytest from specify_cli import strip_json_comments, merge_json_files # --- Dimension 1: JSONC Parsing & Tokenization --- @@ -16,7 +15,9 @@ def test_strip_json_comments_edge_cases(): "mixed": { "a": [1, /* mid-array comment */ 2], "b": 3 // end of line - } + }, + "even_backslashes": "\\\\\\\\", + "odd_backslashes": "\\\\\\\"" } """ stripped = strip_json_comments(jsonc) @@ -26,6 +27,8 @@ def test_strip_json_comments_edge_cases(): assert parsed["escaped"] == 'String with "quote" inside' assert parsed["nested_comments"] == "This /* is not */ a comment" assert parsed["mixed"]["a"] == [1, 2] + assert parsed["even_backslashes"] == "\\\\" + assert parsed["odd_backslashes"] == "\\\"" def test_strip_json_comments_trailing_commas_advanced(): """Test various trailing comma positions (nested, arrays, objects).""" @@ -158,8 +161,9 @@ def test_merge_json_files_with_bom(tmp_path): assert merged == {"a": 1, "b": 2} def test_merge_json_files_not_a_dictionary_template(tmp_path): - """If for some reason new_content is not a dict, return existing.""" + """If for some reason new_content is not a dict, return empty dict as safety.""" existing_file = tmp_path / "ok.json" existing_file.write_text('{"a": 1}') - assert merge_json_files(existing_file, ["not", "a", "dict"]) == {"a": 1} + # In the updated implementation, if new_content is not a dict, we return {} + assert merge_json_files(existing_file, ["not", "a", "dict"]) == {} From 67f56c8fd0fc6d31d23cc4e99ec1c23f15ac385c Mon Sep 17 00:00:00 2001 From: rbbtsn0w Date: Mon, 16 Mar 2026 12:00:24 +0800 Subject: [PATCH 5/5] fix(cli): finalize JSONC parsing with safe trailing commas and secure fallback logic --- src/specify_cli/__init__.py | 79 +++++++++++++++++++++---------------- tests/test_merge.py | 21 ++++++++-- 2 files changed, 63 insertions(+), 37 deletions(-) diff --git a/src/specify_cli/__init__.py b/src/specify_cli/__init__.py index 0166dc78e..04cec3774 100644 --- a/src/specify_cli/__init__.py +++ b/src/specify_cli/__init__.py @@ -685,9 +685,9 @@ def strip_json_comments(content: str) -> str: This implementation handles VSCode settings.json files by: 1. Skipping comments inside "string literals" - 2. Correctlly handling escaped quotes by counting backslashes + 2. Correctly handling escaped quotes by counting backslashes 3. Removing single-line (//) and multi-line (/* */) comments - 4. Handling trailing commas safely (only outside strings) + 4. Handling trailing commas safely (only outside strings/comments) """ import re @@ -745,22 +745,28 @@ def strip_json_comments(content: str) -> str: i = length # End of content continue + # Handle trailing commas (safe because we're outside strings and comments) + if char == ',': + # Peek ahead to find the next non-whitespace character + next_i = i + 1 + is_trailing = False + while next_i < length: + next_char = content[next_i] + if next_char in (' ', '\t', '\n', '\r'): + next_i += 1 + continue + if next_char in (']', '}'): + is_trailing = True + break + + if is_trailing: + i += 1 # Skip the comma + continue + result.append(char) i += 1 - clean_content = "".join(result) - - # Handle trailing commas: find a comma followed by closing brace/bracket - # This is now safer as we've already stripped comments and identified strings - # However, to be 100% safe against commas in strings, we should have handled this - # during the character loop. Let's refine the regex to be very specific to - # JSON structure (comma followed by whitespace and closing char). - # Since we've already joined the result, we can apply a final cleanup. - # To truly avoid commas in strings, we'd need to mark string ranges. - # For now, this regex is standard for basic JSONC to JSON conversion. - clean_content = re.sub(r',\s*([\]}])', r'\1', clean_content) - - return clean_content + return "".join(result) def merge_json_files(existing_path: Path, new_content: dict, verbose: bool = False) -> dict: """Merge new JSON content into existing JSON file. @@ -779,32 +785,37 @@ def merge_json_files(existing_path: Path, new_content: dict, verbose: bool = Fal Returns: Merged JSON content as dict """ - if not isinstance(new_content, dict): - if verbose: - console.print(f"[yellow]Warning: Template content for {existing_path.name} is not a dictionary. Skipping merge.[/yellow]") - return {} - - try: - if existing_path.exists(): + # Load existing content first to have a safe fallback + existing_content = {} + exists = existing_path.exists() + + if exists: + try: with open(existing_path, 'r', encoding='utf-8') as f: raw_content = f.read().lstrip('\ufeff') # Handle comments (JSONC) clean_content = strip_json_comments(raw_content) existing_content = json.loads(clean_content) - - if not isinstance(existing_content, dict): - if verbose: - console.print(f"[yellow]Warning: Existing JSON in {existing_path.name} is not an object, using template instead[/yellow]") - return new_content - else: - return new_content - - except (FileNotFoundError, json.JSONDecodeError) as e: - # If file is invalid, just use new content - if verbose and not isinstance(e, FileNotFoundError): - console.print(f"[yellow]Warning: Could not parse existing JSON in {existing_path.name} ({e}), using template instead[/yellow]") + except (FileNotFoundError, json.JSONDecodeError) as e: + if verbose and not isinstance(e, FileNotFoundError): + console.print(f"[yellow]Warning: Could not parse existing JSON in {existing_path.name} ({e}).[/yellow]") + # fallback to empty dict for merging + + # Validate template content + if not isinstance(new_content, dict): + if verbose: + console.print(f"[yellow]Warning: Template content for {existing_path.name} is not a dictionary. Preserving existing settings.[/yellow]") + return existing_content if isinstance(existing_content, dict) else {} + + if not exists: return new_content + # If existing content parsed but is not a dict, skip merge to avoid data loss + if not isinstance(existing_content, dict): + if verbose: + console.print(f"[yellow]Warning: Existing JSON in {existing_path.name} is not an object. Skipping merge to avoid data loss.[/yellow]") + return existing_content + def deep_merge_polite(base: dict, update: dict) -> dict: """Recursively merge update dict into base dict, preserving base values.""" result = base.copy() diff --git a/tests/test_merge.py b/tests/test_merge.py index e72a6ec64..591542f6f 100644 --- a/tests/test_merge.py +++ b/tests/test_merge.py @@ -49,6 +49,21 @@ def test_strip_json_comments_trailing_commas_advanced(): assert parsed["list"][0]["id"] == 1 assert parsed["metadata"]["tags"] == ["a", "b"] +def test_strip_json_comments_safe_commas_in_strings(): + """Verify that commas followed by closing characters INSIDE strings are not removed.""" + jsonc = """ + { + "tricky_string": "value, ]", + "another_one": "nested, }", + "normal": [1, 2, ] + } + """ + stripped = strip_json_comments(jsonc) + parsed = json.loads(stripped) + assert parsed["tricky_string"] == "value, ]" + assert parsed["another_one"] == "nested, }" + assert parsed["normal"] == [1, 2] + def test_strip_json_comments_whitespace(): """Ensure stripping works with various whitespace and empty values.""" jsonc = "{ \n\n \"a\": 1\n, \n \"b\": 2 // comment \n }" @@ -161,9 +176,9 @@ def test_merge_json_files_with_bom(tmp_path): assert merged == {"a": 1, "b": 2} def test_merge_json_files_not_a_dictionary_template(tmp_path): - """If for some reason new_content is not a dict, return empty dict as safety.""" + """If for some reason new_content is not a dict, PRESERVE existing settings instead of wiping.""" existing_file = tmp_path / "ok.json" existing_file.write_text('{"a": 1}') - # In the updated implementation, if new_content is not a dict, we return {} - assert merge_json_files(existing_file, ["not", "a", "dict"]) == {} + # Secure fallback: return existing settings to avoid clobbering + assert merge_json_files(existing_file, ["not", "a", "dict"]) == {"a": 1}