diff --git a/CHANGELOG.md b/CHANGELOG.md index 4b1b6a47a..e701f207f 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -35,6 +35,10 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0 ### Added +- feat(cli): support for JSONC (JSON with comments) in VSCode settings.json merging +- feat(cli): polite deep merge for JSON files that preserves existing user settings +- feat(cli): basic validation for JSON object structures during initialization +- feat(cli): strip_json_comments utility for handling VSCode configuration files - feat(presets): Pluggable preset system with preset catalog and template resolver - Preset manifest (`preset.yml`) with validation for artifact, command, and script types - `PresetManifest`, `PresetRegistry`, `PresetManager`, `PresetCatalog`, `PresetResolver` classes in `src/specify_cli/presets.py` diff --git a/src/specify_cli/__init__.py b/src/specify_cli/__init__.py index a45535aee..04cec3774 100644 --- a/src/specify_cli/__init__.py +++ b/src/specify_cli/__init__.py @@ -661,7 +661,10 @@ def log(message, color="green"): try: with open(sub_item, 'r', encoding='utf-8') as f: - new_settings = json.load(f) + raw_new = f.read() + # Handle comments even in template files for consistency + clean_new = strip_json_comments(raw_new) + new_settings = json.loads(clean_new) if dest_file.exists(): merged = merge_json_files(dest_file, new_settings, verbose=verbose and not tracker) @@ -677,14 +680,102 @@ def log(message, color="green"): log(f"Warning: Could not merge, copying instead: {e}", "yellow") shutil.copy2(sub_item, dest_file) +def strip_json_comments(content: str) -> str: + """Remove // and /* */ comments from a JSON string, respecting string literals. + + This implementation handles VSCode settings.json files by: + 1. Skipping comments inside "string literals" + 2. Correctly handling escaped quotes by counting backslashes + 3. Removing single-line (//) and multi-line (/* */) comments + 4. Handling trailing commas safely (only outside strings/comments) + """ + import re + + result = [] + i = 0 + length = len(content) + in_string = False + string_char = None + + while i < length: + char = content[i] + + # Handle string literals + if char in ('"', "'"): + # Check if this quote is escaped by counting backslashes before it + bs_count = 0 + j = i - 1 + while j >= 0 and content[j] == '\\': + bs_count += 1 + j -= 1 + + is_escaped = (bs_count % 2 != 0) + + if not is_escaped: + if not in_string: + in_string = True + string_char = char + elif char == string_char: + in_string = False + + result.append(char) + i += 1 + continue + + if in_string: + result.append(char) + i += 1 + continue + + # Handle single-line comments // + if i + 1 < length and content[i:i+2] == '//': + i += 2 + while i < length and content[i] != '\n': + i += 1 + continue + + # Handle multi-line comments /* */ + if i + 1 < length and content[i:i+2] == '/*': + i += 2 + while i + 1 < length and content[i:i+2] != '*/': + i += 1 + if i + 1 < length: + i += 2 + else: + i = length # End of content + continue + + # Handle trailing commas (safe because we're outside strings and comments) + if char == ',': + # Peek ahead to find the next non-whitespace character + next_i = i + 1 + is_trailing = False + while next_i < length: + next_char = content[next_i] + if next_char in (' ', '\t', '\n', '\r'): + next_i += 1 + continue + if next_char in (']', '}'): + is_trailing = True + break + + if is_trailing: + i += 1 # Skip the comma + continue + + result.append(char) + i += 1 + + return "".join(result) + def merge_json_files(existing_path: Path, new_content: dict, verbose: bool = False) -> dict: """Merge new JSON content into existing JSON file. - Performs a deep merge where: + Performs a polite deep merge where: - New keys are added - - Existing keys are preserved unless overwritten by new content + - Existing keys are PRESERVED (not overwritten) unless they are dictionaries - Nested dictionaries are merged recursively - - Lists and other values are replaced (not merged) + - Lists and other values are preserved from base if they exist Args: existing_path: Path to existing JSON file @@ -694,26 +785,54 @@ def merge_json_files(existing_path: Path, new_content: dict, verbose: bool = Fal Returns: Merged JSON content as dict """ - try: - with open(existing_path, 'r', encoding='utf-8') as f: - existing_content = json.load(f) - except (FileNotFoundError, json.JSONDecodeError): - # If file doesn't exist or is invalid, just use new content + # Load existing content first to have a safe fallback + existing_content = {} + exists = existing_path.exists() + + if exists: + try: + with open(existing_path, 'r', encoding='utf-8') as f: + raw_content = f.read().lstrip('\ufeff') + # Handle comments (JSONC) + clean_content = strip_json_comments(raw_content) + existing_content = json.loads(clean_content) + except (FileNotFoundError, json.JSONDecodeError) as e: + if verbose and not isinstance(e, FileNotFoundError): + console.print(f"[yellow]Warning: Could not parse existing JSON in {existing_path.name} ({e}).[/yellow]") + # fallback to empty dict for merging + + # Validate template content + if not isinstance(new_content, dict): + if verbose: + console.print(f"[yellow]Warning: Template content for {existing_path.name} is not a dictionary. Preserving existing settings.[/yellow]") + return existing_content if isinstance(existing_content, dict) else {} + + if not exists: return new_content - def deep_merge(base: dict, update: dict) -> dict: - """Recursively merge update dict into base dict.""" + # If existing content parsed but is not a dict, skip merge to avoid data loss + if not isinstance(existing_content, dict): + if verbose: + console.print(f"[yellow]Warning: Existing JSON in {existing_path.name} is not an object. Skipping merge to avoid data loss.[/yellow]") + return existing_content + + def deep_merge_polite(base: dict, update: dict) -> dict: + """Recursively merge update dict into base dict, preserving base values.""" result = base.copy() for key, value in update.items(): - if key in result and isinstance(result[key], dict) and isinstance(value, dict): + if key not in result: + # Add new key + result[key] = value + elif isinstance(result[key], dict) and isinstance(value, dict): # Recursively merge nested dictionaries - result[key] = deep_merge(result[key], value) + result[key] = deep_merge_polite(result[key], value) else: - # Add new key or replace existing value - result[key] = value + # Key already exists and is not a dict, PRESERVE existing value + # This ensures user settings aren't overwritten by template defaults + pass return result - merged = deep_merge(existing_content, new_content) + merged = deep_merge_polite(existing_content, new_content) if verbose: console.print(f"[cyan]Merged JSON file:[/cyan] {existing_path.name}") diff --git a/tests/test_merge.py b/tests/test_merge.py new file mode 100644 index 000000000..591542f6f --- /dev/null +++ b/tests/test_merge.py @@ -0,0 +1,184 @@ +import json +from pathlib import Path +from specify_cli import strip_json_comments, merge_json_files + +# --- Dimension 1: JSONC Parsing & Tokenization --- + +def test_strip_json_comments_edge_cases(): + """Test complex comment scenarios and string protections.""" + jsonc = """ + { + "url": "https://github.com/spec-kit", // URL with // + "path": "C:\\\\Users\\\\Admin\\\\Documents", /* Path with backslashes */ + "escaped": "String with \\\"quote\\\" inside", + "nested_comments": "This /* is not */ a comment", + "mixed": { + "a": [1, /* mid-array comment */ 2], + "b": 3 // end of line + }, + "even_backslashes": "\\\\\\\\", + "odd_backslashes": "\\\\\\\"" + } + """ + stripped = strip_json_comments(jsonc) + parsed = json.loads(stripped) + assert parsed["url"] == "https://github.com/spec-kit" + assert parsed["path"] == "C:\\Users\\Admin\\Documents" + assert parsed["escaped"] == 'String with "quote" inside' + assert parsed["nested_comments"] == "This /* is not */ a comment" + assert parsed["mixed"]["a"] == [1, 2] + assert parsed["even_backslashes"] == "\\\\" + assert parsed["odd_backslashes"] == "\\\"" + +def test_strip_json_comments_trailing_commas_advanced(): + """Test various trailing comma positions (nested, arrays, objects).""" + jsonc = """ + { + "list": [ + {"id": 1,}, + {"id": 2,}, + ], + "metadata": { + "tags": ["a", "b", ], + }, + } + """ + stripped = strip_json_comments(jsonc) + parsed = json.loads(stripped) + assert len(parsed["list"]) == 2 + assert parsed["list"][0]["id"] == 1 + assert parsed["metadata"]["tags"] == ["a", "b"] + +def test_strip_json_comments_safe_commas_in_strings(): + """Verify that commas followed by closing characters INSIDE strings are not removed.""" + jsonc = """ + { + "tricky_string": "value, ]", + "another_one": "nested, }", + "normal": [1, 2, ] + } + """ + stripped = strip_json_comments(jsonc) + parsed = json.loads(stripped) + assert parsed["tricky_string"] == "value, ]" + assert parsed["another_one"] == "nested, }" + assert parsed["normal"] == [1, 2] + +def test_strip_json_comments_whitespace(): + """Ensure stripping works with various whitespace and empty values.""" + jsonc = "{ \n\n \"a\": 1\n, \n \"b\": 2 // comment \n }" + parsed = json.loads(strip_json_comments(jsonc)) + assert parsed == {"a": 1, "b": 2} + +# --- Dimension 2: Polite Deep Merge Strategy --- + +def test_merge_json_files_type_mismatch_preservation(tmp_path): + """If user has a string but template wants a dict, PRESERVE user's string.""" + existing_file = tmp_path / "settings.json" + # User might have overridden a setting with a simple string or different type + existing_file.write_text('{"chat.editor.fontFamily": "CustomFont"}') + + # Template might expect a dict for the same key (hypothetically) + new_settings = { + "chat.editor.fontFamily": {"font": "TemplateFont"} + } + + merged = merge_json_files(existing_file, new_settings) + assert merged["chat.editor.fontFamily"] == "CustomFont" + +def test_merge_json_files_deep_nesting(tmp_path): + """Verify deep recursive merging of new keys.""" + existing_file = tmp_path / "settings.json" + existing_file.write_text(""" + { + "a": { + "b": { + "c": 1 + } + } + } + """) + + new_settings = { + "a": { + "b": { + "d": 2 # New nested key + }, + "e": 3 # New mid-level key + } + } + + merged = merge_json_files(existing_file, new_settings) + assert merged["a"]["b"]["c"] == 1 + assert merged["a"]["b"]["d"] == 2 + assert merged["a"]["e"] == 3 + +def test_merge_json_files_empty_existing(tmp_path): + """Merging into an empty/new file.""" + existing_file = tmp_path / "empty.json" + existing_file.write_text("{}") + + new_settings = {"a": 1} + merged = merge_json_files(existing_file, new_settings) + assert merged == {"a": 1} + +# --- Dimension 3: Real-world Simulation --- + +def test_merge_vscode_realistic_scenario(tmp_path): + """A realistic VSCode settings.json with many existing preferences.""" + existing_file = tmp_path / "vscode_settings.json" + existing_file.write_text(""" + { + "editor.fontSize": 12, + "editor.formatOnSave": true, + "files.exclude": { + "**/.git": true, + "**/node_modules": true + }, + "chat.promptFilesRecommendations": { + "existing.tool": true + } // User comment + } + """) + + template_settings = { + "chat.promptFilesRecommendations": { + "speckit.specify": True, + "speckit.plan": True + }, + "chat.tools.terminal.autoApprove": { + ".specify/scripts/bash/": True + } + } + + merged = merge_json_files(existing_file, template_settings) + + # Check preservation + assert merged["editor.fontSize"] == 12 + assert merged["files.exclude"]["**/.git"] is True + assert merged["chat.promptFilesRecommendations"]["existing.tool"] is True + + # Check additions + assert merged["chat.promptFilesRecommendations"]["speckit.specify"] is True + assert merged["chat.tools.terminal.autoApprove"][".specify/scripts/bash/"] is True + +# --- Dimension 4: Error Handling & Robustness --- + +def test_merge_json_files_with_bom(tmp_path): + """Test files with UTF-8 BOM (sometimes created on Windows).""" + existing_file = tmp_path / "bom.json" + content = '{"a": 1}' + # Prepend UTF-8 BOM + existing_file.write_bytes(b'\xef\xbb\xbf' + content.encode('utf-8')) + + new_settings = {"b": 2} + merged = merge_json_files(existing_file, new_settings) + assert merged == {"a": 1, "b": 2} + +def test_merge_json_files_not_a_dictionary_template(tmp_path): + """If for some reason new_content is not a dict, PRESERVE existing settings instead of wiping.""" + existing_file = tmp_path / "ok.json" + existing_file.write_text('{"a": 1}') + + # Secure fallback: return existing settings to avoid clobbering + assert merge_json_files(existing_file, ["not", "a", "dict"]) == {"a": 1}